// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}
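
/*
 * Encode the exclude_* attribute bits of an event into a context index.
 * Shadow stats are keyed on this, so e.g. kernel-only and user-only
 * variants of the same hardware event are tracked separately.
 */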
static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
		v->metric_other++;
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline each for the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
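
/*
 * Worked example of the Level 1 formulas above, using made-up numbers for
 * a hypothetical 4-wide core over one measurement interval:
 *
 *   Cycles = 1000                                  => TotalSlots = 4 * 1000 = 4000
 *   SlotsIssued = 2600, SlotsRetired = 2200, RecoveryBubbles = 200
 *   FetchBubbles = 800
 *
 *   BadSpeculation = ((2600 - 2200) + 200) / 4000 = 0.15
 *   Retiring       = 2200 / 4000                  = 0.55
 *   FrontendBound  = 800 / 4000                   = 0.20
 *   BackendBound   = 1.0 - 0.15 - 0.55 - 0.20     = 0.10
 */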

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
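
/*
 * Evaluate and print a MetricExpr for one event: seed the expression
 * context with this event's average under its own name, add the saved
 * averages (or merged totals) of all linked metric events, then parse the
 * expression and print the resulting value. "duration_time" is mapped to
 * the wall-clock stats and scaled from nanoseconds to seconds. If any
 * referenced event has no saved value, only the (empty) header is printed.
 */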
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	/* Must be first id entry */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;

			if (v->metric_other)
				metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id(&pctx, n, metric_total);
		else
			expr__add_id(&pctx, n, avg_stats(stats)*scale);
	}

	if (!metric_events[i]) {
		if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
							    &unit, &scale) >= 0) {
					ratio *= scale;
				}

				scnprintf(metric_bf, sizeof(metric_bf),
					  "%s %s", unit, metric_name);
				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_name ?
					     metric_name :
					     out->force_header ? name : "",
					     ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else {
		print_metric(config, ctxp, NULL, NULL,
			     out->force_header ?
			     (metric_name ? metric_name : name) : "", 0);
	}

	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
			       evsel->metric_name, NULL, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
				       evsel->name, mexp->metric_name,
				       mexp->metric_unit, avg, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}