// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static bool have_frontend_stalled;

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For the shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For the generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}
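
/*
 * The ctx bits computed by evsel_context() below key shadow stats by
 * the event's exclusion mode, so counts measured under different
 * exclusion settings are never mixed. For example, an event opened
 * with the :u modifier (exclude_kernel and exclude_hv set) gets
 * ctx = CTX_BIT_KERNEL | CTX_BIT_HV.
 */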

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}
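
/*
 * Illustrative flow (made-up numbers): two STAT_CYCLES updates of
 * 1000 and 3000 for the same ctx/cpu accumulate in one saved_value,
 * so a later runtime_stat_avg() sees an average of 2000 and
 * runtime_stat_n() a count of 2.
 */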

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;

	count *= counter->scale;

	if (perf_evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true,
							   STAT_NONE, 0, st);
		update_stats(&v->stats, count);
	}
}

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
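
/*
 * The rows of grc_table below are { red, magenta, yellow } thresholds.
 * For example, a frontend-stall ratio of 35.0% is above 30.0 but not
 * above 50.0, so it is printed in magenta.
 */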

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(num_metric_names + 1,
					       sizeof(struct perf_evsel *));
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck break down.
 *
 * Basic concept following
 * Yasin, "A Top-Down Method for Performance Analysis and Counters
 * Architecture", ISPASS 2014.
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition means out of order execution that is thrown away
 * (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is one entry in the pipeline per cycle of the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined, as possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
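
/*
 * Worked example of the formulas above, with hypothetical counts on a
 * 4-wide CPU: Cycles = 1000, so TotalSlots = 4000. With
 * SlotsIssued = 3000, SlotsRetired = 2400, RecoveryBubbles = 200 and
 * FetchBubbles = 800:
 *
 *   BadSpeculation = (3000 - 2400 + 200) / 4000 = 0.20
 *   Retiring       = 2400 / 4000                = 0.60
 *   FrontendBound  = 800 / 4000                 = 0.20
 *   BackendBound   = 1.0 - 0.20 - 0.60 - 0.20   = 0.00
 */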

static double sanitize_val(double x)
{
	/* Clamp small negative values, caused by counter skew, to zero. */
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
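
/*
 * generic_metric() below evaluates a metric expression against the
 * saved counts: each event named in the expression is bound to its
 * averaged count in pctx and the parsed result is printed. With a
 * (hypothetical) MetricExpr "inst_retired.any / cpu_clk_unhalted.thread",
 * both event averages are looked up via saved_value_lookup() and their
 * ratio is printed as the metric. "duration_time" is special-cased to
 * the walltime statistics, scaled from nanoseconds to seconds.
 */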

static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;
		expr__add_id(&pctx, n, avg_stats(stats) * scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(config, ctxp, NULL, "%8.1f",
				     metric_name ?
				     metric_name :
				     out->force_header ?  name : "",
				     ratio);
		else
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);

	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}
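
/*
 * Print the derived ("shadow") metric matching a counter: insn per
 * cycle for instructions, GHz for cycles, miss ratios for the cache
 * events, TopDown fractions for the topdown events, and so on. When
 * nothing specific matches but the elapsed time is known, a generic
 * M/sec (or K/sec) rate is printed; metric expressions attached via
 * metric groups are emitted as additional lines.
 */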

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		} else if (have_frontend_stalled) {
			out->new_line(config, ctxp);
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2 - avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
NULL, "aborted cycles", 0); 926 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { 927 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 928 ctx, cpu); 929 930 if (avg) 931 ratio = total / avg; 932 933 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) 934 print_metric(config, ctxp, NULL, "%8.0f", 935 "cycles / transaction", ratio); 936 else 937 print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 938 0); 939 } else if (perf_stat_evsel__is(evsel, ELISION_START)) { 940 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, 941 ctx, cpu); 942 943 if (avg) 944 ratio = total / avg; 945 946 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); 947 } else if (perf_evsel__is_clock(evsel)) { 948 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) 949 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 950 avg / (ratio * evsel->scale)); 951 else 952 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 953 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { 954 double fe_bound = td_fe_bound(ctx, cpu, st); 955 956 if (fe_bound > 0.2) 957 color = PERF_COLOR_RED; 958 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", 959 fe_bound * 100.); 960 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { 961 double retiring = td_retiring(ctx, cpu, st); 962 963 if (retiring > 0.7) 964 color = PERF_COLOR_GREEN; 965 print_metric(config, ctxp, color, "%8.1f%%", "retiring", 966 retiring * 100.); 967 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { 968 double bad_spec = td_bad_spec(ctx, cpu, st); 969 970 if (bad_spec > 0.1) 971 color = PERF_COLOR_RED; 972 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", 973 bad_spec * 100.); 974 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { 975 double be_bound = td_be_bound(ctx, cpu, st); 976 const char *name = "backend bound"; 977 static int have_recovery_bubbles = -1; 978 979 /* In case the CPU does not support topdown-recovery-bubbles */ 980 if (have_recovery_bubbles < 0) 981 have_recovery_bubbles = pmu_have_event("cpu", 982 "topdown-recovery-bubbles"); 983 if (!have_recovery_bubbles) 984 name = "backend bound/bad spec"; 985 986 if (be_bound > 0.2) 987 color = PERF_COLOR_RED; 988 if (td_total_slots(ctx, cpu, st) > 0) 989 print_metric(config, ctxp, color, "%8.1f%%", name, 990 be_bound * 100.); 991 else 992 print_metric(config, ctxp, NULL, NULL, name, 0); 993 } else if (evsel->metric_expr) { 994 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, 995 evsel->metric_name, avg, cpu, out, st); 996 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { 997 char unit = 'M'; 998 char unit_buf[10]; 999 1000 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); 1001 1002 if (total) 1003 ratio = 1000.0 * avg / total; 1004 if (ratio < 0.001) { 1005 ratio *= 1000; 1006 unit = 'K'; 1007 } 1008 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 1009 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 1010 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { 1011 print_smi_cost(config, cpu, evsel, out, st); 1012 } else { 1013 num = 0; 1014 } 1015 1016 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 1017 struct metric_expr *mexp; 1018 1019 list_for_each_entry (mexp, &me->head, nd) { 1020 if (num++ > 0) 1021 out->new_line(config, ctxp); 1022 generic_metric(config, mexp->metric_expr, mexp->metric_events, 1023 evsel->name, mexp->metric_name, 1024 avg, cpu, out, st); 1025 } 1026 } 1027 if (num == 0) 1028 
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}