1 // SPDX-License-Identifier: GPL-2.0 2 #include <math.h> 3 #include <stdio.h> 4 #include "evsel.h" 5 #include "stat.h" 6 #include "color.h" 7 #include "debug.h" 8 #include "pmu.h" 9 #include "rblist.h" 10 #include "evlist.h" 11 #include "expr.h" 12 #include "metricgroup.h" 13 #include "cgroup.h" 14 #include "units.h" 15 #include <linux/zalloc.h> 16 #include "iostat.h" 17 #include "util/hashmap.h" 18 19 /* 20 * AGGR_GLOBAL: Use CPU 0 21 * AGGR_SOCKET: Use first CPU of socket 22 * AGGR_DIE: Use first CPU of die 23 * AGGR_CORE: Use first CPU of core 24 * AGGR_NONE: Use matching CPU 25 * AGGR_THREAD: Not supported? 26 */ 27 28 struct stats walltime_nsecs_stats; 29 struct rusage_stats ru_stats; 30 31 static struct runtime_stat { 32 struct rblist value_list; 33 } rt_stat; 34 35 enum { 36 CTX_BIT_USER = 1 << 0, 37 CTX_BIT_KERNEL = 1 << 1, 38 CTX_BIT_HV = 1 << 2, 39 CTX_BIT_HOST = 1 << 3, 40 CTX_BIT_IDLE = 1 << 4, 41 CTX_BIT_MAX = 1 << 5, 42 }; 43 44 enum stat_type { 45 STAT_NONE = 0, 46 STAT_NSECS, 47 STAT_CYCLES, 48 STAT_INSTRUCTIONS, 49 STAT_STALLED_CYCLES_FRONT, 50 STAT_STALLED_CYCLES_BACK, 51 STAT_BRANCHES, 52 STAT_BRANCH_MISS, 53 STAT_CACHE_REFS, 54 STAT_CACHE_MISSES, 55 STAT_L1_DCACHE, 56 STAT_L1_ICACHE, 57 STAT_LL_CACHE, 58 STAT_ITLB_CACHE, 59 STAT_DTLB_CACHE, 60 STAT_L1D_MISS, 61 STAT_L1I_MISS, 62 STAT_LL_MISS, 63 STAT_DTLB_MISS, 64 STAT_ITLB_MISS, 65 STAT_MAX 66 }; 67 68 struct saved_value { 69 struct rb_node rb_node; 70 struct evsel *evsel; 71 enum stat_type type; 72 int ctx; 73 int map_idx; /* cpu or thread map index */ 74 struct cgroup *cgrp; 75 struct stats stats; 76 u64 metric_total; 77 int metric_other; 78 }; 79 80 static int saved_value_cmp(struct rb_node *rb_node, const void *entry) 81 { 82 struct saved_value *a = container_of(rb_node, 83 struct saved_value, 84 rb_node); 85 const struct saved_value *b = entry; 86 87 if (a->map_idx != b->map_idx) 88 return a->map_idx - b->map_idx; 89 90 /* 91 * Previously the rbtree was used to link generic metrics. 92 * The keys were evsel/cpu. Now the rbtree is extended to support 93 * per-thread shadow stats. For shadow stats case, the keys 94 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics 95 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). 96 */ 97 if (a->type != b->type) 98 return a->type - b->type; 99 100 if (a->ctx != b->ctx) 101 return a->ctx - b->ctx; 102 103 if (a->cgrp != b->cgrp) 104 return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; 105 106 if (a->evsel == b->evsel) 107 return 0; 108 if ((char *)a->evsel < (char *)b->evsel) 109 return -1; 110 return +1; 111 } 112 113 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, 114 const void *entry) 115 { 116 struct saved_value *nd = malloc(sizeof(struct saved_value)); 117 118 if (!nd) 119 return NULL; 120 memcpy(nd, entry, sizeof(struct saved_value)); 121 return &nd->rb_node; 122 } 123 124 static void saved_value_delete(struct rblist *rblist __maybe_unused, 125 struct rb_node *rb_node) 126 { 127 struct saved_value *v; 128 129 BUG_ON(!rb_node); 130 v = container_of(rb_node, struct saved_value, rb_node); 131 free(v); 132 } 133 134 static struct saved_value *saved_value_lookup(struct evsel *evsel, 135 int map_idx, 136 bool create, 137 enum stat_type type, 138 int ctx, 139 struct cgroup *cgrp) 140 { 141 struct rblist *rblist; 142 struct rb_node *nd; 143 struct saved_value dm = { 144 .map_idx = map_idx, 145 .evsel = evsel, 146 .type = type, 147 .ctx = ctx, 148 .cgrp = cgrp, 149 }; 150 151 rblist = &rt_stat.value_list; 152 153 /* don't use context info for clock events */ 154 if (type == STAT_NSECS) 155 dm.ctx = 0; 156 157 nd = rblist__find(rblist, &dm); 158 if (nd) 159 return container_of(nd, struct saved_value, rb_node); 160 if (create) { 161 rblist__add_node(rblist, &dm); 162 nd = rblist__find(rblist, &dm); 163 if (nd) 164 return container_of(nd, struct saved_value, rb_node); 165 } 166 return NULL; 167 } 168 169 void perf_stat__init_shadow_stats(void) 170 { 171 struct rblist *rblist = &rt_stat.value_list; 172 173 rblist__init(rblist); 174 rblist->node_cmp = saved_value_cmp; 175 rblist->node_new = saved_value_new; 176 rblist->node_delete = saved_value_delete; 177 } 178 179 static int evsel_context(const struct evsel *evsel) 180 { 181 int ctx = 0; 182 183 if (evsel->core.attr.exclude_kernel) 184 ctx |= CTX_BIT_KERNEL; 185 if (evsel->core.attr.exclude_user) 186 ctx |= CTX_BIT_USER; 187 if (evsel->core.attr.exclude_hv) 188 ctx |= CTX_BIT_HV; 189 if (evsel->core.attr.exclude_host) 190 ctx |= CTX_BIT_HOST; 191 if (evsel->core.attr.exclude_idle) 192 ctx |= CTX_BIT_IDLE; 193 194 return ctx; 195 } 196 197 void perf_stat__reset_shadow_per_stat(void) 198 { 199 struct rblist *rblist; 200 struct rb_node *pos, *next; 201 202 rblist = &rt_stat.value_list; 203 next = rb_first_cached(&rblist->entries); 204 while (next) { 205 pos = next; 206 next = rb_next(pos); 207 memset(&container_of(pos, struct saved_value, rb_node)->stats, 208 0, 209 sizeof(struct stats)); 210 } 211 } 212 213 void perf_stat__reset_shadow_stats(void) 214 { 215 perf_stat__reset_shadow_per_stat(); 216 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 217 memset(&ru_stats, 0, sizeof(ru_stats)); 218 } 219 220 struct runtime_stat_data { 221 int ctx; 222 struct cgroup *cgrp; 223 }; 224 225 static void update_runtime_stat(enum stat_type type, 226 int map_idx, u64 count, 227 struct runtime_stat_data *rsd) 228 { 229 struct saved_value *v = saved_value_lookup(NULL, map_idx, true, type, 230 rsd->ctx, rsd->cgrp); 231 232 if (v) 233 update_stats(&v->stats, count); 234 } 235 236 /* 237 * Update various tracking values we maintain to print 238 * more semantic information such as miss/hit ratios, 239 * instruction rates, etc: 240 */ 241 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, 242 int aggr_idx) 243 { 244 u64 count_ns = count; 245 struct runtime_stat_data rsd = { 246 .ctx = evsel_context(counter), 247 .cgrp = counter->cgrp, 248 }; 249 count *= counter->scale; 250 251 if (evsel__is_clock(counter)) 252 update_runtime_stat(STAT_NSECS, aggr_idx, count_ns, &rsd); 253 else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) 254 update_runtime_stat(STAT_CYCLES, aggr_idx, count, &rsd); 255 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 256 update_runtime_stat(STAT_STALLED_CYCLES_FRONT, 257 aggr_idx, count, &rsd); 258 else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 259 update_runtime_stat(STAT_STALLED_CYCLES_BACK, 260 aggr_idx, count, &rsd); 261 else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 262 update_runtime_stat(STAT_BRANCHES, aggr_idx, count, &rsd); 263 else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) 264 update_runtime_stat(STAT_CACHE_REFS, aggr_idx, count, &rsd); 265 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) 266 update_runtime_stat(STAT_L1_DCACHE, aggr_idx, count, &rsd); 267 else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) 268 update_runtime_stat(STAT_L1_ICACHE, aggr_idx, count, &rsd); 269 else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) 270 update_runtime_stat(STAT_LL_CACHE, aggr_idx, count, &rsd); 271 else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) 272 update_runtime_stat(STAT_DTLB_CACHE, aggr_idx, count, &rsd); 273 else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 274 update_runtime_stat(STAT_ITLB_CACHE, aggr_idx, count, &rsd); 275 } 276 277 static enum stat_type evsel__stat_type(const struct evsel *evsel) 278 { 279 /* Fake perf_hw_cache_op_id values for use with evsel__match. */ 280 u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | 281 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 282 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 283 u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I | 284 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 285 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 286 u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL | 287 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 288 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 289 u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB | 290 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 291 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 292 u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB | 293 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 294 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 295 296 if (evsel__is_clock(evsel)) 297 return STAT_NSECS; 298 else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) 299 return STAT_CYCLES; 300 else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) 301 return STAT_INSTRUCTIONS; 302 else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 303 return STAT_STALLED_CYCLES_FRONT; 304 else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 305 return STAT_STALLED_CYCLES_BACK; 306 else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 307 return STAT_BRANCHES; 308 else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) 309 return STAT_BRANCH_MISS; 310 else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES)) 311 return STAT_CACHE_REFS; 312 else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) 313 return STAT_CACHE_MISSES; 314 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D)) 315 return STAT_L1_DCACHE; 316 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I)) 317 return STAT_L1_ICACHE; 318 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL)) 319 return STAT_LL_CACHE; 320 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB)) 321 return STAT_DTLB_CACHE; 322 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB)) 323 return STAT_ITLB_CACHE; 324 else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss)) 325 return STAT_L1D_MISS; 326 else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss)) 327 return STAT_L1I_MISS; 328 else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss)) 329 return STAT_LL_MISS; 330 else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss)) 331 return STAT_DTLB_MISS; 332 else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss)) 333 return STAT_ITLB_MISS; 334 return STAT_NONE; 335 } 336 337 static const char *get_ratio_color(const double ratios[3], double val) 338 { 339 const char *color = PERF_COLOR_NORMAL; 340 341 if (val > ratios[0]) 342 color = PERF_COLOR_RED; 343 else if (val > ratios[1]) 344 color = PERF_COLOR_MAGENTA; 345 else if (val > ratios[2]) 346 color = PERF_COLOR_YELLOW; 347 348 return color; 349 } 350 351 static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) 352 { 353 const struct evsel *cur; 354 int evsel_ctx = evsel_context(evsel); 355 356 evlist__for_each_entry(evsel->evlist, cur) { 357 struct perf_stat_aggr *aggr; 358 359 /* Ignore the evsel that is being searched from. */ 360 if (evsel == cur) 361 continue; 362 363 /* Ignore evsels that are part of different groups. */ 364 if (evsel->core.leader->nr_members && 365 evsel->core.leader != cur->core.leader) 366 continue; 367 /* Ignore evsels with mismatched modifiers. */ 368 if (evsel_ctx != evsel_context(cur)) 369 continue; 370 /* Ignore if not the cgroup we're looking for. */ 371 if (evsel->cgrp != cur->cgrp) 372 continue; 373 /* Ignore if not the stat we're looking for. */ 374 if (type != evsel__stat_type(cur)) 375 continue; 376 377 aggr = &cur->stats->aggr[aggr_idx]; 378 if (type == STAT_NSECS) 379 return aggr->counts.val; 380 return aggr->counts.val * cur->scale; 381 } 382 return 0.0; 383 } 384 385 static void print_ratio(struct perf_stat_config *config, 386 const struct evsel *evsel, int aggr_idx, 387 double numerator, struct perf_stat_output_ctx *out, 388 enum stat_type denominator_type, 389 const double color_ratios[3], const char *unit) 390 { 391 double denominator = find_stat(evsel, aggr_idx, denominator_type); 392 393 if (numerator && denominator) { 394 double ratio = numerator / denominator * 100.0; 395 const char *color = get_ratio_color(color_ratios, ratio); 396 397 out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); 398 } else 399 out->print_metric(config, out->ctx, NULL, NULL, unit, 0); 400 } 401 402 static void print_stalled_cycles_front(struct perf_stat_config *config, 403 const struct evsel *evsel, 404 int aggr_idx, double stalled, 405 struct perf_stat_output_ctx *out) 406 { 407 static const double color_ratios[3] = {50.0, 30.0, 10.0}; 408 409 print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, 410 "frontend cycles idle"); 411 } 412 413 static void print_stalled_cycles_back(struct perf_stat_config *config, 414 const struct evsel *evsel, 415 int aggr_idx, double stalled, 416 struct perf_stat_output_ctx *out) 417 { 418 static const double color_ratios[3] = {75.0, 50.0, 20.0}; 419 420 print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, 421 "backend cycles idle"); 422 } 423 424 static void print_branch_miss(struct perf_stat_config *config, 425 const struct evsel *evsel, 426 int aggr_idx, double misses, 427 struct perf_stat_output_ctx *out) 428 { 429 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 430 431 print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, 432 "of all branches"); 433 } 434 435 static void print_l1d_miss(struct perf_stat_config *config, 436 const struct evsel *evsel, 437 int aggr_idx, double misses, 438 struct perf_stat_output_ctx *out) 439 { 440 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 441 442 print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, 443 "of all L1-dcache accesses"); 444 } 445 446 static void print_l1i_miss(struct perf_stat_config *config, 447 const struct evsel *evsel, 448 int aggr_idx, double misses, 449 struct perf_stat_output_ctx *out) 450 { 451 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 452 453 print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, 454 "of all L1-icache accesses"); 455 } 456 457 static void print_ll_miss(struct perf_stat_config *config, 458 const struct evsel *evsel, 459 int aggr_idx, double misses, 460 struct perf_stat_output_ctx *out) 461 { 462 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 463 464 print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, 465 "of all L1-icache accesses"); 466 } 467 468 static void print_dtlb_miss(struct perf_stat_config *config, 469 const struct evsel *evsel, 470 int aggr_idx, double misses, 471 struct perf_stat_output_ctx *out) 472 { 473 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 474 475 print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, 476 "of all dTLB cache accesses"); 477 } 478 479 static void print_itlb_miss(struct perf_stat_config *config, 480 const struct evsel *evsel, 481 int aggr_idx, double misses, 482 struct perf_stat_output_ctx *out) 483 { 484 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 485 486 print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, 487 "of all iTLB cache accesses"); 488 } 489 490 static void print_cache_miss(struct perf_stat_config *config, 491 const struct evsel *evsel, 492 int aggr_idx, double misses, 493 struct perf_stat_output_ctx *out) 494 { 495 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 496 497 print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, 498 "of all cache refs"); 499 } 500 501 static void print_instructions(struct perf_stat_config *config, 502 const struct evsel *evsel, 503 int aggr_idx, double instructions, 504 struct perf_stat_output_ctx *out) 505 { 506 print_metric_t print_metric = out->print_metric; 507 void *ctxp = out->ctx; 508 double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES); 509 double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT), 510 find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); 511 512 if (cycles) { 513 print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", 514 instructions / cycles); 515 } else 516 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 517 518 if (max_stalled && instructions) { 519 out->new_line(config, ctxp); 520 print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", 521 max_stalled / instructions); 522 } 523 } 524 525 static void print_cycles(struct perf_stat_config *config, 526 const struct evsel *evsel, 527 int aggr_idx, double cycles, 528 struct perf_stat_output_ctx *out) 529 { 530 double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); 531 532 if (cycles && nsecs) { 533 double ratio = cycles / nsecs; 534 535 out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); 536 } else 537 out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); 538 } 539 540 static void print_nsecs(struct perf_stat_config *config, 541 const struct evsel *evsel, 542 int aggr_idx __maybe_unused, double nsecs, 543 struct perf_stat_output_ctx *out) 544 { 545 print_metric_t print_metric = out->print_metric; 546 void *ctxp = out->ctx; 547 double wall_time = avg_stats(&walltime_nsecs_stats); 548 549 if (wall_time) { 550 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 551 nsecs / (wall_time * evsel->scale)); 552 } else 553 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 554 } 555 556 static int prepare_metric(struct evsel **metric_events, 557 struct metric_ref *metric_refs, 558 struct expr_parse_ctx *pctx, 559 int aggr_idx) 560 { 561 int i; 562 563 for (i = 0; metric_events[i]; i++) { 564 char *n; 565 double val; 566 int source_count = 0; 567 568 if (evsel__is_tool(metric_events[i])) { 569 struct stats *stats; 570 double scale; 571 572 switch (metric_events[i]->tool_event) { 573 case PERF_TOOL_DURATION_TIME: 574 stats = &walltime_nsecs_stats; 575 scale = 1e-9; 576 break; 577 case PERF_TOOL_USER_TIME: 578 stats = &ru_stats.ru_utime_usec_stat; 579 scale = 1e-6; 580 break; 581 case PERF_TOOL_SYSTEM_TIME: 582 stats = &ru_stats.ru_stime_usec_stat; 583 scale = 1e-6; 584 break; 585 case PERF_TOOL_NONE: 586 pr_err("Invalid tool event 'none'"); 587 abort(); 588 case PERF_TOOL_MAX: 589 pr_err("Invalid tool event 'max'"); 590 abort(); 591 default: 592 pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); 593 abort(); 594 } 595 val = avg_stats(stats) * scale; 596 source_count = 1; 597 } else { 598 struct perf_stat_evsel *ps = metric_events[i]->stats; 599 struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; 600 601 if (!aggr) 602 break; 603 604 /* 605 * If an event was scaled during stat gathering, reverse 606 * the scale before computing the metric. 607 */ 608 val = aggr->counts.val * (1.0 / metric_events[i]->scale); 609 source_count = evsel__source_count(metric_events[i]); 610 } 611 n = strdup(evsel__metric_id(metric_events[i])); 612 if (!n) 613 return -ENOMEM; 614 615 expr__add_id_val_source_count(pctx, n, val, source_count); 616 } 617 618 for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) { 619 int ret = expr__add_ref(pctx, &metric_refs[j]); 620 621 if (ret) 622 return ret; 623 } 624 625 return i; 626 } 627 628 static void generic_metric(struct perf_stat_config *config, 629 const char *metric_expr, 630 const char *metric_threshold, 631 struct evsel **metric_events, 632 struct metric_ref *metric_refs, 633 char *name, 634 const char *metric_name, 635 const char *metric_unit, 636 int runtime, 637 int aggr_idx, 638 struct perf_stat_output_ctx *out) 639 { 640 print_metric_t print_metric = out->print_metric; 641 struct expr_parse_ctx *pctx; 642 double ratio, scale, threshold; 643 int i; 644 void *ctxp = out->ctx; 645 const char *color = NULL; 646 647 pctx = expr__ctx_new(); 648 if (!pctx) 649 return; 650 651 if (config->user_requested_cpu_list) 652 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); 653 pctx->sctx.runtime = runtime; 654 pctx->sctx.system_wide = config->system_wide; 655 i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx); 656 if (i < 0) { 657 expr__ctx_free(pctx); 658 return; 659 } 660 if (!metric_events[i]) { 661 if (expr__parse(&ratio, pctx, metric_expr) == 0) { 662 char *unit; 663 char metric_bf[64]; 664 665 if (metric_threshold && 666 expr__parse(&threshold, pctx, metric_threshold) == 0 && 667 !isnan(threshold)) { 668 color = fpclassify(threshold) == FP_ZERO 669 ? PERF_COLOR_GREEN : PERF_COLOR_RED; 670 } 671 672 if (metric_unit && metric_name) { 673 if (perf_pmu__convert_scale(metric_unit, 674 &unit, &scale) >= 0) { 675 ratio *= scale; 676 } 677 if (strstr(metric_expr, "?")) 678 scnprintf(metric_bf, sizeof(metric_bf), 679 "%s %s_%d", unit, metric_name, runtime); 680 else 681 scnprintf(metric_bf, sizeof(metric_bf), 682 "%s %s", unit, metric_name); 683 684 print_metric(config, ctxp, color, "%8.1f", 685 metric_bf, ratio); 686 } else { 687 print_metric(config, ctxp, color, "%8.2f", 688 metric_name ? 689 metric_name : 690 out->force_header ? name : "", 691 ratio); 692 } 693 } else { 694 print_metric(config, ctxp, color, /*unit=*/NULL, 695 out->force_header ? 696 (metric_name ? metric_name : name) : "", 0); 697 } 698 } else { 699 print_metric(config, ctxp, color, /*unit=*/NULL, 700 out->force_header ? 701 (metric_name ? metric_name : name) : "", 0); 702 } 703 704 expr__ctx_free(pctx); 705 } 706 707 double test_generic_metric(struct metric_expr *mexp, int aggr_idx) 708 { 709 struct expr_parse_ctx *pctx; 710 double ratio = 0.0; 711 712 pctx = expr__ctx_new(); 713 if (!pctx) 714 return NAN; 715 716 if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0) 717 goto out; 718 719 if (expr__parse(&ratio, pctx, mexp->metric_expr)) 720 ratio = 0.0; 721 722 out: 723 expr__ctx_free(pctx); 724 return ratio; 725 } 726 727 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 728 struct evsel *evsel, 729 double avg, int aggr_idx, 730 struct perf_stat_output_ctx *out, 731 struct rblist *metric_events) 732 { 733 typedef void (*stat_print_function_t)(struct perf_stat_config *config, 734 const struct evsel *evsel, 735 int aggr_idx, double misses, 736 struct perf_stat_output_ctx *out); 737 static const stat_print_function_t stat_print_function[STAT_MAX] = { 738 [STAT_INSTRUCTIONS] = print_instructions, 739 [STAT_BRANCH_MISS] = print_branch_miss, 740 [STAT_L1D_MISS] = print_l1d_miss, 741 [STAT_L1I_MISS] = print_l1i_miss, 742 [STAT_DTLB_MISS] = print_dtlb_miss, 743 [STAT_ITLB_MISS] = print_itlb_miss, 744 [STAT_LL_MISS] = print_ll_miss, 745 [STAT_CACHE_MISSES] = print_cache_miss, 746 [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front, 747 [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back, 748 [STAT_CYCLES] = print_cycles, 749 [STAT_NSECS] = print_nsecs, 750 }; 751 print_metric_t print_metric = out->print_metric; 752 void *ctxp = out->ctx; 753 struct metric_event *me; 754 int num = 1; 755 756 if (config->iostat_run) { 757 iostat_print_metric(config, evsel, out); 758 } else { 759 stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)]; 760 761 if (fn) 762 fn(config, evsel, aggr_idx, avg, out); 763 else { 764 double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); 765 766 if (nsecs) { 767 char unit = ' '; 768 char unit_buf[10] = "/sec"; 769 double ratio = convert_unit_double(1000000000.0 * avg / nsecs, 770 &unit); 771 772 if (unit != ' ') 773 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 774 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 775 } else 776 num = 0; 777 } 778 } 779 780 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 781 struct metric_expr *mexp; 782 783 list_for_each_entry (mexp, &me->head, nd) { 784 if (num++ > 0) 785 out->new_line(config, ctxp); 786 generic_metric(config, mexp->metric_expr, mexp->metric_threshold, 787 mexp->metric_events, mexp->metric_refs, evsel->name, 788 mexp->metric_name, mexp->metric_unit, mexp->runtime, 789 aggr_idx, out); 790 } 791 } 792 if (num == 0) 793 print_metric(config, ctxp, NULL, NULL, NULL, 0); 794 } 795