1 // SPDX-License-Identifier: GPL-2.0 2 #include <math.h> 3 #include <stdio.h> 4 #include "evsel.h" 5 #include "stat.h" 6 #include "color.h" 7 #include "debug.h" 8 #include "pmu.h" 9 #include "rblist.h" 10 #include "evlist.h" 11 #include "expr.h" 12 #include "metricgroup.h" 13 #include "cgroup.h" 14 #include "units.h" 15 #include <linux/zalloc.h> 16 #include "iostat.h" 17 #include "util/hashmap.h" 18 19 struct stats walltime_nsecs_stats; 20 struct rusage_stats ru_stats; 21 22 enum { 23 CTX_BIT_USER = 1 << 0, 24 CTX_BIT_KERNEL = 1 << 1, 25 CTX_BIT_HV = 1 << 2, 26 CTX_BIT_HOST = 1 << 3, 27 CTX_BIT_IDLE = 1 << 4, 28 CTX_BIT_MAX = 1 << 5, 29 }; 30 31 enum stat_type { 32 STAT_NONE = 0, 33 STAT_NSECS, 34 STAT_CYCLES, 35 STAT_INSTRUCTIONS, 36 STAT_STALLED_CYCLES_FRONT, 37 STAT_STALLED_CYCLES_BACK, 38 STAT_BRANCHES, 39 STAT_BRANCH_MISS, 40 STAT_CACHE_REFS, 41 STAT_CACHE_MISSES, 42 STAT_L1_DCACHE, 43 STAT_L1_ICACHE, 44 STAT_LL_CACHE, 45 STAT_ITLB_CACHE, 46 STAT_DTLB_CACHE, 47 STAT_L1D_MISS, 48 STAT_L1I_MISS, 49 STAT_LL_MISS, 50 STAT_DTLB_MISS, 51 STAT_ITLB_MISS, 52 STAT_MAX 53 }; 54 55 static int evsel_context(const struct evsel *evsel) 56 { 57 int ctx = 0; 58 59 if (evsel->core.attr.exclude_kernel) 60 ctx |= CTX_BIT_KERNEL; 61 if (evsel->core.attr.exclude_user) 62 ctx |= CTX_BIT_USER; 63 if (evsel->core.attr.exclude_hv) 64 ctx |= CTX_BIT_HV; 65 if (evsel->core.attr.exclude_host) 66 ctx |= CTX_BIT_HOST; 67 if (evsel->core.attr.exclude_idle) 68 ctx |= CTX_BIT_IDLE; 69 70 return ctx; 71 } 72 73 void perf_stat__reset_shadow_stats(void) 74 { 75 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); 76 memset(&ru_stats, 0, sizeof(ru_stats)); 77 } 78 79 static enum stat_type evsel__stat_type(const struct evsel *evsel) 80 { 81 /* Fake perf_hw_cache_op_id values for use with evsel__match. */ 82 u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | 83 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 84 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 85 u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I | 86 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 87 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 88 u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL | 89 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 90 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 91 u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB | 92 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 93 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 94 u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB | 95 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | 96 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); 97 98 if (evsel__is_clock(evsel)) 99 return STAT_NSECS; 100 else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) 101 return STAT_CYCLES; 102 else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) 103 return STAT_INSTRUCTIONS; 104 else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) 105 return STAT_STALLED_CYCLES_FRONT; 106 else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) 107 return STAT_STALLED_CYCLES_BACK; 108 else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) 109 return STAT_BRANCHES; 110 else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) 111 return STAT_BRANCH_MISS; 112 else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES)) 113 return STAT_CACHE_REFS; 114 else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) 115 return STAT_CACHE_MISSES; 116 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D)) 117 return STAT_L1_DCACHE; 118 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I)) 119 return STAT_L1_ICACHE; 120 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL)) 121 return STAT_LL_CACHE; 122 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB)) 123 return STAT_DTLB_CACHE; 124 else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB)) 125 return STAT_ITLB_CACHE; 126 else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss)) 127 return STAT_L1D_MISS; 128 else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss)) 129 return STAT_L1I_MISS; 130 else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss)) 131 return STAT_LL_MISS; 132 else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss)) 133 return STAT_DTLB_MISS; 134 else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss)) 135 return STAT_ITLB_MISS; 136 return STAT_NONE; 137 } 138 139 static const char *get_ratio_color(const double ratios[3], double val) 140 { 141 const char *color = PERF_COLOR_NORMAL; 142 143 if (val > ratios[0]) 144 color = PERF_COLOR_RED; 145 else if (val > ratios[1]) 146 color = PERF_COLOR_MAGENTA; 147 else if (val > ratios[2]) 148 color = PERF_COLOR_YELLOW; 149 150 return color; 151 } 152 153 static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) 154 { 155 const struct evsel *cur; 156 int evsel_ctx = evsel_context(evsel); 157 158 evlist__for_each_entry(evsel->evlist, cur) { 159 struct perf_stat_aggr *aggr; 160 161 /* Ignore the evsel that is being searched from. */ 162 if (evsel == cur) 163 continue; 164 165 /* Ignore evsels that are part of different groups. */ 166 if (evsel->core.leader->nr_members > 1 && 167 evsel->core.leader != cur->core.leader) 168 continue; 169 /* Ignore evsels with mismatched modifiers. */ 170 if (evsel_ctx != evsel_context(cur)) 171 continue; 172 /* Ignore if not the cgroup we're looking for. */ 173 if (evsel->cgrp != cur->cgrp) 174 continue; 175 /* Ignore if not the stat we're looking for. */ 176 if (type != evsel__stat_type(cur)) 177 continue; 178 179 aggr = &cur->stats->aggr[aggr_idx]; 180 if (type == STAT_NSECS) 181 return aggr->counts.val; 182 return aggr->counts.val * cur->scale; 183 } 184 return 0.0; 185 } 186 187 static void print_ratio(struct perf_stat_config *config, 188 const struct evsel *evsel, int aggr_idx, 189 double numerator, struct perf_stat_output_ctx *out, 190 enum stat_type denominator_type, 191 const double color_ratios[3], const char *unit) 192 { 193 double denominator = find_stat(evsel, aggr_idx, denominator_type); 194 195 if (numerator && denominator) { 196 double ratio = numerator / denominator * 100.0; 197 const char *color = get_ratio_color(color_ratios, ratio); 198 199 out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); 200 } else 201 out->print_metric(config, out->ctx, NULL, NULL, unit, 0); 202 } 203 204 static void print_stalled_cycles_front(struct perf_stat_config *config, 205 const struct evsel *evsel, 206 int aggr_idx, double stalled, 207 struct perf_stat_output_ctx *out) 208 { 209 static const double color_ratios[3] = {50.0, 30.0, 10.0}; 210 211 print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, 212 "frontend cycles idle"); 213 } 214 215 static void print_stalled_cycles_back(struct perf_stat_config *config, 216 const struct evsel *evsel, 217 int aggr_idx, double stalled, 218 struct perf_stat_output_ctx *out) 219 { 220 static const double color_ratios[3] = {75.0, 50.0, 20.0}; 221 222 print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, 223 "backend cycles idle"); 224 } 225 226 static void print_branch_miss(struct perf_stat_config *config, 227 const struct evsel *evsel, 228 int aggr_idx, double misses, 229 struct perf_stat_output_ctx *out) 230 { 231 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 232 233 print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, 234 "of all branches"); 235 } 236 237 static void print_l1d_miss(struct perf_stat_config *config, 238 const struct evsel *evsel, 239 int aggr_idx, double misses, 240 struct perf_stat_output_ctx *out) 241 { 242 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 243 244 print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, 245 "of all L1-dcache accesses"); 246 } 247 248 static void print_l1i_miss(struct perf_stat_config *config, 249 const struct evsel *evsel, 250 int aggr_idx, double misses, 251 struct perf_stat_output_ctx *out) 252 { 253 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 254 255 print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, 256 "of all L1-icache accesses"); 257 } 258 259 static void print_ll_miss(struct perf_stat_config *config, 260 const struct evsel *evsel, 261 int aggr_idx, double misses, 262 struct perf_stat_output_ctx *out) 263 { 264 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 265 266 print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, 267 "of all L1-icache accesses"); 268 } 269 270 static void print_dtlb_miss(struct perf_stat_config *config, 271 const struct evsel *evsel, 272 int aggr_idx, double misses, 273 struct perf_stat_output_ctx *out) 274 { 275 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 276 277 print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, 278 "of all dTLB cache accesses"); 279 } 280 281 static void print_itlb_miss(struct perf_stat_config *config, 282 const struct evsel *evsel, 283 int aggr_idx, double misses, 284 struct perf_stat_output_ctx *out) 285 { 286 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 287 288 print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, 289 "of all iTLB cache accesses"); 290 } 291 292 static void print_cache_miss(struct perf_stat_config *config, 293 const struct evsel *evsel, 294 int aggr_idx, double misses, 295 struct perf_stat_output_ctx *out) 296 { 297 static const double color_ratios[3] = {20.0, 10.0, 5.0}; 298 299 print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, 300 "of all cache refs"); 301 } 302 303 static void print_instructions(struct perf_stat_config *config, 304 const struct evsel *evsel, 305 int aggr_idx, double instructions, 306 struct perf_stat_output_ctx *out) 307 { 308 print_metric_t print_metric = out->print_metric; 309 void *ctxp = out->ctx; 310 double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES); 311 double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT), 312 find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); 313 314 if (cycles) { 315 print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", 316 instructions / cycles); 317 } else 318 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); 319 320 if (max_stalled && instructions) { 321 out->new_line(config, ctxp); 322 print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", 323 max_stalled / instructions); 324 } 325 } 326 327 static void print_cycles(struct perf_stat_config *config, 328 const struct evsel *evsel, 329 int aggr_idx, double cycles, 330 struct perf_stat_output_ctx *out) 331 { 332 double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); 333 334 if (cycles && nsecs) { 335 double ratio = cycles / nsecs; 336 337 out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); 338 } else 339 out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); 340 } 341 342 static void print_nsecs(struct perf_stat_config *config, 343 const struct evsel *evsel, 344 int aggr_idx __maybe_unused, double nsecs, 345 struct perf_stat_output_ctx *out) 346 { 347 print_metric_t print_metric = out->print_metric; 348 void *ctxp = out->ctx; 349 double wall_time = avg_stats(&walltime_nsecs_stats); 350 351 if (wall_time) { 352 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", 353 nsecs / (wall_time * evsel->scale)); 354 } else 355 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); 356 } 357 358 static int prepare_metric(struct evsel **metric_events, 359 struct metric_ref *metric_refs, 360 struct expr_parse_ctx *pctx, 361 int aggr_idx) 362 { 363 int i; 364 365 for (i = 0; metric_events[i]; i++) { 366 char *n; 367 double val; 368 int source_count = 0; 369 370 if (evsel__is_tool(metric_events[i])) { 371 struct stats *stats; 372 double scale; 373 374 switch (metric_events[i]->tool_event) { 375 case PERF_TOOL_DURATION_TIME: 376 stats = &walltime_nsecs_stats; 377 scale = 1e-9; 378 break; 379 case PERF_TOOL_USER_TIME: 380 stats = &ru_stats.ru_utime_usec_stat; 381 scale = 1e-6; 382 break; 383 case PERF_TOOL_SYSTEM_TIME: 384 stats = &ru_stats.ru_stime_usec_stat; 385 scale = 1e-6; 386 break; 387 case PERF_TOOL_NONE: 388 pr_err("Invalid tool event 'none'"); 389 abort(); 390 case PERF_TOOL_MAX: 391 pr_err("Invalid tool event 'max'"); 392 abort(); 393 default: 394 pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); 395 abort(); 396 } 397 val = avg_stats(stats) * scale; 398 source_count = 1; 399 } else { 400 struct perf_stat_evsel *ps = metric_events[i]->stats; 401 struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; 402 403 if (!aggr) 404 break; 405 406 if (!metric_events[i]->supported) { 407 /* 408 * Not supported events will have a count of 0, 409 * which can be confusing in a 410 * metric. Explicitly set the value to NAN. Not 411 * counted events (enable time of 0) are read as 412 * 0. 413 */ 414 val = NAN; 415 source_count = 0; 416 } else { 417 /* 418 * If an event was scaled during stat gathering, 419 * reverse the scale before computing the 420 * metric. 421 */ 422 val = aggr->counts.val * (1.0 / metric_events[i]->scale); 423 source_count = evsel__source_count(metric_events[i]); 424 } 425 } 426 n = strdup(evsel__metric_id(metric_events[i])); 427 if (!n) 428 return -ENOMEM; 429 430 expr__add_id_val_source_count(pctx, n, val, source_count); 431 } 432 433 for (int j = 0; metric_refs && metric_refs[j].metric_name; j++) { 434 int ret = expr__add_ref(pctx, &metric_refs[j]); 435 436 if (ret) 437 return ret; 438 } 439 440 return i; 441 } 442 443 static void generic_metric(struct perf_stat_config *config, 444 const char *metric_expr, 445 const char *metric_threshold, 446 struct evsel **metric_events, 447 struct metric_ref *metric_refs, 448 char *name, 449 const char *metric_name, 450 const char *metric_unit, 451 int runtime, 452 int aggr_idx, 453 struct perf_stat_output_ctx *out) 454 { 455 print_metric_t print_metric = out->print_metric; 456 struct expr_parse_ctx *pctx; 457 double ratio, scale, threshold; 458 int i; 459 void *ctxp = out->ctx; 460 const char *color = NULL; 461 462 pctx = expr__ctx_new(); 463 if (!pctx) 464 return; 465 466 if (config->user_requested_cpu_list) 467 pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); 468 pctx->sctx.runtime = runtime; 469 pctx->sctx.system_wide = config->system_wide; 470 i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx); 471 if (i < 0) { 472 expr__ctx_free(pctx); 473 return; 474 } 475 if (!metric_events[i]) { 476 if (expr__parse(&ratio, pctx, metric_expr) == 0) { 477 char *unit; 478 char metric_bf[64]; 479 480 if (metric_threshold && 481 expr__parse(&threshold, pctx, metric_threshold) == 0 && 482 !isnan(threshold)) { 483 color = fpclassify(threshold) == FP_ZERO 484 ? PERF_COLOR_GREEN : PERF_COLOR_RED; 485 } 486 487 if (metric_unit && metric_name) { 488 if (perf_pmu__convert_scale(metric_unit, 489 &unit, &scale) >= 0) { 490 ratio *= scale; 491 } 492 if (strstr(metric_expr, "?")) 493 scnprintf(metric_bf, sizeof(metric_bf), 494 "%s %s_%d", unit, metric_name, runtime); 495 else 496 scnprintf(metric_bf, sizeof(metric_bf), 497 "%s %s", unit, metric_name); 498 499 print_metric(config, ctxp, color, "%8.1f", 500 metric_bf, ratio); 501 } else { 502 print_metric(config, ctxp, color, "%8.2f", 503 metric_name ? 504 metric_name : 505 out->force_header ? name : "", 506 ratio); 507 } 508 } else { 509 print_metric(config, ctxp, color, /*unit=*/NULL, 510 out->force_header ? 511 (metric_name ? metric_name : name) : "", 0); 512 } 513 } else { 514 print_metric(config, ctxp, color, /*unit=*/NULL, 515 out->force_header ? 516 (metric_name ? metric_name : name) : "", 0); 517 } 518 519 expr__ctx_free(pctx); 520 } 521 522 double test_generic_metric(struct metric_expr *mexp, int aggr_idx) 523 { 524 struct expr_parse_ctx *pctx; 525 double ratio = 0.0; 526 527 pctx = expr__ctx_new(); 528 if (!pctx) 529 return NAN; 530 531 if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0) 532 goto out; 533 534 if (expr__parse(&ratio, pctx, mexp->metric_expr)) 535 ratio = 0.0; 536 537 out: 538 expr__ctx_free(pctx); 539 return ratio; 540 } 541 542 void perf_stat__print_shadow_stats(struct perf_stat_config *config, 543 struct evsel *evsel, 544 double avg, int aggr_idx, 545 struct perf_stat_output_ctx *out, 546 struct rblist *metric_events) 547 { 548 typedef void (*stat_print_function_t)(struct perf_stat_config *config, 549 const struct evsel *evsel, 550 int aggr_idx, double misses, 551 struct perf_stat_output_ctx *out); 552 static const stat_print_function_t stat_print_function[STAT_MAX] = { 553 [STAT_INSTRUCTIONS] = print_instructions, 554 [STAT_BRANCH_MISS] = print_branch_miss, 555 [STAT_L1D_MISS] = print_l1d_miss, 556 [STAT_L1I_MISS] = print_l1i_miss, 557 [STAT_DTLB_MISS] = print_dtlb_miss, 558 [STAT_ITLB_MISS] = print_itlb_miss, 559 [STAT_LL_MISS] = print_ll_miss, 560 [STAT_CACHE_MISSES] = print_cache_miss, 561 [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front, 562 [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back, 563 [STAT_CYCLES] = print_cycles, 564 [STAT_NSECS] = print_nsecs, 565 }; 566 print_metric_t print_metric = out->print_metric; 567 void *ctxp = out->ctx; 568 struct metric_event *me; 569 int num = 1; 570 571 if (config->iostat_run) { 572 iostat_print_metric(config, evsel, out); 573 } else { 574 stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)]; 575 576 if (fn) 577 fn(config, evsel, aggr_idx, avg, out); 578 else { 579 double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); 580 581 if (nsecs) { 582 char unit = ' '; 583 char unit_buf[10] = "/sec"; 584 double ratio = convert_unit_double(1000000000.0 * avg / nsecs, 585 &unit); 586 587 if (unit != ' ') 588 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 589 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); 590 } else 591 num = 0; 592 } 593 } 594 595 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { 596 struct metric_expr *mexp; 597 598 list_for_each_entry (mexp, &me->head, nd) { 599 if (num++ > 0) 600 out->new_line(config, ctxp); 601 generic_metric(config, mexp->metric_expr, mexp->metric_threshold, 602 mexp->metric_events, mexp->metric_refs, evsel->name, 603 mexp->metric_name, mexp->metric_unit, mexp->runtime, 604 aggr_idx, out); 605 } 606 } 607 if (num == 0) 608 print_metric(config, ctxp, NULL, NULL, NULL, 0); 609 } 610