1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-annotate.c 4 * 5 * Builtin annotate command: Analyze the perf.data input file, 6 * look up and read DSOs and symbol information and display 7 * a histogram of results, along various sorting keys. 8 */ 9 #include "builtin.h" 10 11 #include "util/color.h" 12 #include <linux/list.h> 13 #include "util/cache.h" 14 #include <linux/rbtree.h> 15 #include <linux/zalloc.h> 16 #include "util/symbol.h" 17 18 #include "perf.h" 19 #include "util/debug.h" 20 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/annotate.h" 24 #include "util/event.h" 25 #include <subcmd/parse-options.h> 26 #include "util/parse-events.h" 27 #include "util/sort.h" 28 #include "util/hist.h" 29 #include "util/dso.h" 30 #include "util/map.h" 31 #include "util/session.h" 32 #include "util/tool.h" 33 #include "util/data.h" 34 #include "arch/common.h" 35 #include "util/block-range.h" 36 #include "util/map_symbol.h" 37 #include "util/branch.h" 38 39 #include <dlfcn.h> 40 #include <errno.h> 41 #include <linux/bitmap.h> 42 43 struct perf_annotate { 44 struct perf_tool tool; 45 struct perf_session *session; 46 struct annotation_options opts; 47 bool use_tui, use_stdio, use_stdio2, use_gtk; 48 bool skip_missing; 49 bool has_br_stack; 50 bool group_set; 51 const char *sym_hist_filter; 52 const char *cpu_list; 53 DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 54 }; 55 56 /* 57 * Given one basic block: 58 * 59 * from to branch_i 60 * * ----> * 61 * | 62 * | block 63 * v 64 * * ----> * 65 * from to branch_i+1 66 * 67 * where the horizontal are the branches and the vertical is the executed 68 * block of instructions. 69 * 70 * We count, for each 'instruction', the number of blocks that covered it as 71 * well as count the ratio each branch is taken. 72 * 73 * We can do this without knowing the actual instruction stream by keeping 74 * track of the address ranges. We break down ranges such that there is no 75 * overlap and iterate from the start until the end. 76 * 77 * @acme: once we parse the objdump output _before_ processing the samples, 78 * we can easily fold the branch.cycles IPC bits in. 79 */ 80 static void process_basic_block(struct addr_map_symbol *start, 81 struct addr_map_symbol *end, 82 struct branch_flags *flags) 83 { 84 struct symbol *sym = start->sym; 85 struct annotation *notes = sym ? symbol__annotation(sym) : NULL; 86 struct block_range_iter iter; 87 struct block_range *entry; 88 89 /* 90 * Sanity; NULL isn't executable and the CPU cannot execute backwards 91 */ 92 if (!start->addr || start->addr > end->addr) 93 return; 94 95 iter = block_range__create(start->addr, end->addr); 96 if (!block_range_iter__valid(&iter)) 97 return; 98 99 /* 100 * First block in range is a branch target. 101 */ 102 entry = block_range_iter(&iter); 103 assert(entry->is_target); 104 entry->entry++; 105 106 do { 107 entry = block_range_iter(&iter); 108 109 entry->coverage++; 110 entry->sym = sym; 111 112 if (notes) 113 notes->max_coverage = max(notes->max_coverage, entry->coverage); 114 115 } while (block_range_iter__next(&iter)); 116 117 /* 118 * Last block in rage is a branch. 119 */ 120 entry = block_range_iter(&iter); 121 assert(entry->is_branch); 122 entry->taken++; 123 if (flags->predicted) 124 entry->pred++; 125 } 126 127 static void process_branch_stack(struct branch_stack *bs, struct addr_location *al, 128 struct perf_sample *sample) 129 { 130 struct addr_map_symbol *prev = NULL; 131 struct branch_info *bi; 132 int i; 133 134 if (!bs || !bs->nr) 135 return; 136 137 bi = sample__resolve_bstack(sample, al); 138 if (!bi) 139 return; 140 141 for (i = bs->nr - 1; i >= 0; i--) { 142 /* 143 * XXX filter against symbol 144 */ 145 if (prev) 146 process_basic_block(prev, &bi[i].from, &bi[i].flags); 147 prev = &bi[i].to; 148 } 149 150 free(bi); 151 } 152 153 static int hist_iter__branch_callback(struct hist_entry_iter *iter, 154 struct addr_location *al __maybe_unused, 155 bool single __maybe_unused, 156 void *arg __maybe_unused) 157 { 158 struct hist_entry *he = iter->he; 159 struct branch_info *bi; 160 struct perf_sample *sample = iter->sample; 161 struct evsel *evsel = iter->evsel; 162 int err; 163 164 bi = he->branch_info; 165 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); 166 167 if (err) 168 goto out; 169 170 err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); 171 172 out: 173 return err; 174 } 175 176 static int process_branch_callback(struct evsel *evsel, 177 struct perf_sample *sample, 178 struct addr_location *al __maybe_unused, 179 struct perf_annotate *ann, 180 struct machine *machine) 181 { 182 struct hist_entry_iter iter = { 183 .evsel = evsel, 184 .sample = sample, 185 .add_entry_cb = hist_iter__branch_callback, 186 .hide_unresolved = symbol_conf.hide_unresolved, 187 .ops = &hist_iter_branch, 188 }; 189 190 struct addr_location a; 191 int ret; 192 193 if (machine__resolve(machine, &a, sample) < 0) 194 return -1; 195 196 if (a.sym == NULL) 197 return 0; 198 199 if (a.map != NULL) 200 a.map->dso->hit = 1; 201 202 hist__account_cycles(sample->branch_stack, al, sample, false); 203 204 ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann); 205 return ret; 206 } 207 208 static bool has_annotation(struct perf_annotate *ann) 209 { 210 return ui__has_annotation() || ann->use_stdio2; 211 } 212 213 static int perf_evsel__add_sample(struct evsel *evsel, 214 struct perf_sample *sample, 215 struct addr_location *al, 216 struct perf_annotate *ann, 217 struct machine *machine) 218 { 219 struct hists *hists = evsel__hists(evsel); 220 struct hist_entry *he; 221 int ret; 222 223 if ((!ann->has_br_stack || !has_annotation(ann)) && 224 ann->sym_hist_filter != NULL && 225 (al->sym == NULL || 226 strcmp(ann->sym_hist_filter, al->sym->name) != 0)) { 227 /* We're only interested in a symbol named sym_hist_filter */ 228 /* 229 * FIXME: why isn't this done in the symbol_filter when loading 230 * the DSO? 231 */ 232 if (al->sym != NULL) { 233 rb_erase_cached(&al->sym->rb_node, 234 &al->map->dso->symbols); 235 symbol__delete(al->sym); 236 dso__reset_find_symbol_cache(al->map->dso); 237 } 238 return 0; 239 } 240 241 /* 242 * XXX filtered samples can still have branch entires pointing into our 243 * symbol and are missed. 244 */ 245 process_branch_stack(sample->branch_stack, al, sample); 246 247 if (ann->has_br_stack && has_annotation(ann)) 248 return process_branch_callback(evsel, sample, al, ann, machine); 249 250 he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); 251 if (he == NULL) 252 return -ENOMEM; 253 254 ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); 255 hists__inc_nr_samples(hists, true); 256 return ret; 257 } 258 259 static int process_sample_event(struct perf_tool *tool, 260 union perf_event *event, 261 struct perf_sample *sample, 262 struct evsel *evsel, 263 struct machine *machine) 264 { 265 struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); 266 struct addr_location al; 267 int ret = 0; 268 269 if (machine__resolve(machine, &al, sample) < 0) { 270 pr_warning("problem processing %d event, skipping it.\n", 271 event->header.type); 272 return -1; 273 } 274 275 if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) 276 goto out_put; 277 278 if (!al.filtered && 279 perf_evsel__add_sample(evsel, sample, &al, ann, machine)) { 280 pr_warning("problem incrementing symbol count, " 281 "skipping event\n"); 282 ret = -1; 283 } 284 out_put: 285 addr_location__put(&al); 286 return ret; 287 } 288 289 static int process_feature_event(struct perf_session *session, 290 union perf_event *event) 291 { 292 if (event->feat.feat_id < HEADER_LAST_FEATURE) 293 return perf_event__process_feature(session, event); 294 return 0; 295 } 296 297 static int hist_entry__tty_annotate(struct hist_entry *he, 298 struct evsel *evsel, 299 struct perf_annotate *ann) 300 { 301 if (!ann->use_stdio2) 302 return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts); 303 304 return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts); 305 } 306 307 static void hists__find_annotations(struct hists *hists, 308 struct evsel *evsel, 309 struct perf_annotate *ann) 310 { 311 struct rb_node *nd = rb_first_cached(&hists->entries), *next; 312 int key = K_RIGHT; 313 314 while (nd) { 315 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 316 struct annotation *notes; 317 318 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned) 319 goto find_next; 320 321 if (ann->sym_hist_filter && 322 (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0)) 323 goto find_next; 324 325 notes = symbol__annotation(he->ms.sym); 326 if (notes->src == NULL) { 327 find_next: 328 if (key == K_LEFT) 329 nd = rb_prev(nd); 330 else 331 nd = rb_next(nd); 332 continue; 333 } 334 335 if (use_browser == 2) { 336 int ret; 337 int (*annotate)(struct hist_entry *he, 338 struct evsel *evsel, 339 struct hist_browser_timer *hbt); 340 341 annotate = dlsym(perf_gtk_handle, 342 "hist_entry__gtk_annotate"); 343 if (annotate == NULL) { 344 ui__error("GTK browser not found!\n"); 345 return; 346 } 347 348 ret = annotate(he, evsel, NULL); 349 if (!ret || !ann->skip_missing) 350 return; 351 352 /* skip missing symbols */ 353 nd = rb_next(nd); 354 } else if (use_browser == 1) { 355 key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); 356 357 switch (key) { 358 case -1: 359 if (!ann->skip_missing) 360 return; 361 /* fall through */ 362 case K_RIGHT: 363 next = rb_next(nd); 364 break; 365 case K_LEFT: 366 next = rb_prev(nd); 367 break; 368 default: 369 return; 370 } 371 372 if (next != NULL) 373 nd = next; 374 } else { 375 hist_entry__tty_annotate(he, evsel, ann); 376 nd = rb_next(nd); 377 /* 378 * Since we have a hist_entry per IP for the same 379 * symbol, free he->ms.sym->src to signal we already 380 * processed this symbol. 381 */ 382 zfree(¬es->src->cycles_hist); 383 zfree(¬es->src); 384 } 385 } 386 } 387 388 static int __cmd_annotate(struct perf_annotate *ann) 389 { 390 int ret; 391 struct perf_session *session = ann->session; 392 struct evsel *pos; 393 u64 total_nr_samples; 394 395 if (ann->cpu_list) { 396 ret = perf_session__cpu_bitmap(session, ann->cpu_list, 397 ann->cpu_bitmap); 398 if (ret) 399 goto out; 400 } 401 402 if (!ann->opts.objdump_path) { 403 ret = perf_env__lookup_objdump(&session->header.env, 404 &ann->opts.objdump_path); 405 if (ret) 406 goto out; 407 } 408 409 ret = perf_session__process_events(session); 410 if (ret) 411 goto out; 412 413 if (dump_trace) { 414 perf_session__fprintf_nr_events(session, stdout); 415 perf_evlist__fprintf_nr_events(session->evlist, stdout); 416 goto out; 417 } 418 419 if (verbose > 3) 420 perf_session__fprintf(session, stdout); 421 422 if (verbose > 2) 423 perf_session__fprintf_dsos(session, stdout); 424 425 total_nr_samples = 0; 426 evlist__for_each_entry(session->evlist, pos) { 427 struct hists *hists = evsel__hists(pos); 428 u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 429 430 if (nr_samples > 0) { 431 total_nr_samples += nr_samples; 432 hists__collapse_resort(hists, NULL); 433 /* Don't sort callchain */ 434 perf_evsel__reset_sample_bit(pos, CALLCHAIN); 435 perf_evsel__output_resort(pos, NULL); 436 437 if (symbol_conf.event_group && 438 !perf_evsel__is_group_leader(pos)) 439 continue; 440 441 hists__find_annotations(hists, pos, ann); 442 } 443 } 444 445 if (total_nr_samples == 0) { 446 ui__error("The %s data has no samples!\n", session->data->path); 447 goto out; 448 } 449 450 if (use_browser == 2) { 451 void (*show_annotations)(void); 452 453 show_annotations = dlsym(perf_gtk_handle, 454 "perf_gtk__show_annotations"); 455 if (show_annotations == NULL) { 456 ui__error("GTK browser not found!\n"); 457 goto out; 458 } 459 show_annotations(); 460 } 461 462 out: 463 return ret; 464 } 465 466 static const char * const annotate_usage[] = { 467 "perf annotate [<options>]", 468 NULL 469 }; 470 471 int cmd_annotate(int argc, const char **argv) 472 { 473 struct perf_annotate annotate = { 474 .tool = { 475 .sample = process_sample_event, 476 .mmap = perf_event__process_mmap, 477 .mmap2 = perf_event__process_mmap2, 478 .comm = perf_event__process_comm, 479 .exit = perf_event__process_exit, 480 .fork = perf_event__process_fork, 481 .namespaces = perf_event__process_namespaces, 482 .attr = perf_event__process_attr, 483 .build_id = perf_event__process_build_id, 484 .tracing_data = perf_event__process_tracing_data, 485 .feature = process_feature_event, 486 .ordered_events = true, 487 .ordering_requires_timestamps = true, 488 }, 489 .opts = annotation__default_options, 490 }; 491 struct perf_data data = { 492 .mode = PERF_DATA_MODE_READ, 493 }; 494 struct option options[] = { 495 OPT_STRING('i', "input", &input_name, "file", 496 "input file name"), 497 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 498 "only consider symbols in these dsos"), 499 OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol", 500 "symbol to annotate"), 501 OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), 502 OPT_INCR('v', "verbose", &verbose, 503 "be more verbose (show symbol address, etc)"), 504 OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), 505 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 506 "dump raw trace in ASCII"), 507 OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"), 508 OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"), 509 OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"), 510 OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"), 511 OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, 512 "don't load vmlinux even if found"), 513 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 514 "file", "vmlinux pathname"), 515 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 516 "load module symbols - WARNING: use only with -k and LIVE kernel"), 517 OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, 518 "print matching source lines (may be slow)"), 519 OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, 520 "Don't shorten the displayed pathnames"), 521 OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, 522 "Skip symbols that cannot be annotated"), 523 OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, 524 &annotate.group_set, 525 "Show event group information together"), 526 OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), 527 OPT_CALLBACK(0, "symfs", NULL, "directory", 528 "Look for files with symbols relative to this directory", 529 symbol__config_symfs), 530 OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, 531 "Interleave source code with assembly code (default)"), 532 OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, 533 "Display raw encoding of assembly instructions (default)"), 534 OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style", 535 "Specify disassembler style (e.g. -M intel for intel syntax)"), 536 OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", 537 "objdump binary to use for disassembly and annotations"), 538 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 539 "Show event group information together"), 540 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 541 "Show a column with the sum of periods"), 542 OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, 543 "Show a column with the number of samples"), 544 OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", 545 "'always' (default), 'never' or 'auto' only applicable to --stdio mode", 546 stdio__config_color, "always"), 547 OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", 548 "Set percent type local/global-period/hits", 549 annotate_parse_percent_type), 550 551 OPT_END() 552 }; 553 int ret; 554 555 set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); 556 set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); 557 558 559 ret = hists__init(); 560 if (ret < 0) 561 return ret; 562 563 argc = parse_options(argc, argv, options, annotate_usage, 0); 564 if (argc) { 565 /* 566 * Special case: if there's an argument left then assume that 567 * it's a symbol filter: 568 */ 569 if (argc > 1) 570 usage_with_options(annotate_usage, options); 571 572 annotate.sym_hist_filter = argv[0]; 573 } 574 575 if (symbol_conf.show_nr_samples && annotate.use_gtk) { 576 pr_err("--show-nr-samples is not available in --gtk mode at this time\n"); 577 return ret; 578 } 579 580 if (quiet) 581 perf_quiet_option(); 582 583 data.path = input_name; 584 585 annotate.session = perf_session__new(&data, false, &annotate.tool); 586 if (annotate.session == NULL) 587 return -1; 588 589 annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, 590 HEADER_BRANCH_STACK); 591 592 if (annotate.group_set) 593 perf_evlist__force_leader(annotate.session->evlist); 594 595 ret = symbol__annotation_init(); 596 if (ret < 0) 597 goto out_delete; 598 599 annotation_config__init(); 600 601 symbol_conf.try_vmlinux_path = true; 602 603 ret = symbol__init(&annotate.session->header.env); 604 if (ret < 0) 605 goto out_delete; 606 607 if (annotate.use_stdio || annotate.use_stdio2) 608 use_browser = 0; 609 else if (annotate.use_tui) 610 use_browser = 1; 611 else if (annotate.use_gtk) 612 use_browser = 2; 613 614 setup_browser(true); 615 616 if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) { 617 sort__mode = SORT_MODE__BRANCH; 618 if (setup_sorting(annotate.session->evlist) < 0) 619 usage_with_options(annotate_usage, options); 620 } else { 621 if (setup_sorting(NULL) < 0) 622 usage_with_options(annotate_usage, options); 623 } 624 625 ret = __cmd_annotate(&annotate); 626 627 out_delete: 628 /* 629 * Speed up the exit process, for large files this can 630 * take quite a while. 631 * 632 * XXX Enable this when using valgrind or if we ever 633 * librarize this command. 634 * 635 * Also experiment with obstacks to see how much speed 636 * up we'll get here. 637 * 638 * perf_session__delete(session); 639 */ 640 return ret; 641 } 642