1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "perf.h" 12 13 #include "util/build-id.h" 14 #include "util/util.h" 15 #include <subcmd/parse-options.h> 16 #include "util/parse-events.h" 17 #include "util/config.h" 18 19 #include "util/callchain.h" 20 #include "util/cgroup.h" 21 #include "util/header.h" 22 #include "util/event.h" 23 #include "util/evlist.h" 24 #include "util/evsel.h" 25 #include "util/debug.h" 26 #include "util/drv_configs.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/symbol.h" 30 #include "util/cpumap.h" 31 #include "util/thread_map.h" 32 #include "util/data.h" 33 #include "util/perf_regs.h" 34 #include "util/auxtrace.h" 35 #include "util/tsc.h" 36 #include "util/parse-branch-options.h" 37 #include "util/parse-regs-options.h" 38 #include "util/llvm-utils.h" 39 #include "util/bpf-loader.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/time-utils.h" 43 #include "util/units.h" 44 #include "asm/bug.h" 45 46 #include <errno.h> 47 #include <inttypes.h> 48 #include <locale.h> 49 #include <poll.h> 50 #include <unistd.h> 51 #include <sched.h> 52 #include <signal.h> 53 #include <sys/mman.h> 54 #include <sys/wait.h> 55 #include <linux/time64.h> 56 57 struct switch_output { 58 bool enabled; 59 bool signal; 60 unsigned long size; 61 unsigned long time; 62 const char *str; 63 bool set; 64 }; 65 66 struct record { 67 struct perf_tool tool; 68 struct record_opts opts; 69 u64 bytes_written; 70 struct perf_data data; 71 struct auxtrace_record *itr; 72 struct perf_evlist *evlist; 73 struct perf_session *session; 74 int realtime_prio; 75 bool no_buildid; 76 bool no_buildid_set; 77 bool no_buildid_cache; 78 bool no_buildid_cache_set; 79 bool buildid_all; 80 bool timestamp_filename; 81 bool timestamp_boundary; 82 struct switch_output switch_output; 83 unsigned long long samples; 84 }; 85 86 static volatile int auxtrace_record__snapshot_started; 87 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 88 static DEFINE_TRIGGER(switch_output_trigger); 89 90 static bool switch_output_signal(struct record *rec) 91 { 92 return rec->switch_output.signal && 93 trigger_is_ready(&switch_output_trigger); 94 } 95 96 static bool switch_output_size(struct record *rec) 97 { 98 return rec->switch_output.size && 99 trigger_is_ready(&switch_output_trigger) && 100 (rec->bytes_written >= rec->switch_output.size); 101 } 102 103 static bool switch_output_time(struct record *rec) 104 { 105 return rec->switch_output.time && 106 trigger_is_ready(&switch_output_trigger); 107 } 108 109 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, 110 void *bf, size_t size) 111 { 112 struct perf_data_file *file = &rec->session->data->file; 113 114 if (perf_data_file__write(file, bf, size) < 0) { 115 pr_err("failed to write perf data, error: %m\n"); 116 return -1; 117 } 118 119 rec->bytes_written += size; 120 121 if (switch_output_size(rec)) 122 trigger_hit(&switch_output_trigger); 123 124 return 0; 125 } 126 127 static int process_synthesized_event(struct perf_tool *tool, 128 union perf_event *event, 129 struct perf_sample *sample __maybe_unused, 130 struct machine *machine __maybe_unused) 131 { 132 struct record *rec = container_of(tool, struct record, tool); 133 return record__write(rec, NULL, event, event->header.size); 134 } 135 136 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) 137 { 138 struct record *rec = to; 139 140 rec->samples++; 141 return record__write(rec, map, bf, size); 142 } 143 144 static volatile int done; 145 static volatile int signr = -1; 146 static volatile int child_finished; 147 148 static void sig_handler(int sig) 149 { 150 if (sig == SIGCHLD) 151 child_finished = 1; 152 else 153 signr = sig; 154 155 done = 1; 156 } 157 158 static void sigsegv_handler(int sig) 159 { 160 perf_hooks__recover(); 161 sighandler_dump_stack(sig); 162 } 163 164 static void record__sig_exit(void) 165 { 166 if (signr == -1) 167 return; 168 169 signal(signr, SIG_DFL); 170 raise(signr); 171 } 172 173 #ifdef HAVE_AUXTRACE_SUPPORT 174 175 static int record__process_auxtrace(struct perf_tool *tool, 176 struct perf_mmap *map, 177 union perf_event *event, void *data1, 178 size_t len1, void *data2, size_t len2) 179 { 180 struct record *rec = container_of(tool, struct record, tool); 181 struct perf_data *data = &rec->data; 182 size_t padding; 183 u8 pad[8] = {0}; 184 185 if (!perf_data__is_pipe(data)) { 186 off_t file_offset; 187 int fd = perf_data__fd(data); 188 int err; 189 190 file_offset = lseek(fd, 0, SEEK_CUR); 191 if (file_offset == -1) 192 return -1; 193 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 194 event, file_offset); 195 if (err) 196 return err; 197 } 198 199 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 200 padding = (len1 + len2) & 7; 201 if (padding) 202 padding = 8 - padding; 203 204 record__write(rec, map, event, event->header.size); 205 record__write(rec, map, data1, len1); 206 if (len2) 207 record__write(rec, map, data2, len2); 208 record__write(rec, map, &pad, padding); 209 210 return 0; 211 } 212 213 static int record__auxtrace_mmap_read(struct record *rec, 214 struct perf_mmap *map) 215 { 216 int ret; 217 218 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 219 record__process_auxtrace); 220 if (ret < 0) 221 return ret; 222 223 if (ret) 224 rec->samples++; 225 226 return 0; 227 } 228 229 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 230 struct perf_mmap *map) 231 { 232 int ret; 233 234 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 235 record__process_auxtrace, 236 rec->opts.auxtrace_snapshot_size); 237 if (ret < 0) 238 return ret; 239 240 if (ret) 241 rec->samples++; 242 243 return 0; 244 } 245 246 static int record__auxtrace_read_snapshot_all(struct record *rec) 247 { 248 int i; 249 int rc = 0; 250 251 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 252 struct perf_mmap *map = &rec->evlist->mmap[i]; 253 254 if (!map->auxtrace_mmap.base) 255 continue; 256 257 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 258 rc = -1; 259 goto out; 260 } 261 } 262 out: 263 return rc; 264 } 265 266 static void record__read_auxtrace_snapshot(struct record *rec) 267 { 268 pr_debug("Recording AUX area tracing snapshot\n"); 269 if (record__auxtrace_read_snapshot_all(rec) < 0) { 270 trigger_error(&auxtrace_snapshot_trigger); 271 } else { 272 if (auxtrace_record__snapshot_finish(rec->itr)) 273 trigger_error(&auxtrace_snapshot_trigger); 274 else 275 trigger_ready(&auxtrace_snapshot_trigger); 276 } 277 } 278 279 static int record__auxtrace_init(struct record *rec) 280 { 281 int err; 282 283 if (!rec->itr) { 284 rec->itr = auxtrace_record__init(rec->evlist, &err); 285 if (err) 286 return err; 287 } 288 289 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 290 rec->opts.auxtrace_snapshot_opts); 291 if (err) 292 return err; 293 294 return auxtrace_parse_filters(rec->evlist); 295 } 296 297 #else 298 299 static inline 300 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 301 struct perf_mmap *map __maybe_unused) 302 { 303 return 0; 304 } 305 306 static inline 307 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 308 { 309 } 310 311 static inline 312 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 313 { 314 return 0; 315 } 316 317 static int record__auxtrace_init(struct record *rec __maybe_unused) 318 { 319 return 0; 320 } 321 322 #endif 323 324 static int record__mmap_evlist(struct record *rec, 325 struct perf_evlist *evlist) 326 { 327 struct record_opts *opts = &rec->opts; 328 char msg[512]; 329 330 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 331 opts->auxtrace_mmap_pages, 332 opts->auxtrace_snapshot_mode) < 0) { 333 if (errno == EPERM) { 334 pr_err("Permission error mapping pages.\n" 335 "Consider increasing " 336 "/proc/sys/kernel/perf_event_mlock_kb,\n" 337 "or try again with a smaller value of -m/--mmap_pages.\n" 338 "(current value: %u,%u)\n", 339 opts->mmap_pages, opts->auxtrace_mmap_pages); 340 return -errno; 341 } else { 342 pr_err("failed to mmap with %d (%s)\n", errno, 343 str_error_r(errno, msg, sizeof(msg))); 344 if (errno) 345 return -errno; 346 else 347 return -EINVAL; 348 } 349 } 350 return 0; 351 } 352 353 static int record__mmap(struct record *rec) 354 { 355 return record__mmap_evlist(rec, rec->evlist); 356 } 357 358 static int record__open(struct record *rec) 359 { 360 char msg[BUFSIZ]; 361 struct perf_evsel *pos; 362 struct perf_evlist *evlist = rec->evlist; 363 struct perf_session *session = rec->session; 364 struct record_opts *opts = &rec->opts; 365 struct perf_evsel_config_term *err_term; 366 int rc = 0; 367 368 /* 369 * For initial_delay we need to add a dummy event so that we can track 370 * PERF_RECORD_MMAP while we wait for the initial delay to enable the 371 * real events, the ones asked by the user. 372 */ 373 if (opts->initial_delay) { 374 if (perf_evlist__add_dummy(evlist)) 375 return -ENOMEM; 376 377 pos = perf_evlist__first(evlist); 378 pos->tracking = 0; 379 pos = perf_evlist__last(evlist); 380 pos->tracking = 1; 381 pos->attr.enable_on_exec = 1; 382 } 383 384 perf_evlist__config(evlist, opts, &callchain_param); 385 386 evlist__for_each_entry(evlist, pos) { 387 try_again: 388 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 389 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 390 if (verbose > 0) 391 ui__warning("%s\n", msg); 392 goto try_again; 393 } 394 395 rc = -errno; 396 perf_evsel__open_strerror(pos, &opts->target, 397 errno, msg, sizeof(msg)); 398 ui__error("%s\n", msg); 399 goto out; 400 } 401 402 pos->supported = true; 403 } 404 405 if (perf_evlist__apply_filters(evlist, &pos)) { 406 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 407 pos->filter, perf_evsel__name(pos), errno, 408 str_error_r(errno, msg, sizeof(msg))); 409 rc = -1; 410 goto out; 411 } 412 413 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { 414 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", 415 err_term->val.drv_cfg, perf_evsel__name(pos), errno, 416 str_error_r(errno, msg, sizeof(msg))); 417 rc = -1; 418 goto out; 419 } 420 421 rc = record__mmap(rec); 422 if (rc) 423 goto out; 424 425 session->evlist = evlist; 426 perf_session__set_id_hdr_size(session); 427 out: 428 return rc; 429 } 430 431 static int process_sample_event(struct perf_tool *tool, 432 union perf_event *event, 433 struct perf_sample *sample, 434 struct perf_evsel *evsel, 435 struct machine *machine) 436 { 437 struct record *rec = container_of(tool, struct record, tool); 438 439 if (rec->evlist->first_sample_time == 0) 440 rec->evlist->first_sample_time = sample->time; 441 442 rec->evlist->last_sample_time = sample->time; 443 444 if (rec->buildid_all) 445 return 0; 446 447 rec->samples++; 448 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 449 } 450 451 static int process_buildids(struct record *rec) 452 { 453 struct perf_data *data = &rec->data; 454 struct perf_session *session = rec->session; 455 456 if (data->size == 0) 457 return 0; 458 459 /* 460 * During this process, it'll load kernel map and replace the 461 * dso->long_name to a real pathname it found. In this case 462 * we prefer the vmlinux path like 463 * /lib/modules/3.16.4/build/vmlinux 464 * 465 * rather than build-id path (in debug directory). 466 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 467 */ 468 symbol_conf.ignore_vmlinux_buildid = true; 469 470 /* 471 * If --buildid-all is given, it marks all DSO regardless of hits, 472 * so no need to process samples. But if timestamp_boundary is enabled, 473 * it still needs to walk on all samples to get the timestamps of 474 * first/last samples. 475 */ 476 if (rec->buildid_all && !rec->timestamp_boundary) 477 rec->tool.sample = NULL; 478 479 return perf_session__process_events(session); 480 } 481 482 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 483 { 484 int err; 485 struct perf_tool *tool = data; 486 /* 487 *As for guest kernel when processing subcommand record&report, 488 *we arrange module mmap prior to guest kernel mmap and trigger 489 *a preload dso because default guest module symbols are loaded 490 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 491 *method is used to avoid symbol missing when the first addr is 492 *in module instead of in guest kernel. 493 */ 494 err = perf_event__synthesize_modules(tool, process_synthesized_event, 495 machine); 496 if (err < 0) 497 pr_err("Couldn't record guest kernel [%d]'s reference" 498 " relocation symbol.\n", machine->pid); 499 500 /* 501 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 502 * have no _text sometimes. 503 */ 504 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 505 machine); 506 if (err < 0) 507 pr_err("Couldn't record guest kernel [%d]'s reference" 508 " relocation symbol.\n", machine->pid); 509 } 510 511 static struct perf_event_header finished_round_event = { 512 .size = sizeof(struct perf_event_header), 513 .type = PERF_RECORD_FINISHED_ROUND, 514 }; 515 516 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 517 bool overwrite) 518 { 519 u64 bytes_written = rec->bytes_written; 520 int i; 521 int rc = 0; 522 struct perf_mmap *maps; 523 524 if (!evlist) 525 return 0; 526 527 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; 528 if (!maps) 529 return 0; 530 531 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 532 return 0; 533 534 for (i = 0; i < evlist->nr_mmaps; i++) { 535 struct perf_mmap *map = &maps[i]; 536 537 if (map->base) { 538 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 539 rc = -1; 540 goto out; 541 } 542 } 543 544 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 545 record__auxtrace_mmap_read(rec, map) != 0) { 546 rc = -1; 547 goto out; 548 } 549 } 550 551 /* 552 * Mark the round finished in case we wrote 553 * at least one event. 554 */ 555 if (bytes_written != rec->bytes_written) 556 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 557 558 if (overwrite) 559 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 560 out: 561 return rc; 562 } 563 564 static int record__mmap_read_all(struct record *rec) 565 { 566 int err; 567 568 err = record__mmap_read_evlist(rec, rec->evlist, false); 569 if (err) 570 return err; 571 572 return record__mmap_read_evlist(rec, rec->evlist, true); 573 } 574 575 static void record__init_features(struct record *rec) 576 { 577 struct perf_session *session = rec->session; 578 int feat; 579 580 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 581 perf_header__set_feat(&session->header, feat); 582 583 if (rec->no_buildid) 584 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 585 586 if (!have_tracepoints(&rec->evlist->entries)) 587 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 588 589 if (!rec->opts.branch_stack) 590 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 591 592 if (!rec->opts.full_auxtrace) 593 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 594 595 perf_header__clear_feat(&session->header, HEADER_STAT); 596 } 597 598 static void 599 record__finish_output(struct record *rec) 600 { 601 struct perf_data *data = &rec->data; 602 int fd = perf_data__fd(data); 603 604 if (data->is_pipe) 605 return; 606 607 rec->session->header.data_size += rec->bytes_written; 608 data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); 609 610 if (!rec->no_buildid) { 611 process_buildids(rec); 612 613 if (rec->buildid_all) 614 dsos__hit_all(rec->session); 615 } 616 perf_session__write_header(rec->session, rec->evlist, fd, true); 617 618 return; 619 } 620 621 static int record__synthesize_workload(struct record *rec, bool tail) 622 { 623 int err; 624 struct thread_map *thread_map; 625 626 if (rec->opts.tail_synthesize != tail) 627 return 0; 628 629 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 630 if (thread_map == NULL) 631 return -1; 632 633 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 634 process_synthesized_event, 635 &rec->session->machines.host, 636 rec->opts.sample_address, 637 rec->opts.proc_map_timeout); 638 thread_map__put(thread_map); 639 return err; 640 } 641 642 static int record__synthesize(struct record *rec, bool tail); 643 644 static int 645 record__switch_output(struct record *rec, bool at_exit) 646 { 647 struct perf_data *data = &rec->data; 648 int fd, err; 649 650 /* Same Size: "2015122520103046"*/ 651 char timestamp[] = "InvalidTimestamp"; 652 653 record__synthesize(rec, true); 654 if (target__none(&rec->opts.target)) 655 record__synthesize_workload(rec, true); 656 657 rec->samples = 0; 658 record__finish_output(rec); 659 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 660 if (err) { 661 pr_err("Failed to get current timestamp\n"); 662 return -EINVAL; 663 } 664 665 fd = perf_data__switch(data, timestamp, 666 rec->session->header.data_offset, 667 at_exit); 668 if (fd >= 0 && !at_exit) { 669 rec->bytes_written = 0; 670 rec->session->header.data_size = 0; 671 } 672 673 if (!quiet) 674 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 675 data->file.path, timestamp); 676 677 /* Output tracking events */ 678 if (!at_exit) { 679 record__synthesize(rec, false); 680 681 /* 682 * In 'perf record --switch-output' without -a, 683 * record__synthesize() in record__switch_output() won't 684 * generate tracking events because there's no thread_map 685 * in evlist. Which causes newly created perf.data doesn't 686 * contain map and comm information. 687 * Create a fake thread_map and directly call 688 * perf_event__synthesize_thread_map() for those events. 689 */ 690 if (target__none(&rec->opts.target)) 691 record__synthesize_workload(rec, false); 692 } 693 return fd; 694 } 695 696 static volatile int workload_exec_errno; 697 698 /* 699 * perf_evlist__prepare_workload will send a SIGUSR1 700 * if the fork fails, since we asked by setting its 701 * want_signal to true. 702 */ 703 static void workload_exec_failed_signal(int signo __maybe_unused, 704 siginfo_t *info, 705 void *ucontext __maybe_unused) 706 { 707 workload_exec_errno = info->si_value.sival_int; 708 done = 1; 709 child_finished = 1; 710 } 711 712 static void snapshot_sig_handler(int sig); 713 static void alarm_sig_handler(int sig); 714 715 int __weak 716 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 717 struct perf_tool *tool __maybe_unused, 718 perf_event__handler_t process __maybe_unused, 719 struct machine *machine __maybe_unused) 720 { 721 return 0; 722 } 723 724 static const struct perf_event_mmap_page * 725 perf_evlist__pick_pc(struct perf_evlist *evlist) 726 { 727 if (evlist) { 728 if (evlist->mmap && evlist->mmap[0].base) 729 return evlist->mmap[0].base; 730 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) 731 return evlist->overwrite_mmap[0].base; 732 } 733 return NULL; 734 } 735 736 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 737 { 738 const struct perf_event_mmap_page *pc; 739 740 pc = perf_evlist__pick_pc(rec->evlist); 741 if (pc) 742 return pc; 743 return NULL; 744 } 745 746 static int record__synthesize(struct record *rec, bool tail) 747 { 748 struct perf_session *session = rec->session; 749 struct machine *machine = &session->machines.host; 750 struct perf_data *data = &rec->data; 751 struct record_opts *opts = &rec->opts; 752 struct perf_tool *tool = &rec->tool; 753 int fd = perf_data__fd(data); 754 int err = 0; 755 756 if (rec->opts.tail_synthesize != tail) 757 return 0; 758 759 if (data->is_pipe) { 760 /* 761 * We need to synthesize events first, because some 762 * features works on top of them (on report side). 763 */ 764 err = perf_event__synthesize_attrs(tool, rec->evlist, 765 process_synthesized_event); 766 if (err < 0) { 767 pr_err("Couldn't synthesize attrs.\n"); 768 goto out; 769 } 770 771 err = perf_event__synthesize_features(tool, session, rec->evlist, 772 process_synthesized_event); 773 if (err < 0) { 774 pr_err("Couldn't synthesize features.\n"); 775 return err; 776 } 777 778 if (have_tracepoints(&rec->evlist->entries)) { 779 /* 780 * FIXME err <= 0 here actually means that 781 * there were no tracepoints so its not really 782 * an error, just that we don't need to 783 * synthesize anything. We really have to 784 * return this more properly and also 785 * propagate errors that now are calling die() 786 */ 787 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 788 process_synthesized_event); 789 if (err <= 0) { 790 pr_err("Couldn't record tracing data.\n"); 791 goto out; 792 } 793 rec->bytes_written += err; 794 } 795 } 796 797 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 798 process_synthesized_event, machine); 799 if (err) 800 goto out; 801 802 if (rec->opts.full_auxtrace) { 803 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 804 session, process_synthesized_event); 805 if (err) 806 goto out; 807 } 808 809 if (!perf_evlist__exclude_kernel(rec->evlist)) { 810 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 811 machine); 812 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 813 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 814 "Check /proc/kallsyms permission or run as root.\n"); 815 816 err = perf_event__synthesize_modules(tool, process_synthesized_event, 817 machine); 818 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 819 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 820 "Check /proc/modules permission or run as root.\n"); 821 } 822 823 if (perf_guest) { 824 machines__process_guests(&session->machines, 825 perf_event__synthesize_guest_os, tool); 826 } 827 828 err = perf_event__synthesize_extra_attr(&rec->tool, 829 rec->evlist, 830 process_synthesized_event, 831 data->is_pipe); 832 if (err) 833 goto out; 834 835 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads, 836 process_synthesized_event, 837 NULL); 838 if (err < 0) { 839 pr_err("Couldn't synthesize thread map.\n"); 840 return err; 841 } 842 843 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus, 844 process_synthesized_event, NULL); 845 if (err < 0) { 846 pr_err("Couldn't synthesize cpu map.\n"); 847 return err; 848 } 849 850 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 851 process_synthesized_event, opts->sample_address, 852 opts->proc_map_timeout, 1); 853 out: 854 return err; 855 } 856 857 static int __cmd_record(struct record *rec, int argc, const char **argv) 858 { 859 int err; 860 int status = 0; 861 unsigned long waking = 0; 862 const bool forks = argc > 0; 863 struct perf_tool *tool = &rec->tool; 864 struct record_opts *opts = &rec->opts; 865 struct perf_data *data = &rec->data; 866 struct perf_session *session; 867 bool disabled = false, draining = false; 868 int fd; 869 870 atexit(record__sig_exit); 871 signal(SIGCHLD, sig_handler); 872 signal(SIGINT, sig_handler); 873 signal(SIGTERM, sig_handler); 874 signal(SIGSEGV, sigsegv_handler); 875 876 if (rec->opts.record_namespaces) 877 tool->namespace_events = true; 878 879 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 880 signal(SIGUSR2, snapshot_sig_handler); 881 if (rec->opts.auxtrace_snapshot_mode) 882 trigger_on(&auxtrace_snapshot_trigger); 883 if (rec->switch_output.enabled) 884 trigger_on(&switch_output_trigger); 885 } else { 886 signal(SIGUSR2, SIG_IGN); 887 } 888 889 session = perf_session__new(data, false, tool); 890 if (session == NULL) { 891 pr_err("Perf session creation failed.\n"); 892 return -1; 893 } 894 895 fd = perf_data__fd(data); 896 rec->session = session; 897 898 record__init_features(rec); 899 900 if (forks) { 901 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 902 argv, data->is_pipe, 903 workload_exec_failed_signal); 904 if (err < 0) { 905 pr_err("Couldn't run the workload!\n"); 906 status = err; 907 goto out_delete_session; 908 } 909 } 910 911 /* 912 * If we have just single event and are sending data 913 * through pipe, we need to force the ids allocation, 914 * because we synthesize event name through the pipe 915 * and need the id for that. 916 */ 917 if (data->is_pipe && rec->evlist->nr_entries == 1) 918 rec->opts.sample_id = true; 919 920 if (record__open(rec) != 0) { 921 err = -1; 922 goto out_child; 923 } 924 925 err = bpf__apply_obj_config(); 926 if (err) { 927 char errbuf[BUFSIZ]; 928 929 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 930 pr_err("ERROR: Apply config to BPF failed: %s\n", 931 errbuf); 932 goto out_child; 933 } 934 935 /* 936 * Normally perf_session__new would do this, but it doesn't have the 937 * evlist. 938 */ 939 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 940 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 941 rec->tool.ordered_events = false; 942 } 943 944 if (!rec->evlist->nr_groups) 945 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 946 947 if (data->is_pipe) { 948 err = perf_header__write_pipe(fd); 949 if (err < 0) 950 goto out_child; 951 } else { 952 err = perf_session__write_header(session, rec->evlist, fd, false); 953 if (err < 0) 954 goto out_child; 955 } 956 957 if (!rec->no_buildid 958 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 959 pr_err("Couldn't generate buildids. " 960 "Use --no-buildid to profile anyway.\n"); 961 err = -1; 962 goto out_child; 963 } 964 965 err = record__synthesize(rec, false); 966 if (err < 0) 967 goto out_child; 968 969 if (rec->realtime_prio) { 970 struct sched_param param; 971 972 param.sched_priority = rec->realtime_prio; 973 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 974 pr_err("Could not set realtime priority.\n"); 975 err = -1; 976 goto out_child; 977 } 978 } 979 980 /* 981 * When perf is starting the traced process, all the events 982 * (apart from group members) have enable_on_exec=1 set, 983 * so don't spoil it by prematurely enabling them. 984 */ 985 if (!target__none(&opts->target) && !opts->initial_delay) 986 perf_evlist__enable(rec->evlist); 987 988 /* 989 * Let the child rip 990 */ 991 if (forks) { 992 struct machine *machine = &session->machines.host; 993 union perf_event *event; 994 pid_t tgid; 995 996 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 997 if (event == NULL) { 998 err = -ENOMEM; 999 goto out_child; 1000 } 1001 1002 /* 1003 * Some H/W events are generated before COMM event 1004 * which is emitted during exec(), so perf script 1005 * cannot see a correct process name for those events. 1006 * Synthesize COMM event to prevent it. 1007 */ 1008 tgid = perf_event__synthesize_comm(tool, event, 1009 rec->evlist->workload.pid, 1010 process_synthesized_event, 1011 machine); 1012 free(event); 1013 1014 if (tgid == -1) 1015 goto out_child; 1016 1017 event = malloc(sizeof(event->namespaces) + 1018 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1019 machine->id_hdr_size); 1020 if (event == NULL) { 1021 err = -ENOMEM; 1022 goto out_child; 1023 } 1024 1025 /* 1026 * Synthesize NAMESPACES event for the command specified. 1027 */ 1028 perf_event__synthesize_namespaces(tool, event, 1029 rec->evlist->workload.pid, 1030 tgid, process_synthesized_event, 1031 machine); 1032 free(event); 1033 1034 perf_evlist__start_workload(rec->evlist); 1035 } 1036 1037 if (opts->initial_delay) { 1038 usleep(opts->initial_delay * USEC_PER_MSEC); 1039 perf_evlist__enable(rec->evlist); 1040 } 1041 1042 trigger_ready(&auxtrace_snapshot_trigger); 1043 trigger_ready(&switch_output_trigger); 1044 perf_hooks__invoke_record_start(); 1045 for (;;) { 1046 unsigned long long hits = rec->samples; 1047 1048 /* 1049 * rec->evlist->bkw_mmap_state is possible to be 1050 * BKW_MMAP_EMPTY here: when done == true and 1051 * hits != rec->samples in previous round. 1052 * 1053 * perf_evlist__toggle_bkw_mmap ensure we never 1054 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1055 */ 1056 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1057 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1058 1059 if (record__mmap_read_all(rec) < 0) { 1060 trigger_error(&auxtrace_snapshot_trigger); 1061 trigger_error(&switch_output_trigger); 1062 err = -1; 1063 goto out_child; 1064 } 1065 1066 if (auxtrace_record__snapshot_started) { 1067 auxtrace_record__snapshot_started = 0; 1068 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1069 record__read_auxtrace_snapshot(rec); 1070 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1071 pr_err("AUX area tracing snapshot failed\n"); 1072 err = -1; 1073 goto out_child; 1074 } 1075 } 1076 1077 if (trigger_is_hit(&switch_output_trigger)) { 1078 /* 1079 * If switch_output_trigger is hit, the data in 1080 * overwritable ring buffer should have been collected, 1081 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1082 * 1083 * If SIGUSR2 raise after or during record__mmap_read_all(), 1084 * record__mmap_read_all() didn't collect data from 1085 * overwritable ring buffer. Read again. 1086 */ 1087 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1088 continue; 1089 trigger_ready(&switch_output_trigger); 1090 1091 /* 1092 * Reenable events in overwrite ring buffer after 1093 * record__mmap_read_all(): we should have collected 1094 * data from it. 1095 */ 1096 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1097 1098 if (!quiet) 1099 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1100 waking); 1101 waking = 0; 1102 fd = record__switch_output(rec, false); 1103 if (fd < 0) { 1104 pr_err("Failed to switch to new file\n"); 1105 trigger_error(&switch_output_trigger); 1106 err = fd; 1107 goto out_child; 1108 } 1109 1110 /* re-arm the alarm */ 1111 if (rec->switch_output.time) 1112 alarm(rec->switch_output.time); 1113 } 1114 1115 if (hits == rec->samples) { 1116 if (done || draining) 1117 break; 1118 err = perf_evlist__poll(rec->evlist, -1); 1119 /* 1120 * Propagate error, only if there's any. Ignore positive 1121 * number of returned events and interrupt error. 1122 */ 1123 if (err > 0 || (err < 0 && errno == EINTR)) 1124 err = 0; 1125 waking++; 1126 1127 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1128 draining = true; 1129 } 1130 1131 /* 1132 * When perf is starting the traced process, at the end events 1133 * die with the process and we wait for that. Thus no need to 1134 * disable events in this case. 1135 */ 1136 if (done && !disabled && !target__none(&opts->target)) { 1137 trigger_off(&auxtrace_snapshot_trigger); 1138 perf_evlist__disable(rec->evlist); 1139 disabled = true; 1140 } 1141 } 1142 trigger_off(&auxtrace_snapshot_trigger); 1143 trigger_off(&switch_output_trigger); 1144 1145 if (forks && workload_exec_errno) { 1146 char msg[STRERR_BUFSIZE]; 1147 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1148 pr_err("Workload failed: %s\n", emsg); 1149 err = -1; 1150 goto out_child; 1151 } 1152 1153 if (!quiet) 1154 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1155 1156 if (target__none(&rec->opts.target)) 1157 record__synthesize_workload(rec, true); 1158 1159 out_child: 1160 if (forks) { 1161 int exit_status; 1162 1163 if (!child_finished) 1164 kill(rec->evlist->workload.pid, SIGTERM); 1165 1166 wait(&exit_status); 1167 1168 if (err < 0) 1169 status = err; 1170 else if (WIFEXITED(exit_status)) 1171 status = WEXITSTATUS(exit_status); 1172 else if (WIFSIGNALED(exit_status)) 1173 signr = WTERMSIG(exit_status); 1174 } else 1175 status = err; 1176 1177 record__synthesize(rec, true); 1178 /* this will be recalculated during process_buildids() */ 1179 rec->samples = 0; 1180 1181 if (!err) { 1182 if (!rec->timestamp_filename) { 1183 record__finish_output(rec); 1184 } else { 1185 fd = record__switch_output(rec, true); 1186 if (fd < 0) { 1187 status = fd; 1188 goto out_delete_session; 1189 } 1190 } 1191 } 1192 1193 perf_hooks__invoke_record_end(); 1194 1195 if (!err && !quiet) { 1196 char samples[128]; 1197 const char *postfix = rec->timestamp_filename ? 1198 ".<timestamp>" : ""; 1199 1200 if (rec->samples && !rec->opts.full_auxtrace) 1201 scnprintf(samples, sizeof(samples), 1202 " (%" PRIu64 " samples)", rec->samples); 1203 else 1204 samples[0] = '\0'; 1205 1206 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1207 perf_data__size(data) / 1024.0 / 1024.0, 1208 data->file.path, postfix, samples); 1209 } 1210 1211 out_delete_session: 1212 perf_session__delete(session); 1213 return status; 1214 } 1215 1216 static void callchain_debug(struct callchain_param *callchain) 1217 { 1218 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1219 1220 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1221 1222 if (callchain->record_mode == CALLCHAIN_DWARF) 1223 pr_debug("callchain: stack dump size %d\n", 1224 callchain->dump_size); 1225 } 1226 1227 int record_opts__parse_callchain(struct record_opts *record, 1228 struct callchain_param *callchain, 1229 const char *arg, bool unset) 1230 { 1231 int ret; 1232 callchain->enabled = !unset; 1233 1234 /* --no-call-graph */ 1235 if (unset) { 1236 callchain->record_mode = CALLCHAIN_NONE; 1237 pr_debug("callchain: disabled\n"); 1238 return 0; 1239 } 1240 1241 ret = parse_callchain_record_opt(arg, callchain); 1242 if (!ret) { 1243 /* Enable data address sampling for DWARF unwind. */ 1244 if (callchain->record_mode == CALLCHAIN_DWARF) 1245 record->sample_address = true; 1246 callchain_debug(callchain); 1247 } 1248 1249 return ret; 1250 } 1251 1252 int record_parse_callchain_opt(const struct option *opt, 1253 const char *arg, 1254 int unset) 1255 { 1256 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1257 } 1258 1259 int record_callchain_opt(const struct option *opt, 1260 const char *arg __maybe_unused, 1261 int unset __maybe_unused) 1262 { 1263 struct callchain_param *callchain = opt->value; 1264 1265 callchain->enabled = true; 1266 1267 if (callchain->record_mode == CALLCHAIN_NONE) 1268 callchain->record_mode = CALLCHAIN_FP; 1269 1270 callchain_debug(callchain); 1271 return 0; 1272 } 1273 1274 static int perf_record_config(const char *var, const char *value, void *cb) 1275 { 1276 struct record *rec = cb; 1277 1278 if (!strcmp(var, "record.build-id")) { 1279 if (!strcmp(value, "cache")) 1280 rec->no_buildid_cache = false; 1281 else if (!strcmp(value, "no-cache")) 1282 rec->no_buildid_cache = true; 1283 else if (!strcmp(value, "skip")) 1284 rec->no_buildid = true; 1285 else 1286 return -1; 1287 return 0; 1288 } 1289 if (!strcmp(var, "record.call-graph")) { 1290 var = "call-graph.record-mode"; 1291 return perf_default_config(var, value, cb); 1292 } 1293 1294 return 0; 1295 } 1296 1297 struct clockid_map { 1298 const char *name; 1299 int clockid; 1300 }; 1301 1302 #define CLOCKID_MAP(n, c) \ 1303 { .name = n, .clockid = (c), } 1304 1305 #define CLOCKID_END { .name = NULL, } 1306 1307 1308 /* 1309 * Add the missing ones, we need to build on many distros... 1310 */ 1311 #ifndef CLOCK_MONOTONIC_RAW 1312 #define CLOCK_MONOTONIC_RAW 4 1313 #endif 1314 #ifndef CLOCK_BOOTTIME 1315 #define CLOCK_BOOTTIME 7 1316 #endif 1317 #ifndef CLOCK_TAI 1318 #define CLOCK_TAI 11 1319 #endif 1320 1321 static const struct clockid_map clockids[] = { 1322 /* available for all events, NMI safe */ 1323 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1324 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1325 1326 /* available for some events */ 1327 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1328 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1329 CLOCKID_MAP("tai", CLOCK_TAI), 1330 1331 /* available for the lazy */ 1332 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1333 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1334 CLOCKID_MAP("real", CLOCK_REALTIME), 1335 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1336 1337 CLOCKID_END, 1338 }; 1339 1340 static int parse_clockid(const struct option *opt, const char *str, int unset) 1341 { 1342 struct record_opts *opts = (struct record_opts *)opt->value; 1343 const struct clockid_map *cm; 1344 const char *ostr = str; 1345 1346 if (unset) { 1347 opts->use_clockid = 0; 1348 return 0; 1349 } 1350 1351 /* no arg passed */ 1352 if (!str) 1353 return 0; 1354 1355 /* no setting it twice */ 1356 if (opts->use_clockid) 1357 return -1; 1358 1359 opts->use_clockid = true; 1360 1361 /* if its a number, we're done */ 1362 if (sscanf(str, "%d", &opts->clockid) == 1) 1363 return 0; 1364 1365 /* allow a "CLOCK_" prefix to the name */ 1366 if (!strncasecmp(str, "CLOCK_", 6)) 1367 str += 6; 1368 1369 for (cm = clockids; cm->name; cm++) { 1370 if (!strcasecmp(str, cm->name)) { 1371 opts->clockid = cm->clockid; 1372 return 0; 1373 } 1374 } 1375 1376 opts->use_clockid = false; 1377 ui__warning("unknown clockid %s, check man page\n", ostr); 1378 return -1; 1379 } 1380 1381 static int record__parse_mmap_pages(const struct option *opt, 1382 const char *str, 1383 int unset __maybe_unused) 1384 { 1385 struct record_opts *opts = opt->value; 1386 char *s, *p; 1387 unsigned int mmap_pages; 1388 int ret; 1389 1390 if (!str) 1391 return -EINVAL; 1392 1393 s = strdup(str); 1394 if (!s) 1395 return -ENOMEM; 1396 1397 p = strchr(s, ','); 1398 if (p) 1399 *p = '\0'; 1400 1401 if (*s) { 1402 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1403 if (ret) 1404 goto out_free; 1405 opts->mmap_pages = mmap_pages; 1406 } 1407 1408 if (!p) { 1409 ret = 0; 1410 goto out_free; 1411 } 1412 1413 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1414 if (ret) 1415 goto out_free; 1416 1417 opts->auxtrace_mmap_pages = mmap_pages; 1418 1419 out_free: 1420 free(s); 1421 return ret; 1422 } 1423 1424 static void switch_output_size_warn(struct record *rec) 1425 { 1426 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1427 struct switch_output *s = &rec->switch_output; 1428 1429 wakeup_size /= 2; 1430 1431 if (s->size < wakeup_size) { 1432 char buf[100]; 1433 1434 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1435 pr_warning("WARNING: switch-output data size lower than " 1436 "wakeup kernel buffer size (%s) " 1437 "expect bigger perf.data sizes\n", buf); 1438 } 1439 } 1440 1441 static int switch_output_setup(struct record *rec) 1442 { 1443 struct switch_output *s = &rec->switch_output; 1444 static struct parse_tag tags_size[] = { 1445 { .tag = 'B', .mult = 1 }, 1446 { .tag = 'K', .mult = 1 << 10 }, 1447 { .tag = 'M', .mult = 1 << 20 }, 1448 { .tag = 'G', .mult = 1 << 30 }, 1449 { .tag = 0 }, 1450 }; 1451 static struct parse_tag tags_time[] = { 1452 { .tag = 's', .mult = 1 }, 1453 { .tag = 'm', .mult = 60 }, 1454 { .tag = 'h', .mult = 60*60 }, 1455 { .tag = 'd', .mult = 60*60*24 }, 1456 { .tag = 0 }, 1457 }; 1458 unsigned long val; 1459 1460 if (!s->set) 1461 return 0; 1462 1463 if (!strcmp(s->str, "signal")) { 1464 s->signal = true; 1465 pr_debug("switch-output with SIGUSR2 signal\n"); 1466 goto enabled; 1467 } 1468 1469 val = parse_tag_value(s->str, tags_size); 1470 if (val != (unsigned long) -1) { 1471 s->size = val; 1472 pr_debug("switch-output with %s size threshold\n", s->str); 1473 goto enabled; 1474 } 1475 1476 val = parse_tag_value(s->str, tags_time); 1477 if (val != (unsigned long) -1) { 1478 s->time = val; 1479 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 1480 s->str, s->time); 1481 goto enabled; 1482 } 1483 1484 return -1; 1485 1486 enabled: 1487 rec->timestamp_filename = true; 1488 s->enabled = true; 1489 1490 if (s->size && !rec->opts.no_buffering) 1491 switch_output_size_warn(rec); 1492 1493 return 0; 1494 } 1495 1496 static const char * const __record_usage[] = { 1497 "perf record [<options>] [<command>]", 1498 "perf record [<options>] -- <command> [<options>]", 1499 NULL 1500 }; 1501 const char * const *record_usage = __record_usage; 1502 1503 /* 1504 * XXX Ideally would be local to cmd_record() and passed to a record__new 1505 * because we need to have access to it in record__exit, that is called 1506 * after cmd_record() exits, but since record_options need to be accessible to 1507 * builtin-script, leave it here. 1508 * 1509 * At least we don't ouch it in all the other functions here directly. 1510 * 1511 * Just say no to tons of global variables, sigh. 1512 */ 1513 static struct record record = { 1514 .opts = { 1515 .sample_time = true, 1516 .mmap_pages = UINT_MAX, 1517 .user_freq = UINT_MAX, 1518 .user_interval = ULLONG_MAX, 1519 .freq = 4000, 1520 .target = { 1521 .uses_mmap = true, 1522 .default_per_cpu = true, 1523 }, 1524 .proc_map_timeout = 500, 1525 }, 1526 .tool = { 1527 .sample = process_sample_event, 1528 .fork = perf_event__process_fork, 1529 .exit = perf_event__process_exit, 1530 .comm = perf_event__process_comm, 1531 .namespaces = perf_event__process_namespaces, 1532 .mmap = perf_event__process_mmap, 1533 .mmap2 = perf_event__process_mmap2, 1534 .ordered_events = true, 1535 }, 1536 }; 1537 1538 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1539 "\n\t\t\t\tDefault: fp"; 1540 1541 static bool dry_run; 1542 1543 /* 1544 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1545 * with it and switch to use the library functions in perf_evlist that came 1546 * from builtin-record.c, i.e. use record_opts, 1547 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1548 * using pipes, etc. 1549 */ 1550 static struct option __record_options[] = { 1551 OPT_CALLBACK('e', "event", &record.evlist, "event", 1552 "event selector. use 'perf list' to list available events", 1553 parse_events_option), 1554 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1555 "event filter", parse_filter), 1556 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1557 NULL, "don't record events from perf itself", 1558 exclude_perf), 1559 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1560 "record events on existing process id"), 1561 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1562 "record events on existing thread id"), 1563 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1564 "collect data with this RT SCHED_FIFO priority"), 1565 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1566 "collect data without buffering"), 1567 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1568 "collect raw sample records from all opened counters"), 1569 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1570 "system-wide collection from all CPUs"), 1571 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1572 "list of cpus to monitor"), 1573 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1574 OPT_STRING('o', "output", &record.data.file.path, "file", 1575 "output file name"), 1576 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1577 &record.opts.no_inherit_set, 1578 "child tasks do not inherit counters"), 1579 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1580 "synthesize non-sample events at the end of output"), 1581 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1582 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 1583 "Fail if the specified frequency can't be used"), 1584 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 1585 "profile at this frequency", 1586 record__parse_freq), 1587 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1588 "number of mmap data pages and AUX area tracing mmap pages", 1589 record__parse_mmap_pages), 1590 OPT_BOOLEAN(0, "group", &record.opts.group, 1591 "put the counters into a counter group"), 1592 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1593 NULL, "enables call-graph recording" , 1594 &record_callchain_opt), 1595 OPT_CALLBACK(0, "call-graph", &record.opts, 1596 "record_mode[,record_size]", record_callchain_help, 1597 &record_parse_callchain_opt), 1598 OPT_INCR('v', "verbose", &verbose, 1599 "be more verbose (show counter open errors, etc)"), 1600 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1601 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1602 "per thread counts"), 1603 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1604 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 1605 "Record the sample physical addresses"), 1606 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1607 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1608 &record.opts.sample_time_set, 1609 "Record the sample timestamps"), 1610 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 1611 "Record the sample period"), 1612 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1613 "don't sample"), 1614 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 1615 &record.no_buildid_cache_set, 1616 "do not update the buildid cache"), 1617 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 1618 &record.no_buildid_set, 1619 "do not collect buildids in perf.data"), 1620 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 1621 "monitor event in cgroup name only", 1622 parse_cgroups), 1623 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 1624 "ms to wait before starting measurement after program start"), 1625 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 1626 "user to profile"), 1627 1628 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 1629 "branch any", "sample any taken branches", 1630 parse_branch_stack), 1631 1632 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 1633 "branch filter mask", "branch stack filter modes", 1634 parse_branch_stack), 1635 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 1636 "sample by weight (on special events only)"), 1637 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 1638 "sample transaction flags (special events only)"), 1639 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 1640 "use per-thread mmaps"), 1641 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 1642 "sample selected machine registers on interrupt," 1643 " use -I ? to list register names", parse_regs), 1644 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 1645 "sample selected machine registers on interrupt," 1646 " use -I ? to list register names", parse_regs), 1647 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 1648 "Record running/enabled time of read (:S) events"), 1649 OPT_CALLBACK('k', "clockid", &record.opts, 1650 "clockid", "clockid to use for events, see clock_gettime()", 1651 parse_clockid), 1652 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 1653 "opts", "AUX area tracing Snapshot Mode", ""), 1654 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1655 "per thread proc mmap processing timeout in ms"), 1656 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 1657 "Record namespaces events"), 1658 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 1659 "Record context switch events"), 1660 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 1661 "Configure all used events to run in kernel space.", 1662 PARSE_OPT_EXCLUSIVE), 1663 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 1664 "Configure all used events to run in user space.", 1665 PARSE_OPT_EXCLUSIVE), 1666 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 1667 "clang binary to use for compiling BPF scriptlets"), 1668 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 1669 "options passed to clang when compiling BPF scriptlets"), 1670 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1671 "file", "vmlinux pathname"), 1672 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 1673 "Record build-id of all DSOs regardless of hits"), 1674 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 1675 "append timestamp to output filename"), 1676 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 1677 "Record timestamp boundary (time of first/last samples)"), 1678 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 1679 &record.switch_output.set, "signal,size,time", 1680 "Switch output when receive SIGUSR2 or cross size,time threshold", 1681 "signal"), 1682 OPT_BOOLEAN(0, "dry-run", &dry_run, 1683 "Parse options then exit"), 1684 OPT_END() 1685 }; 1686 1687 struct option *record_options = __record_options; 1688 1689 int cmd_record(int argc, const char **argv) 1690 { 1691 int err; 1692 struct record *rec = &record; 1693 char errbuf[BUFSIZ]; 1694 1695 setlocale(LC_ALL, ""); 1696 1697 #ifndef HAVE_LIBBPF_SUPPORT 1698 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 1699 set_nobuild('\0', "clang-path", true); 1700 set_nobuild('\0', "clang-opt", true); 1701 # undef set_nobuild 1702 #endif 1703 1704 #ifndef HAVE_BPF_PROLOGUE 1705 # if !defined (HAVE_DWARF_SUPPORT) 1706 # define REASON "NO_DWARF=1" 1707 # elif !defined (HAVE_LIBBPF_SUPPORT) 1708 # define REASON "NO_LIBBPF=1" 1709 # else 1710 # define REASON "this architecture doesn't support BPF prologue" 1711 # endif 1712 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 1713 set_nobuild('\0', "vmlinux", true); 1714 # undef set_nobuild 1715 # undef REASON 1716 #endif 1717 1718 rec->evlist = perf_evlist__new(); 1719 if (rec->evlist == NULL) 1720 return -ENOMEM; 1721 1722 err = perf_config(perf_record_config, rec); 1723 if (err) 1724 return err; 1725 1726 argc = parse_options(argc, argv, record_options, record_usage, 1727 PARSE_OPT_STOP_AT_NON_OPTION); 1728 if (quiet) 1729 perf_quiet_option(); 1730 1731 /* Make system wide (-a) the default target. */ 1732 if (!argc && target__none(&rec->opts.target)) 1733 rec->opts.target.system_wide = true; 1734 1735 if (nr_cgroups && !rec->opts.target.system_wide) { 1736 usage_with_options_msg(record_usage, record_options, 1737 "cgroup monitoring only available in system-wide mode"); 1738 1739 } 1740 if (rec->opts.record_switch_events && 1741 !perf_can_record_switch_events()) { 1742 ui__error("kernel does not support recording context switch events\n"); 1743 parse_options_usage(record_usage, record_options, "switch-events", 0); 1744 return -EINVAL; 1745 } 1746 1747 if (switch_output_setup(rec)) { 1748 parse_options_usage(record_usage, record_options, "switch-output", 0); 1749 return -EINVAL; 1750 } 1751 1752 if (rec->switch_output.time) { 1753 signal(SIGALRM, alarm_sig_handler); 1754 alarm(rec->switch_output.time); 1755 } 1756 1757 /* 1758 * Allow aliases to facilitate the lookup of symbols for address 1759 * filters. Refer to auxtrace_parse_filters(). 1760 */ 1761 symbol_conf.allow_aliases = true; 1762 1763 symbol__init(NULL); 1764 1765 err = record__auxtrace_init(rec); 1766 if (err) 1767 goto out; 1768 1769 if (dry_run) 1770 goto out; 1771 1772 err = bpf__setup_stdout(rec->evlist); 1773 if (err) { 1774 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 1775 pr_err("ERROR: Setup BPF stdout failed: %s\n", 1776 errbuf); 1777 goto out; 1778 } 1779 1780 err = -ENOMEM; 1781 1782 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) 1783 pr_warning( 1784 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1785 "check /proc/sys/kernel/kptr_restrict.\n\n" 1786 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1787 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1788 "Samples in kernel modules won't be resolved at all.\n\n" 1789 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1790 "even with a suitable vmlinux or kallsyms file.\n\n"); 1791 1792 if (rec->no_buildid_cache || rec->no_buildid) { 1793 disable_buildid_cache(); 1794 } else if (rec->switch_output.enabled) { 1795 /* 1796 * In 'perf record --switch-output', disable buildid 1797 * generation by default to reduce data file switching 1798 * overhead. Still generate buildid if they are required 1799 * explicitly using 1800 * 1801 * perf record --switch-output --no-no-buildid \ 1802 * --no-no-buildid-cache 1803 * 1804 * Following code equals to: 1805 * 1806 * if ((rec->no_buildid || !rec->no_buildid_set) && 1807 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 1808 * disable_buildid_cache(); 1809 */ 1810 bool disable = true; 1811 1812 if (rec->no_buildid_set && !rec->no_buildid) 1813 disable = false; 1814 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 1815 disable = false; 1816 if (disable) { 1817 rec->no_buildid = true; 1818 rec->no_buildid_cache = true; 1819 disable_buildid_cache(); 1820 } 1821 } 1822 1823 if (record.opts.overwrite) 1824 record.opts.tail_synthesize = true; 1825 1826 if (rec->evlist->nr_entries == 0 && 1827 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 1828 pr_err("Not enough memory for event selector list\n"); 1829 goto out; 1830 } 1831 1832 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 1833 rec->opts.no_inherit = true; 1834 1835 err = target__validate(&rec->opts.target); 1836 if (err) { 1837 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1838 ui__warning("%s\n", errbuf); 1839 } 1840 1841 err = target__parse_uid(&rec->opts.target); 1842 if (err) { 1843 int saved_errno = errno; 1844 1845 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1846 ui__error("%s", errbuf); 1847 1848 err = -saved_errno; 1849 goto out; 1850 } 1851 1852 /* Enable ignoring missing threads when -u/-p option is defined. */ 1853 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 1854 1855 err = -ENOMEM; 1856 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1857 usage_with_options(record_usage, record_options); 1858 1859 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 1860 if (err) 1861 goto out; 1862 1863 /* 1864 * We take all buildids when the file contains 1865 * AUX area tracing data because we do not decode the 1866 * trace because it would take too long. 1867 */ 1868 if (rec->opts.full_auxtrace) 1869 rec->buildid_all = true; 1870 1871 if (record_opts__config(&rec->opts)) { 1872 err = -EINVAL; 1873 goto out; 1874 } 1875 1876 err = __cmd_record(&record, argc, argv); 1877 out: 1878 perf_evlist__delete(rec->evlist); 1879 symbol__exit(); 1880 auxtrace_record__free(rec->itr); 1881 return err; 1882 } 1883 1884 static void snapshot_sig_handler(int sig __maybe_unused) 1885 { 1886 struct record *rec = &record; 1887 1888 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1889 trigger_hit(&auxtrace_snapshot_trigger); 1890 auxtrace_record__snapshot_started = 1; 1891 if (auxtrace_record__snapshot_start(record.itr)) 1892 trigger_error(&auxtrace_snapshot_trigger); 1893 } 1894 1895 if (switch_output_signal(rec)) 1896 trigger_hit(&switch_output_trigger); 1897 } 1898 1899 static void alarm_sig_handler(int sig __maybe_unused) 1900 { 1901 struct record *rec = &record; 1902 1903 if (switch_output_time(rec)) 1904 trigger_hit(&switch_output_trigger); 1905 } 1906