// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct perf_evlist *evlist;
	struct perf_session *session;
	int realtime_prio;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool timestamp_filename;
	bool timestamp_boundary;
	struct switch_output switch_output;
	unsigned long long samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
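
/*
 * Note: synthesized events do not come from a ring buffer, hence the NULL
 * map above; record__write() still accounts for them in bytes_written, so
 * they count toward the switch-output size threshold like any other data.
 */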

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct perf_mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;
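
	/*
	 * Example (illustrative): len1 + len2 == 13 gives padding == 3,
	 * keeping the payload 8-byte aligned, which matches the size that
	 * __auxtrace_mmap__read() already accounted for in
	 * event->auxtrace.size.
	 */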

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct perf_mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
				pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
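
/*
 * Note: this ->sample callback only runs when process_buildids() below
 * re-reads the file at the end of the session; while recording, samples
 * are copied verbatim from the mmaps and never decoded.
 */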

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is
	 * enabled, it still needs to walk all samples to get the timestamps
	 * of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a DSO preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address falls in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
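
/*
 * Note: on the report side, a FINISHED_ROUND record bounds how long events
 * must be buffered before they can be sorted by timestamp and flushed; one
 * is emitted below after each pass over the mmaps that wrote at least one
 * event.
 */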

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *map = &maps[i];

		if (map->base) {
			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which means the newly created perf.data
		 * would lack mmap and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
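
/*
 * Example (illustrative): with -o perf.data and a switch happening at the
 * timestamp shown in the comment above, the dump announced by
 * record__switch_output() would be named perf.data.2015122520103046.
 */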

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
							   session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			  "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			  "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			  "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			  "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script cannot
		 * see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never convert
		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Re-enable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one; ignore a
			 * positive number of returned events and EINTR.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf starts the traced process, the events die with it
		 * at the end and we wait for that, so there is no need to
		 * disable them in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));

		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
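
/*
 * Example (illustrative): --call-graph dwarf,8192 selects CALLCHAIN_DWARF
 * with a dump_size of 8192 bytes, and in record_opts__parse_callchain()
 * below also turns on record->sample_address for the DWARF unwinder.
 */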

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}

	return 0;
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
{
	struct timespec res;

	*res_ns = 0;
	if (!clock_getres(clk_id, &res))
		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
	else
		pr_warning("WARNING: Failed to determine specified clock resolution.\n");

	return 0;
}

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
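
/*
 * Example (illustrative): -k monotonic_raw, -k CLOCK_MONOTONIC_RAW and
 * -k raw all resolve to CLOCK_MONOTONIC_RAW above; a numeric id such as
 * -k 4 is accepted as-is.
 */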

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s), "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag = 's', .mult = 1 },
		{ .tag = 'm', .mult = 60 },
		{ .tag = 'h', .mult = 60*60 },
		{ .tag = 'd', .mult = 60*60*24 },
		{ .tag = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
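
/*
 * Examples (illustrative): --switch-output=signal rotates the output on
 * SIGUSR2, --switch-output=100M once 100 MiB of data have been written,
 * and --switch-output=30s every 30 seconds via the re-armed alarm().
 */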

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time = true,
		.mmap_pages = UINT_MAX,
		.user_freq = UINT_MAX,
		.user_interval = ULLONG_MAX,
		.freq = 4000,
		.target = {
			.uses_mmap = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout = 500,
	},
	.tool = {
		.sample = process_sample_event,
		.fork = perf_event__process_fork,
		.exit = perf_event__process_exit,
		.comm = perf_event__process_comm,
		.namespaces = perf_event__process_namespaces,
		.mmap = perf_event__process_mmap,
		.mmap2 = perf_event__process_mmap2,
		.ordered_events = true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		     record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal,size,time",
			      "Switch output when receiving SIGUSR2 or crossing the size,time threshold",
			      "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *          disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}
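
	/*
	 * Note: --overwrite implies --tail-synthesize (just below), since an
	 * overwritable ring buffer keeps only the most recent data, so the
	 * side-band events are most useful when synthesized at the very end
	 * of the session.
	 */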
	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when the -u/-p option is given. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace; doing so would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}