// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <asm/bug.h>
#include <linux/time64.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data__write(rec->session->data, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, bf, size);
}
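/*
 * Everything written to the output file funnels through record__write()
 * above, whether it comes out of the mmap ring buffers (record__pushfn)
 * or is a synthesized side-band event. bytes_written therefore tracks
 * the size of the current output file, which is what the --switch-output
 * size threshold checked in switch_output_size() compares against.
 */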

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}
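/*
 * Layout written above: the auxtrace event header, then one or two data
 * chunks (two when the data wrapped around the end of the ring buffer),
 * then zero padding up to the next 8-byte boundary. For example, with
 * len1 + len2 == 13, (13 & 7) == 5, so 3 bytes of padding round the
 * payload up to 16.
 */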

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
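/*
 * record__open() below opens all the events in the evlist. When an open
 * fails, perf_evsel__fallback() may tweak the event and request a retry:
 * for instance falling back from the hardware "cycles" event to the
 * software "cpu-clock" event, or, depending on perf_event_paranoid,
 * retrying with exclude_kernel set. Hence the try_again loop.
 */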
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		       str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid missing symbols when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
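/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic flush marker: everything
 * written before it has been drained from the ring buffers, so a
 * consumer such as 'perf report' can safely sort and flush the events
 * buffered so far by timestamp. record__mmap_read_evlist() emits one
 * after every pass that actually wrote data.
 */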
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}
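/*
 * record__synthesize_workload() above and record__synthesize() below
 * both take a 'tail' flag and only act when it matches
 * opts->tail_synthesize, so the same call sites serve both modes:
 * normally side-band events are synthesized up front, but with
 * --tail-synthesize (implied by --overwrite, see cmd_record()) they are
 * emitted at the end of the session instead, describing the processes
 * that were still alive when the overwritable buffers were dumped.
 */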

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, so the newly created perf.data wouldn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
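/*
 * perf_event__synth_time_conv() above is a weak no-op default.
 * Architectures that can convert hardware timestamps to perf time
 * override it; on x86, for instance, it reads the TSC conversion
 * parameters out of the mmapped perf_event_mmap_page chosen by
 * record__pick_pc() and emits a PERF_RECORD_TIME_CONV event.
 */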

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}
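/*
 * Top-level record driver: install signal handlers, create the session,
 * prepare (but do not yet exec) a forked workload, open and mmap the
 * events, write the file header (or, for pipe output such as
 * "perf record -o - ... | perf report -i -", synthesize it in-band),
 * and then run the capture loop until done.
 */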
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
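	/*
	 * Main capture loop: drain the mmap ring buffers (and any AUX
	 * areas), service auxtrace snapshot and switch-output requests,
	 * and, when no new data arrived, block in poll() until woken up
	 * or until every monitored task has gone away.
	 */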
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}
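		/*
		 * Draining: once perf_evlist__filter_pollfd() above reports
		 * no remaining pollfds (all monitored tasks have exited),
		 * stop blocking in poll(); the next iteration makes a final
		 * sweep over the buffers and then leaves the loop.
		 */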

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
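/*
 * Parse -g / --call-graph. For example "--call-graph dwarf,8192"
 * selects CALLCHAIN_DWARF with an 8192-byte stack dump per sample;
 * DWARF unwinding also needs the sample address, which is why
 * sample_address is forced on below.
 */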
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
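/*
 * Parse -m/--mmap-pages. The argument is "pages[,pages]": the first
 * value sizes the data mmaps, the optional second one the AUX area
 * tracing mmaps. Each value may be given as a page count or, with a
 * B/K/M/G suffix, as a size (e.g. "-m 512,64K"); either way it has to
 * work out to a power-of-2 number of pages.
 */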
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
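/*
 * Accepted --switch-output arguments, per switch_output_setup() above:
 * "signal" (rotate on SIGUSR2, also the default when no argument is
 * given), a size with a B/K/M/G suffix (e.g. --switch-output=1G), or a
 * time with an s/m/h/d suffix (e.g. --switch-output=30s). Any of them
 * implies --timestamp-filename for the rotated files.
 */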

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use --user-regs=? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal,size,time",
			      "Switch output when receiving SIGUSR2 or crossing the size,time threshold",
			      "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are requested
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when the -u option is given. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}