1 /* 2 * builtin-record.c 3 * 4 * Builtin record command: Record the profile of a workload 5 * (or a CPU, or a PID) into the perf.data output file - for 6 * later analysis via perf report. 7 */ 8 #include "builtin.h" 9 10 #include "perf.h" 11 12 #include "util/build-id.h" 13 #include "util/util.h" 14 #include <subcmd/parse-options.h> 15 #include "util/parse-events.h" 16 #include "util/config.h" 17 18 #include "util/callchain.h" 19 #include "util/cgroup.h" 20 #include "util/header.h" 21 #include "util/event.h" 22 #include "util/evlist.h" 23 #include "util/evsel.h" 24 #include "util/debug.h" 25 #include "util/drv_configs.h" 26 #include "util/session.h" 27 #include "util/tool.h" 28 #include "util/symbol.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/llvm-utils.h" 38 #include "util/bpf-loader.h" 39 #include "util/trigger.h" 40 #include "util/perf-hooks.h" 41 #include "asm/bug.h" 42 43 #include <unistd.h> 44 #include <sched.h> 45 #include <sys/mman.h> 46 #include <asm/bug.h> 47 #include <linux/time64.h> 48 49 struct switch_output { 50 bool enabled; 51 bool signal; 52 unsigned long size; 53 unsigned long time; 54 const char *str; 55 bool set; 56 }; 57 58 struct record { 59 struct perf_tool tool; 60 struct record_opts opts; 61 u64 bytes_written; 62 struct perf_data_file file; 63 struct auxtrace_record *itr; 64 struct perf_evlist *evlist; 65 struct perf_session *session; 66 const char *progname; 67 int realtime_prio; 68 bool no_buildid; 69 bool no_buildid_set; 70 bool no_buildid_cache; 71 bool no_buildid_cache_set; 72 bool buildid_all; 73 bool timestamp_filename; 74 struct switch_output switch_output; 75 unsigned long long samples; 76 }; 77 78 static volatile int auxtrace_record__snapshot_started; 79 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 80 static DEFINE_TRIGGER(switch_output_trigger); 81 82 static bool switch_output_signal(struct record *rec) 83 { 84 return rec->switch_output.signal && 85 trigger_is_ready(&switch_output_trigger); 86 } 87 88 static bool switch_output_size(struct record *rec) 89 { 90 return rec->switch_output.size && 91 trigger_is_ready(&switch_output_trigger) && 92 (rec->bytes_written >= rec->switch_output.size); 93 } 94 95 static bool switch_output_time(struct record *rec) 96 { 97 return rec->switch_output.time && 98 trigger_is_ready(&switch_output_trigger); 99 } 100 101 static int record__write(struct record *rec, void *bf, size_t size) 102 { 103 if (perf_data_file__write(rec->session->file, bf, size) < 0) { 104 pr_err("failed to write perf data, error: %m\n"); 105 return -1; 106 } 107 108 rec->bytes_written += size; 109 110 if (switch_output_size(rec)) 111 trigger_hit(&switch_output_trigger); 112 113 return 0; 114 } 115 116 static int process_synthesized_event(struct perf_tool *tool, 117 union perf_event *event, 118 struct perf_sample *sample __maybe_unused, 119 struct machine *machine __maybe_unused) 120 { 121 struct record *rec = container_of(tool, struct record, tool); 122 return record__write(rec, event, event->header.size); 123 } 124 125 static int 126 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) 127 { 128 struct perf_event_header *pheader; 129 u64 evt_head = head; 130 int size = mask + 1; 131 132 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); 133 pheader = (struct perf_event_header *)(buf + (head & mask)); 134 *start = head; 135 while (true) { 136 if (evt_head - head >= (unsigned int)size) { 137 pr_debug("Finished reading backward ring buffer: rewind\n"); 138 if (evt_head - head > (unsigned int)size) 139 evt_head -= pheader->size; 140 *end = evt_head; 141 return 0; 142 } 143 144 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); 145 146 if (pheader->size == 0) { 147 pr_debug("Finished reading backward ring buffer: get start\n"); 148 *end = evt_head; 149 return 0; 150 } 151 152 evt_head += pheader->size; 153 pr_debug3("move evt_head: %"PRIx64"\n", evt_head); 154 } 155 WARN_ONCE(1, "Shouldn't get here\n"); 156 return -1; 157 } 158 159 static int 160 rb_find_range(void *data, int mask, u64 head, u64 old, 161 u64 *start, u64 *end, bool backward) 162 { 163 if (!backward) { 164 *start = old; 165 *end = head; 166 return 0; 167 } 168 169 return backward_rb_find_range(data, mask, head, start, end); 170 } 171 172 static int 173 record__mmap_read(struct record *rec, struct perf_mmap *md, 174 bool overwrite, bool backward) 175 { 176 u64 head = perf_mmap__read_head(md); 177 u64 old = md->prev; 178 u64 end = head, start = old; 179 unsigned char *data = md->base + page_size; 180 unsigned long size; 181 void *buf; 182 int rc = 0; 183 184 if (rb_find_range(data, md->mask, head, 185 old, &start, &end, backward)) 186 return -1; 187 188 if (start == end) 189 return 0; 190 191 rec->samples++; 192 193 size = end - start; 194 if (size > (unsigned long)(md->mask) + 1) { 195 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); 196 197 md->prev = head; 198 perf_mmap__consume(md, overwrite || backward); 199 return 0; 200 } 201 202 if ((start & md->mask) + size != (end & md->mask)) { 203 buf = &data[start & md->mask]; 204 size = md->mask + 1 - (start & md->mask); 205 start += size; 206 207 if (record__write(rec, buf, size) < 0) { 208 rc = -1; 209 goto out; 210 } 211 } 212 213 buf = &data[start & md->mask]; 214 size = end - start; 215 start += size; 216 217 if (record__write(rec, buf, size) < 0) { 218 rc = -1; 219 goto out; 220 } 221 222 md->prev = head; 223 perf_mmap__consume(md, overwrite || backward); 224 out: 225 return rc; 226 } 227 228 static volatile int done; 229 static volatile int signr = -1; 230 static volatile int child_finished; 231 232 static void sig_handler(int sig) 233 { 234 if (sig == SIGCHLD) 235 child_finished = 1; 236 else 237 signr = sig; 238 239 done = 1; 240 } 241 242 static void sigsegv_handler(int sig) 243 { 244 perf_hooks__recover(); 245 sighandler_dump_stack(sig); 246 } 247 248 static void record__sig_exit(void) 249 { 250 if (signr == -1) 251 return; 252 253 signal(signr, SIG_DFL); 254 raise(signr); 255 } 256 257 #ifdef HAVE_AUXTRACE_SUPPORT 258 259 static int record__process_auxtrace(struct perf_tool *tool, 260 union perf_event *event, void *data1, 261 size_t len1, void *data2, size_t len2) 262 { 263 struct record *rec = container_of(tool, struct record, tool); 264 struct perf_data_file *file = &rec->file; 265 size_t padding; 266 u8 pad[8] = {0}; 267 268 if (!perf_data_file__is_pipe(file)) { 269 off_t file_offset; 270 int fd = perf_data_file__fd(file); 271 int err; 272 273 file_offset = lseek(fd, 0, SEEK_CUR); 274 if (file_offset == -1) 275 return -1; 276 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 277 event, file_offset); 278 if (err) 279 return err; 280 } 281 282 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 283 padding = (len1 + len2) & 7; 284 if (padding) 285 padding = 8 - padding; 286 287 record__write(rec, event, event->header.size); 288 record__write(rec, data1, len1); 289 if (len2) 290 record__write(rec, data2, len2); 291 record__write(rec, &pad, padding); 292 293 return 0; 294 } 295 296 static int record__auxtrace_mmap_read(struct record *rec, 297 struct auxtrace_mmap *mm) 298 { 299 int ret; 300 301 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool, 302 record__process_auxtrace); 303 if (ret < 0) 304 return ret; 305 306 if (ret) 307 rec->samples++; 308 309 return 0; 310 } 311 312 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 313 struct auxtrace_mmap *mm) 314 { 315 int ret; 316 317 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool, 318 record__process_auxtrace, 319 rec->opts.auxtrace_snapshot_size); 320 if (ret < 0) 321 return ret; 322 323 if (ret) 324 rec->samples++; 325 326 return 0; 327 } 328 329 static int record__auxtrace_read_snapshot_all(struct record *rec) 330 { 331 int i; 332 int rc = 0; 333 334 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 335 struct auxtrace_mmap *mm = 336 &rec->evlist->mmap[i].auxtrace_mmap; 337 338 if (!mm->base) 339 continue; 340 341 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) { 342 rc = -1; 343 goto out; 344 } 345 } 346 out: 347 return rc; 348 } 349 350 static void record__read_auxtrace_snapshot(struct record *rec) 351 { 352 pr_debug("Recording AUX area tracing snapshot\n"); 353 if (record__auxtrace_read_snapshot_all(rec) < 0) { 354 trigger_error(&auxtrace_snapshot_trigger); 355 } else { 356 if (auxtrace_record__snapshot_finish(rec->itr)) 357 trigger_error(&auxtrace_snapshot_trigger); 358 else 359 trigger_ready(&auxtrace_snapshot_trigger); 360 } 361 } 362 363 #else 364 365 static inline 366 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 367 struct auxtrace_mmap *mm __maybe_unused) 368 { 369 return 0; 370 } 371 372 static inline 373 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 374 { 375 } 376 377 static inline 378 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 379 { 380 return 0; 381 } 382 383 #endif 384 385 static int record__mmap_evlist(struct record *rec, 386 struct perf_evlist *evlist) 387 { 388 struct record_opts *opts = &rec->opts; 389 char msg[512]; 390 391 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, 392 opts->auxtrace_mmap_pages, 393 opts->auxtrace_snapshot_mode) < 0) { 394 if (errno == EPERM) { 395 pr_err("Permission error mapping pages.\n" 396 "Consider increasing " 397 "/proc/sys/kernel/perf_event_mlock_kb,\n" 398 "or try again with a smaller value of -m/--mmap_pages.\n" 399 "(current value: %u,%u)\n", 400 opts->mmap_pages, opts->auxtrace_mmap_pages); 401 return -errno; 402 } else { 403 pr_err("failed to mmap with %d (%s)\n", errno, 404 str_error_r(errno, msg, sizeof(msg))); 405 if (errno) 406 return -errno; 407 else 408 return -EINVAL; 409 } 410 } 411 return 0; 412 } 413 414 static int record__mmap(struct record *rec) 415 { 416 return record__mmap_evlist(rec, rec->evlist); 417 } 418 419 static int record__open(struct record *rec) 420 { 421 char msg[BUFSIZ]; 422 struct perf_evsel *pos; 423 struct perf_evlist *evlist = rec->evlist; 424 struct perf_session *session = rec->session; 425 struct record_opts *opts = &rec->opts; 426 struct perf_evsel_config_term *err_term; 427 int rc = 0; 428 429 perf_evlist__config(evlist, opts, &callchain_param); 430 431 evlist__for_each_entry(evlist, pos) { 432 try_again: 433 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 434 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 435 if (verbose) 436 ui__warning("%s\n", msg); 437 goto try_again; 438 } 439 440 rc = -errno; 441 perf_evsel__open_strerror(pos, &opts->target, 442 errno, msg, sizeof(msg)); 443 ui__error("%s\n", msg); 444 goto out; 445 } 446 } 447 448 if (perf_evlist__apply_filters(evlist, &pos)) { 449 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 450 pos->filter, perf_evsel__name(pos), errno, 451 str_error_r(errno, msg, sizeof(msg))); 452 rc = -1; 453 goto out; 454 } 455 456 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { 457 error("failed to set config \"%s\" on event %s with %d (%s)\n", 458 err_term->val.drv_cfg, perf_evsel__name(pos), errno, 459 str_error_r(errno, msg, sizeof(msg))); 460 rc = -1; 461 goto out; 462 } 463 464 rc = record__mmap(rec); 465 if (rc) 466 goto out; 467 468 session->evlist = evlist; 469 perf_session__set_id_hdr_size(session); 470 out: 471 return rc; 472 } 473 474 static int process_sample_event(struct perf_tool *tool, 475 union perf_event *event, 476 struct perf_sample *sample, 477 struct perf_evsel *evsel, 478 struct machine *machine) 479 { 480 struct record *rec = container_of(tool, struct record, tool); 481 482 rec->samples++; 483 484 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 485 } 486 487 static int process_buildids(struct record *rec) 488 { 489 struct perf_data_file *file = &rec->file; 490 struct perf_session *session = rec->session; 491 492 if (file->size == 0) 493 return 0; 494 495 /* 496 * During this process, it'll load kernel map and replace the 497 * dso->long_name to a real pathname it found. In this case 498 * we prefer the vmlinux path like 499 * /lib/modules/3.16.4/build/vmlinux 500 * 501 * rather than build-id path (in debug directory). 502 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 503 */ 504 symbol_conf.ignore_vmlinux_buildid = true; 505 506 /* 507 * If --buildid-all is given, it marks all DSO regardless of hits, 508 * so no need to process samples. 509 */ 510 if (rec->buildid_all) 511 rec->tool.sample = NULL; 512 513 return perf_session__process_events(session); 514 } 515 516 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 517 { 518 int err; 519 struct perf_tool *tool = data; 520 /* 521 *As for guest kernel when processing subcommand record&report, 522 *we arrange module mmap prior to guest kernel mmap and trigger 523 *a preload dso because default guest module symbols are loaded 524 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 525 *method is used to avoid symbol missing when the first addr is 526 *in module instead of in guest kernel. 527 */ 528 err = perf_event__synthesize_modules(tool, process_synthesized_event, 529 machine); 530 if (err < 0) 531 pr_err("Couldn't record guest kernel [%d]'s reference" 532 " relocation symbol.\n", machine->pid); 533 534 /* 535 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 536 * have no _text sometimes. 537 */ 538 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 539 machine); 540 if (err < 0) 541 pr_err("Couldn't record guest kernel [%d]'s reference" 542 " relocation symbol.\n", machine->pid); 543 } 544 545 static struct perf_event_header finished_round_event = { 546 .size = sizeof(struct perf_event_header), 547 .type = PERF_RECORD_FINISHED_ROUND, 548 }; 549 550 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 551 bool backward) 552 { 553 u64 bytes_written = rec->bytes_written; 554 int i; 555 int rc = 0; 556 struct perf_mmap *maps; 557 558 if (!evlist) 559 return 0; 560 561 maps = backward ? evlist->backward_mmap : evlist->mmap; 562 if (!maps) 563 return 0; 564 565 if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 566 return 0; 567 568 for (i = 0; i < evlist->nr_mmaps; i++) { 569 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; 570 571 if (maps[i].base) { 572 if (record__mmap_read(rec, &maps[i], 573 evlist->overwrite, backward) != 0) { 574 rc = -1; 575 goto out; 576 } 577 } 578 579 if (mm->base && !rec->opts.auxtrace_snapshot_mode && 580 record__auxtrace_mmap_read(rec, mm) != 0) { 581 rc = -1; 582 goto out; 583 } 584 } 585 586 /* 587 * Mark the round finished in case we wrote 588 * at least one event. 589 */ 590 if (bytes_written != rec->bytes_written) 591 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); 592 593 if (backward) 594 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 595 out: 596 return rc; 597 } 598 599 static int record__mmap_read_all(struct record *rec) 600 { 601 int err; 602 603 err = record__mmap_read_evlist(rec, rec->evlist, false); 604 if (err) 605 return err; 606 607 return record__mmap_read_evlist(rec, rec->evlist, true); 608 } 609 610 static void record__init_features(struct record *rec) 611 { 612 struct perf_session *session = rec->session; 613 int feat; 614 615 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 616 perf_header__set_feat(&session->header, feat); 617 618 if (rec->no_buildid) 619 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 620 621 if (!have_tracepoints(&rec->evlist->entries)) 622 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 623 624 if (!rec->opts.branch_stack) 625 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 626 627 if (!rec->opts.full_auxtrace) 628 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 629 630 perf_header__clear_feat(&session->header, HEADER_STAT); 631 } 632 633 static void 634 record__finish_output(struct record *rec) 635 { 636 struct perf_data_file *file = &rec->file; 637 int fd = perf_data_file__fd(file); 638 639 if (file->is_pipe) 640 return; 641 642 rec->session->header.data_size += rec->bytes_written; 643 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 644 645 if (!rec->no_buildid) { 646 process_buildids(rec); 647 648 if (rec->buildid_all) 649 dsos__hit_all(rec->session); 650 } 651 perf_session__write_header(rec->session, rec->evlist, fd, true); 652 653 return; 654 } 655 656 static int record__synthesize_workload(struct record *rec, bool tail) 657 { 658 int err; 659 struct thread_map *thread_map; 660 661 if (rec->opts.tail_synthesize != tail) 662 return 0; 663 664 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 665 if (thread_map == NULL) 666 return -1; 667 668 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 669 process_synthesized_event, 670 &rec->session->machines.host, 671 rec->opts.sample_address, 672 rec->opts.proc_map_timeout); 673 thread_map__put(thread_map); 674 return err; 675 } 676 677 static int record__synthesize(struct record *rec, bool tail); 678 679 static int 680 record__switch_output(struct record *rec, bool at_exit) 681 { 682 struct perf_data_file *file = &rec->file; 683 int fd, err; 684 685 /* Same Size: "2015122520103046"*/ 686 char timestamp[] = "InvalidTimestamp"; 687 688 record__synthesize(rec, true); 689 if (target__none(&rec->opts.target)) 690 record__synthesize_workload(rec, true); 691 692 rec->samples = 0; 693 record__finish_output(rec); 694 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 695 if (err) { 696 pr_err("Failed to get current timestamp\n"); 697 return -EINVAL; 698 } 699 700 fd = perf_data_file__switch(file, timestamp, 701 rec->session->header.data_offset, 702 at_exit); 703 if (fd >= 0 && !at_exit) { 704 rec->bytes_written = 0; 705 rec->session->header.data_size = 0; 706 } 707 708 if (!quiet) 709 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 710 file->path, timestamp); 711 712 /* Output tracking events */ 713 if (!at_exit) { 714 record__synthesize(rec, false); 715 716 /* 717 * In 'perf record --switch-output' without -a, 718 * record__synthesize() in record__switch_output() won't 719 * generate tracking events because there's no thread_map 720 * in evlist. Which causes newly created perf.data doesn't 721 * contain map and comm information. 722 * Create a fake thread_map and directly call 723 * perf_event__synthesize_thread_map() for those events. 724 */ 725 if (target__none(&rec->opts.target)) 726 record__synthesize_workload(rec, false); 727 } 728 return fd; 729 } 730 731 static volatile int workload_exec_errno; 732 733 /* 734 * perf_evlist__prepare_workload will send a SIGUSR1 735 * if the fork fails, since we asked by setting its 736 * want_signal to true. 737 */ 738 static void workload_exec_failed_signal(int signo __maybe_unused, 739 siginfo_t *info, 740 void *ucontext __maybe_unused) 741 { 742 workload_exec_errno = info->si_value.sival_int; 743 done = 1; 744 child_finished = 1; 745 } 746 747 static void snapshot_sig_handler(int sig); 748 static void alarm_sig_handler(int sig); 749 750 int __weak 751 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 752 struct perf_tool *tool __maybe_unused, 753 perf_event__handler_t process __maybe_unused, 754 struct machine *machine __maybe_unused) 755 { 756 return 0; 757 } 758 759 static const struct perf_event_mmap_page * 760 perf_evlist__pick_pc(struct perf_evlist *evlist) 761 { 762 if (evlist) { 763 if (evlist->mmap && evlist->mmap[0].base) 764 return evlist->mmap[0].base; 765 if (evlist->backward_mmap && evlist->backward_mmap[0].base) 766 return evlist->backward_mmap[0].base; 767 } 768 return NULL; 769 } 770 771 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 772 { 773 const struct perf_event_mmap_page *pc; 774 775 pc = perf_evlist__pick_pc(rec->evlist); 776 if (pc) 777 return pc; 778 return NULL; 779 } 780 781 static int record__synthesize(struct record *rec, bool tail) 782 { 783 struct perf_session *session = rec->session; 784 struct machine *machine = &session->machines.host; 785 struct perf_data_file *file = &rec->file; 786 struct record_opts *opts = &rec->opts; 787 struct perf_tool *tool = &rec->tool; 788 int fd = perf_data_file__fd(file); 789 int err = 0; 790 791 if (rec->opts.tail_synthesize != tail) 792 return 0; 793 794 if (file->is_pipe) { 795 err = perf_event__synthesize_attrs(tool, session, 796 process_synthesized_event); 797 if (err < 0) { 798 pr_err("Couldn't synthesize attrs.\n"); 799 goto out; 800 } 801 802 if (have_tracepoints(&rec->evlist->entries)) { 803 /* 804 * FIXME err <= 0 here actually means that 805 * there were no tracepoints so its not really 806 * an error, just that we don't need to 807 * synthesize anything. We really have to 808 * return this more properly and also 809 * propagate errors that now are calling die() 810 */ 811 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 812 process_synthesized_event); 813 if (err <= 0) { 814 pr_err("Couldn't record tracing data.\n"); 815 goto out; 816 } 817 rec->bytes_written += err; 818 } 819 } 820 821 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 822 process_synthesized_event, machine); 823 if (err) 824 goto out; 825 826 if (rec->opts.full_auxtrace) { 827 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 828 session, process_synthesized_event); 829 if (err) 830 goto out; 831 } 832 833 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 834 machine); 835 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 836 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 837 "Check /proc/kallsyms permission or run as root.\n"); 838 839 err = perf_event__synthesize_modules(tool, process_synthesized_event, 840 machine); 841 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 842 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 843 "Check /proc/modules permission or run as root.\n"); 844 845 if (perf_guest) { 846 machines__process_guests(&session->machines, 847 perf_event__synthesize_guest_os, tool); 848 } 849 850 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 851 process_synthesized_event, opts->sample_address, 852 opts->proc_map_timeout); 853 out: 854 return err; 855 } 856 857 static int __cmd_record(struct record *rec, int argc, const char **argv) 858 { 859 int err; 860 int status = 0; 861 unsigned long waking = 0; 862 const bool forks = argc > 0; 863 struct machine *machine; 864 struct perf_tool *tool = &rec->tool; 865 struct record_opts *opts = &rec->opts; 866 struct perf_data_file *file = &rec->file; 867 struct perf_session *session; 868 bool disabled = false, draining = false; 869 int fd; 870 871 rec->progname = argv[0]; 872 873 atexit(record__sig_exit); 874 signal(SIGCHLD, sig_handler); 875 signal(SIGINT, sig_handler); 876 signal(SIGTERM, sig_handler); 877 signal(SIGSEGV, sigsegv_handler); 878 879 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 880 signal(SIGUSR2, snapshot_sig_handler); 881 if (rec->opts.auxtrace_snapshot_mode) 882 trigger_on(&auxtrace_snapshot_trigger); 883 if (rec->switch_output.enabled) 884 trigger_on(&switch_output_trigger); 885 } else { 886 signal(SIGUSR2, SIG_IGN); 887 } 888 889 session = perf_session__new(file, false, tool); 890 if (session == NULL) { 891 pr_err("Perf session creation failed.\n"); 892 return -1; 893 } 894 895 fd = perf_data_file__fd(file); 896 rec->session = session; 897 898 record__init_features(rec); 899 900 if (forks) { 901 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 902 argv, file->is_pipe, 903 workload_exec_failed_signal); 904 if (err < 0) { 905 pr_err("Couldn't run the workload!\n"); 906 status = err; 907 goto out_delete_session; 908 } 909 } 910 911 if (record__open(rec) != 0) { 912 err = -1; 913 goto out_child; 914 } 915 916 err = bpf__apply_obj_config(); 917 if (err) { 918 char errbuf[BUFSIZ]; 919 920 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 921 pr_err("ERROR: Apply config to BPF failed: %s\n", 922 errbuf); 923 goto out_child; 924 } 925 926 /* 927 * Normally perf_session__new would do this, but it doesn't have the 928 * evlist. 929 */ 930 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 931 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 932 rec->tool.ordered_events = false; 933 } 934 935 if (!rec->evlist->nr_groups) 936 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 937 938 if (file->is_pipe) { 939 err = perf_header__write_pipe(fd); 940 if (err < 0) 941 goto out_child; 942 } else { 943 err = perf_session__write_header(session, rec->evlist, fd, false); 944 if (err < 0) 945 goto out_child; 946 } 947 948 if (!rec->no_buildid 949 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 950 pr_err("Couldn't generate buildids. " 951 "Use --no-buildid to profile anyway.\n"); 952 err = -1; 953 goto out_child; 954 } 955 956 machine = &session->machines.host; 957 958 err = record__synthesize(rec, false); 959 if (err < 0) 960 goto out_child; 961 962 if (rec->realtime_prio) { 963 struct sched_param param; 964 965 param.sched_priority = rec->realtime_prio; 966 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 967 pr_err("Could not set realtime priority.\n"); 968 err = -1; 969 goto out_child; 970 } 971 } 972 973 /* 974 * When perf is starting the traced process, all the events 975 * (apart from group members) have enable_on_exec=1 set, 976 * so don't spoil it by prematurely enabling them. 977 */ 978 if (!target__none(&opts->target) && !opts->initial_delay) 979 perf_evlist__enable(rec->evlist); 980 981 /* 982 * Let the child rip 983 */ 984 if (forks) { 985 union perf_event *event; 986 987 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 988 if (event == NULL) { 989 err = -ENOMEM; 990 goto out_child; 991 } 992 993 /* 994 * Some H/W events are generated before COMM event 995 * which is emitted during exec(), so perf script 996 * cannot see a correct process name for those events. 997 * Synthesize COMM event to prevent it. 998 */ 999 perf_event__synthesize_comm(tool, event, 1000 rec->evlist->workload.pid, 1001 process_synthesized_event, 1002 machine); 1003 free(event); 1004 1005 perf_evlist__start_workload(rec->evlist); 1006 } 1007 1008 if (opts->initial_delay) { 1009 usleep(opts->initial_delay * USEC_PER_MSEC); 1010 perf_evlist__enable(rec->evlist); 1011 } 1012 1013 trigger_ready(&auxtrace_snapshot_trigger); 1014 trigger_ready(&switch_output_trigger); 1015 perf_hooks__invoke_record_start(); 1016 for (;;) { 1017 unsigned long long hits = rec->samples; 1018 1019 /* 1020 * rec->evlist->bkw_mmap_state is possible to be 1021 * BKW_MMAP_EMPTY here: when done == true and 1022 * hits != rec->samples in previous round. 1023 * 1024 * perf_evlist__toggle_bkw_mmap ensure we never 1025 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1026 */ 1027 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1028 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1029 1030 if (record__mmap_read_all(rec) < 0) { 1031 trigger_error(&auxtrace_snapshot_trigger); 1032 trigger_error(&switch_output_trigger); 1033 err = -1; 1034 goto out_child; 1035 } 1036 1037 if (auxtrace_record__snapshot_started) { 1038 auxtrace_record__snapshot_started = 0; 1039 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1040 record__read_auxtrace_snapshot(rec); 1041 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1042 pr_err("AUX area tracing snapshot failed\n"); 1043 err = -1; 1044 goto out_child; 1045 } 1046 } 1047 1048 if (trigger_is_hit(&switch_output_trigger)) { 1049 /* 1050 * If switch_output_trigger is hit, the data in 1051 * overwritable ring buffer should have been collected, 1052 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1053 * 1054 * If SIGUSR2 raise after or during record__mmap_read_all(), 1055 * record__mmap_read_all() didn't collect data from 1056 * overwritable ring buffer. Read again. 1057 */ 1058 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1059 continue; 1060 trigger_ready(&switch_output_trigger); 1061 1062 /* 1063 * Reenable events in overwrite ring buffer after 1064 * record__mmap_read_all(): we should have collected 1065 * data from it. 1066 */ 1067 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1068 1069 if (!quiet) 1070 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1071 waking); 1072 waking = 0; 1073 fd = record__switch_output(rec, false); 1074 if (fd < 0) { 1075 pr_err("Failed to switch to new file\n"); 1076 trigger_error(&switch_output_trigger); 1077 err = fd; 1078 goto out_child; 1079 } 1080 1081 /* re-arm the alarm */ 1082 if (rec->switch_output.time) 1083 alarm(rec->switch_output.time); 1084 } 1085 1086 if (hits == rec->samples) { 1087 if (done || draining) 1088 break; 1089 err = perf_evlist__poll(rec->evlist, -1); 1090 /* 1091 * Propagate error, only if there's any. Ignore positive 1092 * number of returned events and interrupt error. 1093 */ 1094 if (err > 0 || (err < 0 && errno == EINTR)) 1095 err = 0; 1096 waking++; 1097 1098 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1099 draining = true; 1100 } 1101 1102 /* 1103 * When perf is starting the traced process, at the end events 1104 * die with the process and we wait for that. Thus no need to 1105 * disable events in this case. 1106 */ 1107 if (done && !disabled && !target__none(&opts->target)) { 1108 trigger_off(&auxtrace_snapshot_trigger); 1109 perf_evlist__disable(rec->evlist); 1110 disabled = true; 1111 } 1112 } 1113 trigger_off(&auxtrace_snapshot_trigger); 1114 trigger_off(&switch_output_trigger); 1115 1116 if (forks && workload_exec_errno) { 1117 char msg[STRERR_BUFSIZE]; 1118 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1119 pr_err("Workload failed: %s\n", emsg); 1120 err = -1; 1121 goto out_child; 1122 } 1123 1124 if (!quiet) 1125 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1126 1127 if (target__none(&rec->opts.target)) 1128 record__synthesize_workload(rec, true); 1129 1130 out_child: 1131 if (forks) { 1132 int exit_status; 1133 1134 if (!child_finished) 1135 kill(rec->evlist->workload.pid, SIGTERM); 1136 1137 wait(&exit_status); 1138 1139 if (err < 0) 1140 status = err; 1141 else if (WIFEXITED(exit_status)) 1142 status = WEXITSTATUS(exit_status); 1143 else if (WIFSIGNALED(exit_status)) 1144 signr = WTERMSIG(exit_status); 1145 } else 1146 status = err; 1147 1148 record__synthesize(rec, true); 1149 /* this will be recalculated during process_buildids() */ 1150 rec->samples = 0; 1151 1152 if (!err) { 1153 if (!rec->timestamp_filename) { 1154 record__finish_output(rec); 1155 } else { 1156 fd = record__switch_output(rec, true); 1157 if (fd < 0) { 1158 status = fd; 1159 goto out_delete_session; 1160 } 1161 } 1162 } 1163 1164 perf_hooks__invoke_record_end(); 1165 1166 if (!err && !quiet) { 1167 char samples[128]; 1168 const char *postfix = rec->timestamp_filename ? 1169 ".<timestamp>" : ""; 1170 1171 if (rec->samples && !rec->opts.full_auxtrace) 1172 scnprintf(samples, sizeof(samples), 1173 " (%" PRIu64 " samples)", rec->samples); 1174 else 1175 samples[0] = '\0'; 1176 1177 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1178 perf_data_file__size(file) / 1024.0 / 1024.0, 1179 file->path, postfix, samples); 1180 } 1181 1182 out_delete_session: 1183 perf_session__delete(session); 1184 return status; 1185 } 1186 1187 static void callchain_debug(struct callchain_param *callchain) 1188 { 1189 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1190 1191 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1192 1193 if (callchain->record_mode == CALLCHAIN_DWARF) 1194 pr_debug("callchain: stack dump size %d\n", 1195 callchain->dump_size); 1196 } 1197 1198 int record_opts__parse_callchain(struct record_opts *record, 1199 struct callchain_param *callchain, 1200 const char *arg, bool unset) 1201 { 1202 int ret; 1203 callchain->enabled = !unset; 1204 1205 /* --no-call-graph */ 1206 if (unset) { 1207 callchain->record_mode = CALLCHAIN_NONE; 1208 pr_debug("callchain: disabled\n"); 1209 return 0; 1210 } 1211 1212 ret = parse_callchain_record_opt(arg, callchain); 1213 if (!ret) { 1214 /* Enable data address sampling for DWARF unwind. */ 1215 if (callchain->record_mode == CALLCHAIN_DWARF) 1216 record->sample_address = true; 1217 callchain_debug(callchain); 1218 } 1219 1220 return ret; 1221 } 1222 1223 int record_parse_callchain_opt(const struct option *opt, 1224 const char *arg, 1225 int unset) 1226 { 1227 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1228 } 1229 1230 int record_callchain_opt(const struct option *opt, 1231 const char *arg __maybe_unused, 1232 int unset __maybe_unused) 1233 { 1234 struct callchain_param *callchain = opt->value; 1235 1236 callchain->enabled = true; 1237 1238 if (callchain->record_mode == CALLCHAIN_NONE) 1239 callchain->record_mode = CALLCHAIN_FP; 1240 1241 callchain_debug(callchain); 1242 return 0; 1243 } 1244 1245 static int perf_record_config(const char *var, const char *value, void *cb) 1246 { 1247 struct record *rec = cb; 1248 1249 if (!strcmp(var, "record.build-id")) { 1250 if (!strcmp(value, "cache")) 1251 rec->no_buildid_cache = false; 1252 else if (!strcmp(value, "no-cache")) 1253 rec->no_buildid_cache = true; 1254 else if (!strcmp(value, "skip")) 1255 rec->no_buildid = true; 1256 else 1257 return -1; 1258 return 0; 1259 } 1260 if (!strcmp(var, "record.call-graph")) 1261 var = "call-graph.record-mode"; /* fall-through */ 1262 1263 return perf_default_config(var, value, cb); 1264 } 1265 1266 struct clockid_map { 1267 const char *name; 1268 int clockid; 1269 }; 1270 1271 #define CLOCKID_MAP(n, c) \ 1272 { .name = n, .clockid = (c), } 1273 1274 #define CLOCKID_END { .name = NULL, } 1275 1276 1277 /* 1278 * Add the missing ones, we need to build on many distros... 1279 */ 1280 #ifndef CLOCK_MONOTONIC_RAW 1281 #define CLOCK_MONOTONIC_RAW 4 1282 #endif 1283 #ifndef CLOCK_BOOTTIME 1284 #define CLOCK_BOOTTIME 7 1285 #endif 1286 #ifndef CLOCK_TAI 1287 #define CLOCK_TAI 11 1288 #endif 1289 1290 static const struct clockid_map clockids[] = { 1291 /* available for all events, NMI safe */ 1292 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1293 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1294 1295 /* available for some events */ 1296 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1297 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1298 CLOCKID_MAP("tai", CLOCK_TAI), 1299 1300 /* available for the lazy */ 1301 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1302 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1303 CLOCKID_MAP("real", CLOCK_REALTIME), 1304 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1305 1306 CLOCKID_END, 1307 }; 1308 1309 static int parse_clockid(const struct option *opt, const char *str, int unset) 1310 { 1311 struct record_opts *opts = (struct record_opts *)opt->value; 1312 const struct clockid_map *cm; 1313 const char *ostr = str; 1314 1315 if (unset) { 1316 opts->use_clockid = 0; 1317 return 0; 1318 } 1319 1320 /* no arg passed */ 1321 if (!str) 1322 return 0; 1323 1324 /* no setting it twice */ 1325 if (opts->use_clockid) 1326 return -1; 1327 1328 opts->use_clockid = true; 1329 1330 /* if its a number, we're done */ 1331 if (sscanf(str, "%d", &opts->clockid) == 1) 1332 return 0; 1333 1334 /* allow a "CLOCK_" prefix to the name */ 1335 if (!strncasecmp(str, "CLOCK_", 6)) 1336 str += 6; 1337 1338 for (cm = clockids; cm->name; cm++) { 1339 if (!strcasecmp(str, cm->name)) { 1340 opts->clockid = cm->clockid; 1341 return 0; 1342 } 1343 } 1344 1345 opts->use_clockid = false; 1346 ui__warning("unknown clockid %s, check man page\n", ostr); 1347 return -1; 1348 } 1349 1350 static int record__parse_mmap_pages(const struct option *opt, 1351 const char *str, 1352 int unset __maybe_unused) 1353 { 1354 struct record_opts *opts = opt->value; 1355 char *s, *p; 1356 unsigned int mmap_pages; 1357 int ret; 1358 1359 if (!str) 1360 return -EINVAL; 1361 1362 s = strdup(str); 1363 if (!s) 1364 return -ENOMEM; 1365 1366 p = strchr(s, ','); 1367 if (p) 1368 *p = '\0'; 1369 1370 if (*s) { 1371 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1372 if (ret) 1373 goto out_free; 1374 opts->mmap_pages = mmap_pages; 1375 } 1376 1377 if (!p) { 1378 ret = 0; 1379 goto out_free; 1380 } 1381 1382 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1383 if (ret) 1384 goto out_free; 1385 1386 opts->auxtrace_mmap_pages = mmap_pages; 1387 1388 out_free: 1389 free(s); 1390 return ret; 1391 } 1392 1393 static void switch_output_size_warn(struct record *rec) 1394 { 1395 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1396 struct switch_output *s = &rec->switch_output; 1397 1398 wakeup_size /= 2; 1399 1400 if (s->size < wakeup_size) { 1401 char buf[100]; 1402 1403 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1404 pr_warning("WARNING: switch-output data size lower than " 1405 "wakeup kernel buffer size (%s) " 1406 "expect bigger perf.data sizes\n", buf); 1407 } 1408 } 1409 1410 static int switch_output_setup(struct record *rec) 1411 { 1412 struct switch_output *s = &rec->switch_output; 1413 static struct parse_tag tags_size[] = { 1414 { .tag = 'B', .mult = 1 }, 1415 { .tag = 'K', .mult = 1 << 10 }, 1416 { .tag = 'M', .mult = 1 << 20 }, 1417 { .tag = 'G', .mult = 1 << 30 }, 1418 { .tag = 0 }, 1419 }; 1420 static struct parse_tag tags_time[] = { 1421 { .tag = 's', .mult = 1 }, 1422 { .tag = 'm', .mult = 60 }, 1423 { .tag = 'h', .mult = 60*60 }, 1424 { .tag = 'd', .mult = 60*60*24 }, 1425 { .tag = 0 }, 1426 }; 1427 unsigned long val; 1428 1429 if (!s->set) 1430 return 0; 1431 1432 if (!strcmp(s->str, "signal")) { 1433 s->signal = true; 1434 pr_debug("switch-output with SIGUSR2 signal\n"); 1435 goto enabled; 1436 } 1437 1438 val = parse_tag_value(s->str, tags_size); 1439 if (val != (unsigned long) -1) { 1440 s->size = val; 1441 pr_debug("switch-output with %s size threshold\n", s->str); 1442 goto enabled; 1443 } 1444 1445 val = parse_tag_value(s->str, tags_time); 1446 if (val != (unsigned long) -1) { 1447 s->time = val; 1448 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 1449 s->str, s->time); 1450 goto enabled; 1451 } 1452 1453 return -1; 1454 1455 enabled: 1456 rec->timestamp_filename = true; 1457 s->enabled = true; 1458 1459 if (s->size && !rec->opts.no_buffering) 1460 switch_output_size_warn(rec); 1461 1462 return 0; 1463 } 1464 1465 static const char * const __record_usage[] = { 1466 "perf record [<options>] [<command>]", 1467 "perf record [<options>] -- <command> [<options>]", 1468 NULL 1469 }; 1470 const char * const *record_usage = __record_usage; 1471 1472 /* 1473 * XXX Ideally would be local to cmd_record() and passed to a record__new 1474 * because we need to have access to it in record__exit, that is called 1475 * after cmd_record() exits, but since record_options need to be accessible to 1476 * builtin-script, leave it here. 1477 * 1478 * At least we don't ouch it in all the other functions here directly. 1479 * 1480 * Just say no to tons of global variables, sigh. 1481 */ 1482 static struct record record = { 1483 .opts = { 1484 .sample_time = true, 1485 .mmap_pages = UINT_MAX, 1486 .user_freq = UINT_MAX, 1487 .user_interval = ULLONG_MAX, 1488 .freq = 4000, 1489 .target = { 1490 .uses_mmap = true, 1491 .default_per_cpu = true, 1492 }, 1493 .proc_map_timeout = 500, 1494 }, 1495 .tool = { 1496 .sample = process_sample_event, 1497 .fork = perf_event__process_fork, 1498 .exit = perf_event__process_exit, 1499 .comm = perf_event__process_comm, 1500 .mmap = perf_event__process_mmap, 1501 .mmap2 = perf_event__process_mmap2, 1502 .ordered_events = true, 1503 }, 1504 }; 1505 1506 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1507 "\n\t\t\t\tDefault: fp"; 1508 1509 static bool dry_run; 1510 1511 /* 1512 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1513 * with it and switch to use the library functions in perf_evlist that came 1514 * from builtin-record.c, i.e. use record_opts, 1515 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1516 * using pipes, etc. 1517 */ 1518 static struct option __record_options[] = { 1519 OPT_CALLBACK('e', "event", &record.evlist, "event", 1520 "event selector. use 'perf list' to list available events", 1521 parse_events_option), 1522 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1523 "event filter", parse_filter), 1524 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1525 NULL, "don't record events from perf itself", 1526 exclude_perf), 1527 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1528 "record events on existing process id"), 1529 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1530 "record events on existing thread id"), 1531 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1532 "collect data with this RT SCHED_FIFO priority"), 1533 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1534 "collect data without buffering"), 1535 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1536 "collect raw sample records from all opened counters"), 1537 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1538 "system-wide collection from all CPUs"), 1539 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1540 "list of cpus to monitor"), 1541 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1542 OPT_STRING('o', "output", &record.file.path, "file", 1543 "output file name"), 1544 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1545 &record.opts.no_inherit_set, 1546 "child tasks do not inherit counters"), 1547 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1548 "synthesize non-sample events at the end of output"), 1549 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1550 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1551 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1552 "number of mmap data pages and AUX area tracing mmap pages", 1553 record__parse_mmap_pages), 1554 OPT_BOOLEAN(0, "group", &record.opts.group, 1555 "put the counters into a counter group"), 1556 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1557 NULL, "enables call-graph recording" , 1558 &record_callchain_opt), 1559 OPT_CALLBACK(0, "call-graph", &record.opts, 1560 "record_mode[,record_size]", record_callchain_help, 1561 &record_parse_callchain_opt), 1562 OPT_INCR('v', "verbose", &verbose, 1563 "be more verbose (show counter open errors, etc)"), 1564 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1565 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1566 "per thread counts"), 1567 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1568 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1569 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1570 &record.opts.sample_time_set, 1571 "Record the sample timestamps"), 1572 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), 1573 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1574 "don't sample"), 1575 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 1576 &record.no_buildid_cache_set, 1577 "do not update the buildid cache"), 1578 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 1579 &record.no_buildid_set, 1580 "do not collect buildids in perf.data"), 1581 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 1582 "monitor event in cgroup name only", 1583 parse_cgroups), 1584 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 1585 "ms to wait before starting measurement after program start"), 1586 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 1587 "user to profile"), 1588 1589 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 1590 "branch any", "sample any taken branches", 1591 parse_branch_stack), 1592 1593 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 1594 "branch filter mask", "branch stack filter modes", 1595 parse_branch_stack), 1596 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 1597 "sample by weight (on special events only)"), 1598 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 1599 "sample transaction flags (special events only)"), 1600 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 1601 "use per-thread mmaps"), 1602 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 1603 "sample selected machine registers on interrupt," 1604 " use -I ? to list register names", parse_regs), 1605 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 1606 "Record running/enabled time of read (:S) events"), 1607 OPT_CALLBACK('k', "clockid", &record.opts, 1608 "clockid", "clockid to use for events, see clock_gettime()", 1609 parse_clockid), 1610 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 1611 "opts", "AUX area tracing Snapshot Mode", ""), 1612 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1613 "per thread proc mmap processing timeout in ms"), 1614 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 1615 "Record context switch events"), 1616 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 1617 "Configure all used events to run in kernel space.", 1618 PARSE_OPT_EXCLUSIVE), 1619 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 1620 "Configure all used events to run in user space.", 1621 PARSE_OPT_EXCLUSIVE), 1622 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 1623 "clang binary to use for compiling BPF scriptlets"), 1624 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 1625 "options passed to clang when compiling BPF scriptlets"), 1626 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1627 "file", "vmlinux pathname"), 1628 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 1629 "Record build-id of all DSOs regardless of hits"), 1630 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 1631 "append timestamp to output filename"), 1632 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 1633 &record.switch_output.set, "signal,size,time", 1634 "Switch output when receive SIGUSR2 or cross size,time threshold", 1635 "signal"), 1636 OPT_BOOLEAN(0, "dry-run", &dry_run, 1637 "Parse options then exit"), 1638 OPT_END() 1639 }; 1640 1641 struct option *record_options = __record_options; 1642 1643 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) 1644 { 1645 int err; 1646 struct record *rec = &record; 1647 char errbuf[BUFSIZ]; 1648 1649 #ifndef HAVE_LIBBPF_SUPPORT 1650 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 1651 set_nobuild('\0', "clang-path", true); 1652 set_nobuild('\0', "clang-opt", true); 1653 # undef set_nobuild 1654 #endif 1655 1656 #ifndef HAVE_BPF_PROLOGUE 1657 # if !defined (HAVE_DWARF_SUPPORT) 1658 # define REASON "NO_DWARF=1" 1659 # elif !defined (HAVE_LIBBPF_SUPPORT) 1660 # define REASON "NO_LIBBPF=1" 1661 # else 1662 # define REASON "this architecture doesn't support BPF prologue" 1663 # endif 1664 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 1665 set_nobuild('\0', "vmlinux", true); 1666 # undef set_nobuild 1667 # undef REASON 1668 #endif 1669 1670 rec->evlist = perf_evlist__new(); 1671 if (rec->evlist == NULL) 1672 return -ENOMEM; 1673 1674 err = perf_config(perf_record_config, rec); 1675 if (err) 1676 return err; 1677 1678 argc = parse_options(argc, argv, record_options, record_usage, 1679 PARSE_OPT_STOP_AT_NON_OPTION); 1680 if (!argc && target__none(&rec->opts.target)) 1681 usage_with_options(record_usage, record_options); 1682 1683 if (nr_cgroups && !rec->opts.target.system_wide) { 1684 usage_with_options_msg(record_usage, record_options, 1685 "cgroup monitoring only available in system-wide mode"); 1686 1687 } 1688 if (rec->opts.record_switch_events && 1689 !perf_can_record_switch_events()) { 1690 ui__error("kernel does not support recording context switch events\n"); 1691 parse_options_usage(record_usage, record_options, "switch-events", 0); 1692 return -EINVAL; 1693 } 1694 1695 if (switch_output_setup(rec)) { 1696 parse_options_usage(record_usage, record_options, "switch-output", 0); 1697 return -EINVAL; 1698 } 1699 1700 if (rec->switch_output.time) { 1701 signal(SIGALRM, alarm_sig_handler); 1702 alarm(rec->switch_output.time); 1703 } 1704 1705 if (!rec->itr) { 1706 rec->itr = auxtrace_record__init(rec->evlist, &err); 1707 if (err) 1708 goto out; 1709 } 1710 1711 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 1712 rec->opts.auxtrace_snapshot_opts); 1713 if (err) 1714 goto out; 1715 1716 /* 1717 * Allow aliases to facilitate the lookup of symbols for address 1718 * filters. Refer to auxtrace_parse_filters(). 1719 */ 1720 symbol_conf.allow_aliases = true; 1721 1722 symbol__init(NULL); 1723 1724 err = auxtrace_parse_filters(rec->evlist); 1725 if (err) 1726 goto out; 1727 1728 if (dry_run) 1729 goto out; 1730 1731 err = bpf__setup_stdout(rec->evlist); 1732 if (err) { 1733 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 1734 pr_err("ERROR: Setup BPF stdout failed: %s\n", 1735 errbuf); 1736 goto out; 1737 } 1738 1739 err = -ENOMEM; 1740 1741 if (symbol_conf.kptr_restrict) 1742 pr_warning( 1743 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1744 "check /proc/sys/kernel/kptr_restrict.\n\n" 1745 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1746 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1747 "Samples in kernel modules won't be resolved at all.\n\n" 1748 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1749 "even with a suitable vmlinux or kallsyms file.\n\n"); 1750 1751 if (rec->no_buildid_cache || rec->no_buildid) { 1752 disable_buildid_cache(); 1753 } else if (rec->switch_output.enabled) { 1754 /* 1755 * In 'perf record --switch-output', disable buildid 1756 * generation by default to reduce data file switching 1757 * overhead. Still generate buildid if they are required 1758 * explicitly using 1759 * 1760 * perf record --switch-output --no-no-buildid \ 1761 * --no-no-buildid-cache 1762 * 1763 * Following code equals to: 1764 * 1765 * if ((rec->no_buildid || !rec->no_buildid_set) && 1766 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 1767 * disable_buildid_cache(); 1768 */ 1769 bool disable = true; 1770 1771 if (rec->no_buildid_set && !rec->no_buildid) 1772 disable = false; 1773 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 1774 disable = false; 1775 if (disable) { 1776 rec->no_buildid = true; 1777 rec->no_buildid_cache = true; 1778 disable_buildid_cache(); 1779 } 1780 } 1781 1782 if (record.opts.overwrite) 1783 record.opts.tail_synthesize = true; 1784 1785 if (rec->evlist->nr_entries == 0 && 1786 perf_evlist__add_default(rec->evlist) < 0) { 1787 pr_err("Not enough memory for event selector list\n"); 1788 goto out; 1789 } 1790 1791 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 1792 rec->opts.no_inherit = true; 1793 1794 err = target__validate(&rec->opts.target); 1795 if (err) { 1796 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1797 ui__warning("%s", errbuf); 1798 } 1799 1800 err = target__parse_uid(&rec->opts.target); 1801 if (err) { 1802 int saved_errno = errno; 1803 1804 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1805 ui__error("%s", errbuf); 1806 1807 err = -saved_errno; 1808 goto out; 1809 } 1810 1811 /* Enable ignoring missing threads when -u option is defined. */ 1812 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; 1813 1814 err = -ENOMEM; 1815 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1816 usage_with_options(record_usage, record_options); 1817 1818 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 1819 if (err) 1820 goto out; 1821 1822 /* 1823 * We take all buildids when the file contains 1824 * AUX area tracing data because we do not decode the 1825 * trace because it would take too long. 1826 */ 1827 if (rec->opts.full_auxtrace) 1828 rec->buildid_all = true; 1829 1830 if (record_opts__config(&rec->opts)) { 1831 err = -EINVAL; 1832 goto out; 1833 } 1834 1835 err = __cmd_record(&record, argc, argv); 1836 out: 1837 perf_evlist__delete(rec->evlist); 1838 symbol__exit(); 1839 auxtrace_record__free(rec->itr); 1840 return err; 1841 } 1842 1843 static void snapshot_sig_handler(int sig __maybe_unused) 1844 { 1845 struct record *rec = &record; 1846 1847 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1848 trigger_hit(&auxtrace_snapshot_trigger); 1849 auxtrace_record__snapshot_started = 1; 1850 if (auxtrace_record__snapshot_start(record.itr)) 1851 trigger_error(&auxtrace_snapshot_trigger); 1852 } 1853 1854 if (switch_output_signal(rec)) 1855 trigger_hit(&switch_output_trigger); 1856 } 1857 1858 static void alarm_sig_handler(int sig __maybe_unused) 1859 { 1860 struct record *rec = &record; 1861 1862 if (switch_output_time(rec)) 1863 trigger_hit(&switch_output_trigger); 1864 } 1865