1 /* 2 * builtin-record.c 3 * 4 * Builtin record command: Record the profile of a workload 5 * (or a CPU, or a PID) into the perf.data output file - for 6 * later analysis via perf report. 7 */ 8 #include "builtin.h" 9 10 #include "perf.h" 11 12 #include "util/build-id.h" 13 #include "util/util.h" 14 #include <subcmd/parse-options.h> 15 #include "util/parse-events.h" 16 #include "util/config.h" 17 18 #include "util/callchain.h" 19 #include "util/cgroup.h" 20 #include "util/header.h" 21 #include "util/event.h" 22 #include "util/evlist.h" 23 #include "util/evsel.h" 24 #include "util/debug.h" 25 #include "util/drv_configs.h" 26 #include "util/session.h" 27 #include "util/tool.h" 28 #include "util/symbol.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/llvm-utils.h" 38 #include "util/bpf-loader.h" 39 #include "util/trigger.h" 40 #include "asm/bug.h" 41 42 #include <unistd.h> 43 #include <sched.h> 44 #include <sys/mman.h> 45 #include <asm/bug.h> 46 #include <linux/time64.h> 47 48 struct record { 49 struct perf_tool tool; 50 struct record_opts opts; 51 u64 bytes_written; 52 struct perf_data_file file; 53 struct auxtrace_record *itr; 54 struct perf_evlist *evlist; 55 struct perf_session *session; 56 const char *progname; 57 int realtime_prio; 58 bool no_buildid; 59 bool no_buildid_set; 60 bool no_buildid_cache; 61 bool no_buildid_cache_set; 62 bool buildid_all; 63 bool timestamp_filename; 64 bool switch_output; 65 unsigned long long samples; 66 }; 67 68 static int record__write(struct record *rec, void *bf, size_t size) 69 { 70 if (perf_data_file__write(rec->session->file, bf, size) < 0) { 71 pr_err("failed to write perf data, error: %m\n"); 72 return -1; 73 } 74 75 rec->bytes_written += size; 76 return 0; 77 } 78 79 static int process_synthesized_event(struct perf_tool *tool, 80 union perf_event *event, 81 struct perf_sample *sample __maybe_unused, 82 struct machine *machine __maybe_unused) 83 { 84 struct record *rec = container_of(tool, struct record, tool); 85 return record__write(rec, event, event->header.size); 86 } 87 88 static int 89 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) 90 { 91 struct perf_event_header *pheader; 92 u64 evt_head = head; 93 int size = mask + 1; 94 95 pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); 96 pheader = (struct perf_event_header *)(buf + (head & mask)); 97 *start = head; 98 while (true) { 99 if (evt_head - head >= (unsigned int)size) { 100 pr_debug("Finished reading backward ring buffer: rewind\n"); 101 if (evt_head - head > (unsigned int)size) 102 evt_head -= pheader->size; 103 *end = evt_head; 104 return 0; 105 } 106 107 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); 108 109 if (pheader->size == 0) { 110 pr_debug("Finished reading backward ring buffer: get start\n"); 111 *end = evt_head; 112 return 0; 113 } 114 115 evt_head += pheader->size; 116 pr_debug3("move evt_head: %"PRIx64"\n", evt_head); 117 } 118 WARN_ONCE(1, "Shouldn't get here\n"); 119 return -1; 120 } 121 122 static int 123 rb_find_range(void *data, int mask, u64 head, u64 old, 124 u64 *start, u64 *end, bool backward) 125 { 126 if (!backward) { 127 *start = old; 128 *end = head; 129 return 0; 130 } 131 132 return backward_rb_find_range(data, mask, head, start, end); 133 } 134 135 static int 136 record__mmap_read(struct record *rec, struct perf_mmap *md, 137 bool overwrite, bool backward) 138 { 139 u64 head = perf_mmap__read_head(md); 140 u64 old = md->prev; 141 u64 end = head, start = old; 142 unsigned char *data = md->base + page_size; 143 unsigned long size; 144 void *buf; 145 int rc = 0; 146 147 if (rb_find_range(data, md->mask, head, 148 old, &start, &end, backward)) 149 return -1; 150 151 if (start == end) 152 return 0; 153 154 rec->samples++; 155 156 size = end - start; 157 if (size > (unsigned long)(md->mask) + 1) { 158 WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); 159 160 md->prev = head; 161 perf_mmap__consume(md, overwrite || backward); 162 return 0; 163 } 164 165 if ((start & md->mask) + size != (end & md->mask)) { 166 buf = &data[start & md->mask]; 167 size = md->mask + 1 - (start & md->mask); 168 start += size; 169 170 if (record__write(rec, buf, size) < 0) { 171 rc = -1; 172 goto out; 173 } 174 } 175 176 buf = &data[start & md->mask]; 177 size = end - start; 178 start += size; 179 180 if (record__write(rec, buf, size) < 0) { 181 rc = -1; 182 goto out; 183 } 184 185 md->prev = head; 186 perf_mmap__consume(md, overwrite || backward); 187 out: 188 return rc; 189 } 190 191 static volatile int done; 192 static volatile int signr = -1; 193 static volatile int child_finished; 194 195 static volatile int auxtrace_record__snapshot_started; 196 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 197 static DEFINE_TRIGGER(switch_output_trigger); 198 199 static void sig_handler(int sig) 200 { 201 if (sig == SIGCHLD) 202 child_finished = 1; 203 else 204 signr = sig; 205 206 done = 1; 207 } 208 209 static void record__sig_exit(void) 210 { 211 if (signr == -1) 212 return; 213 214 signal(signr, SIG_DFL); 215 raise(signr); 216 } 217 218 #ifdef HAVE_AUXTRACE_SUPPORT 219 220 static int record__process_auxtrace(struct perf_tool *tool, 221 union perf_event *event, void *data1, 222 size_t len1, void *data2, size_t len2) 223 { 224 struct record *rec = container_of(tool, struct record, tool); 225 struct perf_data_file *file = &rec->file; 226 size_t padding; 227 u8 pad[8] = {0}; 228 229 if (!perf_data_file__is_pipe(file)) { 230 off_t file_offset; 231 int fd = perf_data_file__fd(file); 232 int err; 233 234 file_offset = lseek(fd, 0, SEEK_CUR); 235 if (file_offset == -1) 236 return -1; 237 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 238 event, file_offset); 239 if (err) 240 return err; 241 } 242 243 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 244 padding = (len1 + len2) & 7; 245 if (padding) 246 padding = 8 - padding; 247 248 record__write(rec, event, event->header.size); 249 record__write(rec, data1, len1); 250 if (len2) 251 record__write(rec, data2, len2); 252 record__write(rec, &pad, padding); 253 254 return 0; 255 } 256 257 static int record__auxtrace_mmap_read(struct record *rec, 258 struct auxtrace_mmap *mm) 259 { 260 int ret; 261 262 ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool, 263 record__process_auxtrace); 264 if (ret < 0) 265 return ret; 266 267 if (ret) 268 rec->samples++; 269 270 return 0; 271 } 272 273 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 274 struct auxtrace_mmap *mm) 275 { 276 int ret; 277 278 ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool, 279 record__process_auxtrace, 280 rec->opts.auxtrace_snapshot_size); 281 if (ret < 0) 282 return ret; 283 284 if (ret) 285 rec->samples++; 286 287 return 0; 288 } 289 290 static int record__auxtrace_read_snapshot_all(struct record *rec) 291 { 292 int i; 293 int rc = 0; 294 295 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 296 struct auxtrace_mmap *mm = 297 &rec->evlist->mmap[i].auxtrace_mmap; 298 299 if (!mm->base) 300 continue; 301 302 if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) { 303 rc = -1; 304 goto out; 305 } 306 } 307 out: 308 return rc; 309 } 310 311 static void record__read_auxtrace_snapshot(struct record *rec) 312 { 313 pr_debug("Recording AUX area tracing snapshot\n"); 314 if (record__auxtrace_read_snapshot_all(rec) < 0) { 315 trigger_error(&auxtrace_snapshot_trigger); 316 } else { 317 if (auxtrace_record__snapshot_finish(rec->itr)) 318 trigger_error(&auxtrace_snapshot_trigger); 319 else 320 trigger_ready(&auxtrace_snapshot_trigger); 321 } 322 } 323 324 #else 325 326 static inline 327 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 328 struct auxtrace_mmap *mm __maybe_unused) 329 { 330 return 0; 331 } 332 333 static inline 334 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 335 { 336 } 337 338 static inline 339 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 340 { 341 return 0; 342 } 343 344 #endif 345 346 static int record__mmap_evlist(struct record *rec, 347 struct perf_evlist *evlist) 348 { 349 struct record_opts *opts = &rec->opts; 350 char msg[512]; 351 352 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, 353 opts->auxtrace_mmap_pages, 354 opts->auxtrace_snapshot_mode) < 0) { 355 if (errno == EPERM) { 356 pr_err("Permission error mapping pages.\n" 357 "Consider increasing " 358 "/proc/sys/kernel/perf_event_mlock_kb,\n" 359 "or try again with a smaller value of -m/--mmap_pages.\n" 360 "(current value: %u,%u)\n", 361 opts->mmap_pages, opts->auxtrace_mmap_pages); 362 return -errno; 363 } else { 364 pr_err("failed to mmap with %d (%s)\n", errno, 365 str_error_r(errno, msg, sizeof(msg))); 366 if (errno) 367 return -errno; 368 else 369 return -EINVAL; 370 } 371 } 372 return 0; 373 } 374 375 static int record__mmap(struct record *rec) 376 { 377 return record__mmap_evlist(rec, rec->evlist); 378 } 379 380 static int record__open(struct record *rec) 381 { 382 char msg[512]; 383 struct perf_evsel *pos; 384 struct perf_evlist *evlist = rec->evlist; 385 struct perf_session *session = rec->session; 386 struct record_opts *opts = &rec->opts; 387 struct perf_evsel_config_term *err_term; 388 int rc = 0; 389 390 perf_evlist__config(evlist, opts, &callchain_param); 391 392 evlist__for_each_entry(evlist, pos) { 393 try_again: 394 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 395 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 396 if (verbose) 397 ui__warning("%s\n", msg); 398 goto try_again; 399 } 400 401 rc = -errno; 402 perf_evsel__open_strerror(pos, &opts->target, 403 errno, msg, sizeof(msg)); 404 ui__error("%s\n", msg); 405 goto out; 406 } 407 } 408 409 if (perf_evlist__apply_filters(evlist, &pos)) { 410 error("failed to set filter \"%s\" on event %s with %d (%s)\n", 411 pos->filter, perf_evsel__name(pos), errno, 412 str_error_r(errno, msg, sizeof(msg))); 413 rc = -1; 414 goto out; 415 } 416 417 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { 418 error("failed to set config \"%s\" on event %s with %d (%s)\n", 419 err_term->val.drv_cfg, perf_evsel__name(pos), errno, 420 str_error_r(errno, msg, sizeof(msg))); 421 rc = -1; 422 goto out; 423 } 424 425 rc = record__mmap(rec); 426 if (rc) 427 goto out; 428 429 session->evlist = evlist; 430 perf_session__set_id_hdr_size(session); 431 out: 432 return rc; 433 } 434 435 static int process_sample_event(struct perf_tool *tool, 436 union perf_event *event, 437 struct perf_sample *sample, 438 struct perf_evsel *evsel, 439 struct machine *machine) 440 { 441 struct record *rec = container_of(tool, struct record, tool); 442 443 rec->samples++; 444 445 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 446 } 447 448 static int process_buildids(struct record *rec) 449 { 450 struct perf_data_file *file = &rec->file; 451 struct perf_session *session = rec->session; 452 453 if (file->size == 0) 454 return 0; 455 456 /* 457 * During this process, it'll load kernel map and replace the 458 * dso->long_name to a real pathname it found. In this case 459 * we prefer the vmlinux path like 460 * /lib/modules/3.16.4/build/vmlinux 461 * 462 * rather than build-id path (in debug directory). 463 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 464 */ 465 symbol_conf.ignore_vmlinux_buildid = true; 466 467 /* 468 * If --buildid-all is given, it marks all DSO regardless of hits, 469 * so no need to process samples. 470 */ 471 if (rec->buildid_all) 472 rec->tool.sample = NULL; 473 474 return perf_session__process_events(session); 475 } 476 477 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 478 { 479 int err; 480 struct perf_tool *tool = data; 481 /* 482 *As for guest kernel when processing subcommand record&report, 483 *we arrange module mmap prior to guest kernel mmap and trigger 484 *a preload dso because default guest module symbols are loaded 485 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 486 *method is used to avoid symbol missing when the first addr is 487 *in module instead of in guest kernel. 488 */ 489 err = perf_event__synthesize_modules(tool, process_synthesized_event, 490 machine); 491 if (err < 0) 492 pr_err("Couldn't record guest kernel [%d]'s reference" 493 " relocation symbol.\n", machine->pid); 494 495 /* 496 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 497 * have no _text sometimes. 498 */ 499 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 500 machine); 501 if (err < 0) 502 pr_err("Couldn't record guest kernel [%d]'s reference" 503 " relocation symbol.\n", machine->pid); 504 } 505 506 static struct perf_event_header finished_round_event = { 507 .size = sizeof(struct perf_event_header), 508 .type = PERF_RECORD_FINISHED_ROUND, 509 }; 510 511 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 512 bool backward) 513 { 514 u64 bytes_written = rec->bytes_written; 515 int i; 516 int rc = 0; 517 struct perf_mmap *maps; 518 519 if (!evlist) 520 return 0; 521 522 maps = backward ? evlist->backward_mmap : evlist->mmap; 523 if (!maps) 524 return 0; 525 526 if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 527 return 0; 528 529 for (i = 0; i < evlist->nr_mmaps; i++) { 530 struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; 531 532 if (maps[i].base) { 533 if (record__mmap_read(rec, &maps[i], 534 evlist->overwrite, backward) != 0) { 535 rc = -1; 536 goto out; 537 } 538 } 539 540 if (mm->base && !rec->opts.auxtrace_snapshot_mode && 541 record__auxtrace_mmap_read(rec, mm) != 0) { 542 rc = -1; 543 goto out; 544 } 545 } 546 547 /* 548 * Mark the round finished in case we wrote 549 * at least one event. 550 */ 551 if (bytes_written != rec->bytes_written) 552 rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); 553 554 if (backward) 555 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 556 out: 557 return rc; 558 } 559 560 static int record__mmap_read_all(struct record *rec) 561 { 562 int err; 563 564 err = record__mmap_read_evlist(rec, rec->evlist, false); 565 if (err) 566 return err; 567 568 return record__mmap_read_evlist(rec, rec->evlist, true); 569 } 570 571 static void record__init_features(struct record *rec) 572 { 573 struct perf_session *session = rec->session; 574 int feat; 575 576 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 577 perf_header__set_feat(&session->header, feat); 578 579 if (rec->no_buildid) 580 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 581 582 if (!have_tracepoints(&rec->evlist->entries)) 583 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 584 585 if (!rec->opts.branch_stack) 586 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 587 588 if (!rec->opts.full_auxtrace) 589 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 590 591 perf_header__clear_feat(&session->header, HEADER_STAT); 592 } 593 594 static void 595 record__finish_output(struct record *rec) 596 { 597 struct perf_data_file *file = &rec->file; 598 int fd = perf_data_file__fd(file); 599 600 if (file->is_pipe) 601 return; 602 603 rec->session->header.data_size += rec->bytes_written; 604 file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); 605 606 if (!rec->no_buildid) { 607 process_buildids(rec); 608 609 if (rec->buildid_all) 610 dsos__hit_all(rec->session); 611 } 612 perf_session__write_header(rec->session, rec->evlist, fd, true); 613 614 return; 615 } 616 617 static int record__synthesize_workload(struct record *rec, bool tail) 618 { 619 struct { 620 struct thread_map map; 621 struct thread_map_data map_data; 622 } thread_map; 623 624 if (rec->opts.tail_synthesize != tail) 625 return 0; 626 627 thread_map.map.nr = 1; 628 thread_map.map.map[0].pid = rec->evlist->workload.pid; 629 thread_map.map.map[0].comm = NULL; 630 return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map, 631 process_synthesized_event, 632 &rec->session->machines.host, 633 rec->opts.sample_address, 634 rec->opts.proc_map_timeout); 635 } 636 637 static int record__synthesize(struct record *rec, bool tail); 638 639 static int 640 record__switch_output(struct record *rec, bool at_exit) 641 { 642 struct perf_data_file *file = &rec->file; 643 int fd, err; 644 645 /* Same Size: "2015122520103046"*/ 646 char timestamp[] = "InvalidTimestamp"; 647 648 record__synthesize(rec, true); 649 if (target__none(&rec->opts.target)) 650 record__synthesize_workload(rec, true); 651 652 rec->samples = 0; 653 record__finish_output(rec); 654 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 655 if (err) { 656 pr_err("Failed to get current timestamp\n"); 657 return -EINVAL; 658 } 659 660 fd = perf_data_file__switch(file, timestamp, 661 rec->session->header.data_offset, 662 at_exit); 663 if (fd >= 0 && !at_exit) { 664 rec->bytes_written = 0; 665 rec->session->header.data_size = 0; 666 } 667 668 if (!quiet) 669 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 670 file->path, timestamp); 671 672 /* Output tracking events */ 673 if (!at_exit) { 674 record__synthesize(rec, false); 675 676 /* 677 * In 'perf record --switch-output' without -a, 678 * record__synthesize() in record__switch_output() won't 679 * generate tracking events because there's no thread_map 680 * in evlist. Which causes newly created perf.data doesn't 681 * contain map and comm information. 682 * Create a fake thread_map and directly call 683 * perf_event__synthesize_thread_map() for those events. 684 */ 685 if (target__none(&rec->opts.target)) 686 record__synthesize_workload(rec, false); 687 } 688 return fd; 689 } 690 691 static volatile int workload_exec_errno; 692 693 /* 694 * perf_evlist__prepare_workload will send a SIGUSR1 695 * if the fork fails, since we asked by setting its 696 * want_signal to true. 697 */ 698 static void workload_exec_failed_signal(int signo __maybe_unused, 699 siginfo_t *info, 700 void *ucontext __maybe_unused) 701 { 702 workload_exec_errno = info->si_value.sival_int; 703 done = 1; 704 child_finished = 1; 705 } 706 707 static void snapshot_sig_handler(int sig); 708 709 int __weak 710 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 711 struct perf_tool *tool __maybe_unused, 712 perf_event__handler_t process __maybe_unused, 713 struct machine *machine __maybe_unused) 714 { 715 return 0; 716 } 717 718 static const struct perf_event_mmap_page * 719 perf_evlist__pick_pc(struct perf_evlist *evlist) 720 { 721 if (evlist) { 722 if (evlist->mmap && evlist->mmap[0].base) 723 return evlist->mmap[0].base; 724 if (evlist->backward_mmap && evlist->backward_mmap[0].base) 725 return evlist->backward_mmap[0].base; 726 } 727 return NULL; 728 } 729 730 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 731 { 732 const struct perf_event_mmap_page *pc; 733 734 pc = perf_evlist__pick_pc(rec->evlist); 735 if (pc) 736 return pc; 737 return NULL; 738 } 739 740 static int record__synthesize(struct record *rec, bool tail) 741 { 742 struct perf_session *session = rec->session; 743 struct machine *machine = &session->machines.host; 744 struct perf_data_file *file = &rec->file; 745 struct record_opts *opts = &rec->opts; 746 struct perf_tool *tool = &rec->tool; 747 int fd = perf_data_file__fd(file); 748 int err = 0; 749 750 if (rec->opts.tail_synthesize != tail) 751 return 0; 752 753 if (file->is_pipe) { 754 err = perf_event__synthesize_attrs(tool, session, 755 process_synthesized_event); 756 if (err < 0) { 757 pr_err("Couldn't synthesize attrs.\n"); 758 goto out; 759 } 760 761 if (have_tracepoints(&rec->evlist->entries)) { 762 /* 763 * FIXME err <= 0 here actually means that 764 * there were no tracepoints so its not really 765 * an error, just that we don't need to 766 * synthesize anything. We really have to 767 * return this more properly and also 768 * propagate errors that now are calling die() 769 */ 770 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 771 process_synthesized_event); 772 if (err <= 0) { 773 pr_err("Couldn't record tracing data.\n"); 774 goto out; 775 } 776 rec->bytes_written += err; 777 } 778 } 779 780 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 781 process_synthesized_event, machine); 782 if (err) 783 goto out; 784 785 if (rec->opts.full_auxtrace) { 786 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 787 session, process_synthesized_event); 788 if (err) 789 goto out; 790 } 791 792 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 793 machine); 794 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 795 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 796 "Check /proc/kallsyms permission or run as root.\n"); 797 798 err = perf_event__synthesize_modules(tool, process_synthesized_event, 799 machine); 800 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 801 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 802 "Check /proc/modules permission or run as root.\n"); 803 804 if (perf_guest) { 805 machines__process_guests(&session->machines, 806 perf_event__synthesize_guest_os, tool); 807 } 808 809 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 810 process_synthesized_event, opts->sample_address, 811 opts->proc_map_timeout); 812 out: 813 return err; 814 } 815 816 static int __cmd_record(struct record *rec, int argc, const char **argv) 817 { 818 int err; 819 int status = 0; 820 unsigned long waking = 0; 821 const bool forks = argc > 0; 822 struct machine *machine; 823 struct perf_tool *tool = &rec->tool; 824 struct record_opts *opts = &rec->opts; 825 struct perf_data_file *file = &rec->file; 826 struct perf_session *session; 827 bool disabled = false, draining = false; 828 int fd; 829 830 rec->progname = argv[0]; 831 832 atexit(record__sig_exit); 833 signal(SIGCHLD, sig_handler); 834 signal(SIGINT, sig_handler); 835 signal(SIGTERM, sig_handler); 836 837 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) { 838 signal(SIGUSR2, snapshot_sig_handler); 839 if (rec->opts.auxtrace_snapshot_mode) 840 trigger_on(&auxtrace_snapshot_trigger); 841 if (rec->switch_output) 842 trigger_on(&switch_output_trigger); 843 } else { 844 signal(SIGUSR2, SIG_IGN); 845 } 846 847 session = perf_session__new(file, false, tool); 848 if (session == NULL) { 849 pr_err("Perf session creation failed.\n"); 850 return -1; 851 } 852 853 fd = perf_data_file__fd(file); 854 rec->session = session; 855 856 record__init_features(rec); 857 858 if (forks) { 859 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 860 argv, file->is_pipe, 861 workload_exec_failed_signal); 862 if (err < 0) { 863 pr_err("Couldn't run the workload!\n"); 864 status = err; 865 goto out_delete_session; 866 } 867 } 868 869 if (record__open(rec) != 0) { 870 err = -1; 871 goto out_child; 872 } 873 874 err = bpf__apply_obj_config(); 875 if (err) { 876 char errbuf[BUFSIZ]; 877 878 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 879 pr_err("ERROR: Apply config to BPF failed: %s\n", 880 errbuf); 881 goto out_child; 882 } 883 884 /* 885 * Normally perf_session__new would do this, but it doesn't have the 886 * evlist. 887 */ 888 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 889 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 890 rec->tool.ordered_events = false; 891 } 892 893 if (!rec->evlist->nr_groups) 894 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 895 896 if (file->is_pipe) { 897 err = perf_header__write_pipe(fd); 898 if (err < 0) 899 goto out_child; 900 } else { 901 err = perf_session__write_header(session, rec->evlist, fd, false); 902 if (err < 0) 903 goto out_child; 904 } 905 906 if (!rec->no_buildid 907 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 908 pr_err("Couldn't generate buildids. " 909 "Use --no-buildid to profile anyway.\n"); 910 err = -1; 911 goto out_child; 912 } 913 914 machine = &session->machines.host; 915 916 err = record__synthesize(rec, false); 917 if (err < 0) 918 goto out_child; 919 920 if (rec->realtime_prio) { 921 struct sched_param param; 922 923 param.sched_priority = rec->realtime_prio; 924 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 925 pr_err("Could not set realtime priority.\n"); 926 err = -1; 927 goto out_child; 928 } 929 } 930 931 /* 932 * When perf is starting the traced process, all the events 933 * (apart from group members) have enable_on_exec=1 set, 934 * so don't spoil it by prematurely enabling them. 935 */ 936 if (!target__none(&opts->target) && !opts->initial_delay) 937 perf_evlist__enable(rec->evlist); 938 939 /* 940 * Let the child rip 941 */ 942 if (forks) { 943 union perf_event *event; 944 945 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 946 if (event == NULL) { 947 err = -ENOMEM; 948 goto out_child; 949 } 950 951 /* 952 * Some H/W events are generated before COMM event 953 * which is emitted during exec(), so perf script 954 * cannot see a correct process name for those events. 955 * Synthesize COMM event to prevent it. 956 */ 957 perf_event__synthesize_comm(tool, event, 958 rec->evlist->workload.pid, 959 process_synthesized_event, 960 machine); 961 free(event); 962 963 perf_evlist__start_workload(rec->evlist); 964 } 965 966 if (opts->initial_delay) { 967 usleep(opts->initial_delay * USEC_PER_MSEC); 968 perf_evlist__enable(rec->evlist); 969 } 970 971 trigger_ready(&auxtrace_snapshot_trigger); 972 trigger_ready(&switch_output_trigger); 973 for (;;) { 974 unsigned long long hits = rec->samples; 975 976 /* 977 * rec->evlist->bkw_mmap_state is possible to be 978 * BKW_MMAP_EMPTY here: when done == true and 979 * hits != rec->samples in previous round. 980 * 981 * perf_evlist__toggle_bkw_mmap ensure we never 982 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 983 */ 984 if (trigger_is_hit(&switch_output_trigger) || done || draining) 985 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 986 987 if (record__mmap_read_all(rec) < 0) { 988 trigger_error(&auxtrace_snapshot_trigger); 989 trigger_error(&switch_output_trigger); 990 err = -1; 991 goto out_child; 992 } 993 994 if (auxtrace_record__snapshot_started) { 995 auxtrace_record__snapshot_started = 0; 996 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 997 record__read_auxtrace_snapshot(rec); 998 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 999 pr_err("AUX area tracing snapshot failed\n"); 1000 err = -1; 1001 goto out_child; 1002 } 1003 } 1004 1005 if (trigger_is_hit(&switch_output_trigger)) { 1006 /* 1007 * If switch_output_trigger is hit, the data in 1008 * overwritable ring buffer should have been collected, 1009 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1010 * 1011 * If SIGUSR2 raise after or during record__mmap_read_all(), 1012 * record__mmap_read_all() didn't collect data from 1013 * overwritable ring buffer. Read again. 1014 */ 1015 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1016 continue; 1017 trigger_ready(&switch_output_trigger); 1018 1019 /* 1020 * Reenable events in overwrite ring buffer after 1021 * record__mmap_read_all(): we should have collected 1022 * data from it. 1023 */ 1024 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1025 1026 if (!quiet) 1027 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1028 waking); 1029 waking = 0; 1030 fd = record__switch_output(rec, false); 1031 if (fd < 0) { 1032 pr_err("Failed to switch to new file\n"); 1033 trigger_error(&switch_output_trigger); 1034 err = fd; 1035 goto out_child; 1036 } 1037 } 1038 1039 if (hits == rec->samples) { 1040 if (done || draining) 1041 break; 1042 err = perf_evlist__poll(rec->evlist, -1); 1043 /* 1044 * Propagate error, only if there's any. Ignore positive 1045 * number of returned events and interrupt error. 1046 */ 1047 if (err > 0 || (err < 0 && errno == EINTR)) 1048 err = 0; 1049 waking++; 1050 1051 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1052 draining = true; 1053 } 1054 1055 /* 1056 * When perf is starting the traced process, at the end events 1057 * die with the process and we wait for that. Thus no need to 1058 * disable events in this case. 1059 */ 1060 if (done && !disabled && !target__none(&opts->target)) { 1061 trigger_off(&auxtrace_snapshot_trigger); 1062 perf_evlist__disable(rec->evlist); 1063 disabled = true; 1064 } 1065 } 1066 trigger_off(&auxtrace_snapshot_trigger); 1067 trigger_off(&switch_output_trigger); 1068 1069 if (forks && workload_exec_errno) { 1070 char msg[STRERR_BUFSIZE]; 1071 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1072 pr_err("Workload failed: %s\n", emsg); 1073 err = -1; 1074 goto out_child; 1075 } 1076 1077 if (!quiet) 1078 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1079 1080 if (target__none(&rec->opts.target)) 1081 record__synthesize_workload(rec, true); 1082 1083 out_child: 1084 if (forks) { 1085 int exit_status; 1086 1087 if (!child_finished) 1088 kill(rec->evlist->workload.pid, SIGTERM); 1089 1090 wait(&exit_status); 1091 1092 if (err < 0) 1093 status = err; 1094 else if (WIFEXITED(exit_status)) 1095 status = WEXITSTATUS(exit_status); 1096 else if (WIFSIGNALED(exit_status)) 1097 signr = WTERMSIG(exit_status); 1098 } else 1099 status = err; 1100 1101 record__synthesize(rec, true); 1102 /* this will be recalculated during process_buildids() */ 1103 rec->samples = 0; 1104 1105 if (!err) { 1106 if (!rec->timestamp_filename) { 1107 record__finish_output(rec); 1108 } else { 1109 fd = record__switch_output(rec, true); 1110 if (fd < 0) { 1111 status = fd; 1112 goto out_delete_session; 1113 } 1114 } 1115 } 1116 1117 if (!err && !quiet) { 1118 char samples[128]; 1119 const char *postfix = rec->timestamp_filename ? 1120 ".<timestamp>" : ""; 1121 1122 if (rec->samples && !rec->opts.full_auxtrace) 1123 scnprintf(samples, sizeof(samples), 1124 " (%" PRIu64 " samples)", rec->samples); 1125 else 1126 samples[0] = '\0'; 1127 1128 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1129 perf_data_file__size(file) / 1024.0 / 1024.0, 1130 file->path, postfix, samples); 1131 } 1132 1133 out_delete_session: 1134 perf_session__delete(session); 1135 return status; 1136 } 1137 1138 static void callchain_debug(struct callchain_param *callchain) 1139 { 1140 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1141 1142 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1143 1144 if (callchain->record_mode == CALLCHAIN_DWARF) 1145 pr_debug("callchain: stack dump size %d\n", 1146 callchain->dump_size); 1147 } 1148 1149 int record_opts__parse_callchain(struct record_opts *record, 1150 struct callchain_param *callchain, 1151 const char *arg, bool unset) 1152 { 1153 int ret; 1154 callchain->enabled = !unset; 1155 1156 /* --no-call-graph */ 1157 if (unset) { 1158 callchain->record_mode = CALLCHAIN_NONE; 1159 pr_debug("callchain: disabled\n"); 1160 return 0; 1161 } 1162 1163 ret = parse_callchain_record_opt(arg, callchain); 1164 if (!ret) { 1165 /* Enable data address sampling for DWARF unwind. */ 1166 if (callchain->record_mode == CALLCHAIN_DWARF) 1167 record->sample_address = true; 1168 callchain_debug(callchain); 1169 } 1170 1171 return ret; 1172 } 1173 1174 int record_parse_callchain_opt(const struct option *opt, 1175 const char *arg, 1176 int unset) 1177 { 1178 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1179 } 1180 1181 int record_callchain_opt(const struct option *opt, 1182 const char *arg __maybe_unused, 1183 int unset __maybe_unused) 1184 { 1185 struct callchain_param *callchain = opt->value; 1186 1187 callchain->enabled = true; 1188 1189 if (callchain->record_mode == CALLCHAIN_NONE) 1190 callchain->record_mode = CALLCHAIN_FP; 1191 1192 callchain_debug(callchain); 1193 return 0; 1194 } 1195 1196 static int perf_record_config(const char *var, const char *value, void *cb) 1197 { 1198 struct record *rec = cb; 1199 1200 if (!strcmp(var, "record.build-id")) { 1201 if (!strcmp(value, "cache")) 1202 rec->no_buildid_cache = false; 1203 else if (!strcmp(value, "no-cache")) 1204 rec->no_buildid_cache = true; 1205 else if (!strcmp(value, "skip")) 1206 rec->no_buildid = true; 1207 else 1208 return -1; 1209 return 0; 1210 } 1211 if (!strcmp(var, "record.call-graph")) 1212 var = "call-graph.record-mode"; /* fall-through */ 1213 1214 return perf_default_config(var, value, cb); 1215 } 1216 1217 struct clockid_map { 1218 const char *name; 1219 int clockid; 1220 }; 1221 1222 #define CLOCKID_MAP(n, c) \ 1223 { .name = n, .clockid = (c), } 1224 1225 #define CLOCKID_END { .name = NULL, } 1226 1227 1228 /* 1229 * Add the missing ones, we need to build on many distros... 1230 */ 1231 #ifndef CLOCK_MONOTONIC_RAW 1232 #define CLOCK_MONOTONIC_RAW 4 1233 #endif 1234 #ifndef CLOCK_BOOTTIME 1235 #define CLOCK_BOOTTIME 7 1236 #endif 1237 #ifndef CLOCK_TAI 1238 #define CLOCK_TAI 11 1239 #endif 1240 1241 static const struct clockid_map clockids[] = { 1242 /* available for all events, NMI safe */ 1243 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1244 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1245 1246 /* available for some events */ 1247 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1248 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1249 CLOCKID_MAP("tai", CLOCK_TAI), 1250 1251 /* available for the lazy */ 1252 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1253 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1254 CLOCKID_MAP("real", CLOCK_REALTIME), 1255 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1256 1257 CLOCKID_END, 1258 }; 1259 1260 static int parse_clockid(const struct option *opt, const char *str, int unset) 1261 { 1262 struct record_opts *opts = (struct record_opts *)opt->value; 1263 const struct clockid_map *cm; 1264 const char *ostr = str; 1265 1266 if (unset) { 1267 opts->use_clockid = 0; 1268 return 0; 1269 } 1270 1271 /* no arg passed */ 1272 if (!str) 1273 return 0; 1274 1275 /* no setting it twice */ 1276 if (opts->use_clockid) 1277 return -1; 1278 1279 opts->use_clockid = true; 1280 1281 /* if its a number, we're done */ 1282 if (sscanf(str, "%d", &opts->clockid) == 1) 1283 return 0; 1284 1285 /* allow a "CLOCK_" prefix to the name */ 1286 if (!strncasecmp(str, "CLOCK_", 6)) 1287 str += 6; 1288 1289 for (cm = clockids; cm->name; cm++) { 1290 if (!strcasecmp(str, cm->name)) { 1291 opts->clockid = cm->clockid; 1292 return 0; 1293 } 1294 } 1295 1296 opts->use_clockid = false; 1297 ui__warning("unknown clockid %s, check man page\n", ostr); 1298 return -1; 1299 } 1300 1301 static int record__parse_mmap_pages(const struct option *opt, 1302 const char *str, 1303 int unset __maybe_unused) 1304 { 1305 struct record_opts *opts = opt->value; 1306 char *s, *p; 1307 unsigned int mmap_pages; 1308 int ret; 1309 1310 if (!str) 1311 return -EINVAL; 1312 1313 s = strdup(str); 1314 if (!s) 1315 return -ENOMEM; 1316 1317 p = strchr(s, ','); 1318 if (p) 1319 *p = '\0'; 1320 1321 if (*s) { 1322 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1323 if (ret) 1324 goto out_free; 1325 opts->mmap_pages = mmap_pages; 1326 } 1327 1328 if (!p) { 1329 ret = 0; 1330 goto out_free; 1331 } 1332 1333 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1334 if (ret) 1335 goto out_free; 1336 1337 opts->auxtrace_mmap_pages = mmap_pages; 1338 1339 out_free: 1340 free(s); 1341 return ret; 1342 } 1343 1344 static const char * const __record_usage[] = { 1345 "perf record [<options>] [<command>]", 1346 "perf record [<options>] -- <command> [<options>]", 1347 NULL 1348 }; 1349 const char * const *record_usage = __record_usage; 1350 1351 /* 1352 * XXX Ideally would be local to cmd_record() and passed to a record__new 1353 * because we need to have access to it in record__exit, that is called 1354 * after cmd_record() exits, but since record_options need to be accessible to 1355 * builtin-script, leave it here. 1356 * 1357 * At least we don't ouch it in all the other functions here directly. 1358 * 1359 * Just say no to tons of global variables, sigh. 1360 */ 1361 static struct record record = { 1362 .opts = { 1363 .sample_time = true, 1364 .mmap_pages = UINT_MAX, 1365 .user_freq = UINT_MAX, 1366 .user_interval = ULLONG_MAX, 1367 .freq = 4000, 1368 .target = { 1369 .uses_mmap = true, 1370 .default_per_cpu = true, 1371 }, 1372 .proc_map_timeout = 500, 1373 }, 1374 .tool = { 1375 .sample = process_sample_event, 1376 .fork = perf_event__process_fork, 1377 .exit = perf_event__process_exit, 1378 .comm = perf_event__process_comm, 1379 .mmap = perf_event__process_mmap, 1380 .mmap2 = perf_event__process_mmap2, 1381 .ordered_events = true, 1382 }, 1383 }; 1384 1385 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1386 "\n\t\t\t\tDefault: fp"; 1387 1388 static bool dry_run; 1389 1390 /* 1391 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1392 * with it and switch to use the library functions in perf_evlist that came 1393 * from builtin-record.c, i.e. use record_opts, 1394 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1395 * using pipes, etc. 1396 */ 1397 struct option __record_options[] = { 1398 OPT_CALLBACK('e', "event", &record.evlist, "event", 1399 "event selector. use 'perf list' to list available events", 1400 parse_events_option), 1401 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1402 "event filter", parse_filter), 1403 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1404 NULL, "don't record events from perf itself", 1405 exclude_perf), 1406 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1407 "record events on existing process id"), 1408 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1409 "record events on existing thread id"), 1410 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1411 "collect data with this RT SCHED_FIFO priority"), 1412 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1413 "collect data without buffering"), 1414 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1415 "collect raw sample records from all opened counters"), 1416 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1417 "system-wide collection from all CPUs"), 1418 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1419 "list of cpus to monitor"), 1420 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1421 OPT_STRING('o', "output", &record.file.path, "file", 1422 "output file name"), 1423 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1424 &record.opts.no_inherit_set, 1425 "child tasks do not inherit counters"), 1426 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1427 "synthesize non-sample events at the end of output"), 1428 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1429 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 1430 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1431 "number of mmap data pages and AUX area tracing mmap pages", 1432 record__parse_mmap_pages), 1433 OPT_BOOLEAN(0, "group", &record.opts.group, 1434 "put the counters into a counter group"), 1435 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1436 NULL, "enables call-graph recording" , 1437 &record_callchain_opt), 1438 OPT_CALLBACK(0, "call-graph", &record.opts, 1439 "record_mode[,record_size]", record_callchain_help, 1440 &record_parse_callchain_opt), 1441 OPT_INCR('v', "verbose", &verbose, 1442 "be more verbose (show counter open errors, etc)"), 1443 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1444 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1445 "per thread counts"), 1446 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1447 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1448 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1449 &record.opts.sample_time_set, 1450 "Record the sample timestamps"), 1451 OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), 1452 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1453 "don't sample"), 1454 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 1455 &record.no_buildid_cache_set, 1456 "do not update the buildid cache"), 1457 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 1458 &record.no_buildid_set, 1459 "do not collect buildids in perf.data"), 1460 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 1461 "monitor event in cgroup name only", 1462 parse_cgroups), 1463 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 1464 "ms to wait before starting measurement after program start"), 1465 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 1466 "user to profile"), 1467 1468 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 1469 "branch any", "sample any taken branches", 1470 parse_branch_stack), 1471 1472 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 1473 "branch filter mask", "branch stack filter modes", 1474 parse_branch_stack), 1475 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 1476 "sample by weight (on special events only)"), 1477 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 1478 "sample transaction flags (special events only)"), 1479 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 1480 "use per-thread mmaps"), 1481 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 1482 "sample selected machine registers on interrupt," 1483 " use -I ? to list register names", parse_regs), 1484 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 1485 "Record running/enabled time of read (:S) events"), 1486 OPT_CALLBACK('k', "clockid", &record.opts, 1487 "clockid", "clockid to use for events, see clock_gettime()", 1488 parse_clockid), 1489 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 1490 "opts", "AUX area tracing Snapshot Mode", ""), 1491 OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, 1492 "per thread proc mmap processing timeout in ms"), 1493 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 1494 "Record context switch events"), 1495 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 1496 "Configure all used events to run in kernel space.", 1497 PARSE_OPT_EXCLUSIVE), 1498 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 1499 "Configure all used events to run in user space.", 1500 PARSE_OPT_EXCLUSIVE), 1501 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 1502 "clang binary to use for compiling BPF scriptlets"), 1503 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 1504 "options passed to clang when compiling BPF scriptlets"), 1505 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 1506 "file", "vmlinux pathname"), 1507 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 1508 "Record build-id of all DSOs regardless of hits"), 1509 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 1510 "append timestamp to output filename"), 1511 OPT_BOOLEAN(0, "switch-output", &record.switch_output, 1512 "Switch output when receive SIGUSR2"), 1513 OPT_BOOLEAN(0, "dry-run", &dry_run, 1514 "Parse options then exit"), 1515 OPT_END() 1516 }; 1517 1518 struct option *record_options = __record_options; 1519 1520 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) 1521 { 1522 int err; 1523 struct record *rec = &record; 1524 char errbuf[BUFSIZ]; 1525 1526 #ifndef HAVE_LIBBPF_SUPPORT 1527 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 1528 set_nobuild('\0', "clang-path", true); 1529 set_nobuild('\0', "clang-opt", true); 1530 # undef set_nobuild 1531 #endif 1532 1533 #ifndef HAVE_BPF_PROLOGUE 1534 # if !defined (HAVE_DWARF_SUPPORT) 1535 # define REASON "NO_DWARF=1" 1536 # elif !defined (HAVE_LIBBPF_SUPPORT) 1537 # define REASON "NO_LIBBPF=1" 1538 # else 1539 # define REASON "this architecture doesn't support BPF prologue" 1540 # endif 1541 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 1542 set_nobuild('\0', "vmlinux", true); 1543 # undef set_nobuild 1544 # undef REASON 1545 #endif 1546 1547 rec->evlist = perf_evlist__new(); 1548 if (rec->evlist == NULL) 1549 return -ENOMEM; 1550 1551 perf_config(perf_record_config, rec); 1552 1553 argc = parse_options(argc, argv, record_options, record_usage, 1554 PARSE_OPT_STOP_AT_NON_OPTION); 1555 if (!argc && target__none(&rec->opts.target)) 1556 usage_with_options(record_usage, record_options); 1557 1558 if (nr_cgroups && !rec->opts.target.system_wide) { 1559 usage_with_options_msg(record_usage, record_options, 1560 "cgroup monitoring only available in system-wide mode"); 1561 1562 } 1563 if (rec->opts.record_switch_events && 1564 !perf_can_record_switch_events()) { 1565 ui__error("kernel does not support recording context switch events\n"); 1566 parse_options_usage(record_usage, record_options, "switch-events", 0); 1567 return -EINVAL; 1568 } 1569 1570 if (rec->switch_output) 1571 rec->timestamp_filename = true; 1572 1573 if (!rec->itr) { 1574 rec->itr = auxtrace_record__init(rec->evlist, &err); 1575 if (err) 1576 goto out; 1577 } 1578 1579 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 1580 rec->opts.auxtrace_snapshot_opts); 1581 if (err) 1582 goto out; 1583 1584 /* 1585 * Allow aliases to facilitate the lookup of symbols for address 1586 * filters. Refer to auxtrace_parse_filters(). 1587 */ 1588 symbol_conf.allow_aliases = true; 1589 1590 symbol__init(NULL); 1591 1592 err = auxtrace_parse_filters(rec->evlist); 1593 if (err) 1594 goto out; 1595 1596 if (dry_run) 1597 goto out; 1598 1599 err = bpf__setup_stdout(rec->evlist); 1600 if (err) { 1601 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 1602 pr_err("ERROR: Setup BPF stdout failed: %s\n", 1603 errbuf); 1604 goto out; 1605 } 1606 1607 err = -ENOMEM; 1608 1609 if (symbol_conf.kptr_restrict) 1610 pr_warning( 1611 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1612 "check /proc/sys/kernel/kptr_restrict.\n\n" 1613 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1614 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1615 "Samples in kernel modules won't be resolved at all.\n\n" 1616 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1617 "even with a suitable vmlinux or kallsyms file.\n\n"); 1618 1619 if (rec->no_buildid_cache || rec->no_buildid) { 1620 disable_buildid_cache(); 1621 } else if (rec->switch_output) { 1622 /* 1623 * In 'perf record --switch-output', disable buildid 1624 * generation by default to reduce data file switching 1625 * overhead. Still generate buildid if they are required 1626 * explicitly using 1627 * 1628 * perf record --signal-trigger --no-no-buildid \ 1629 * --no-no-buildid-cache 1630 * 1631 * Following code equals to: 1632 * 1633 * if ((rec->no_buildid || !rec->no_buildid_set) && 1634 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 1635 * disable_buildid_cache(); 1636 */ 1637 bool disable = true; 1638 1639 if (rec->no_buildid_set && !rec->no_buildid) 1640 disable = false; 1641 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 1642 disable = false; 1643 if (disable) { 1644 rec->no_buildid = true; 1645 rec->no_buildid_cache = true; 1646 disable_buildid_cache(); 1647 } 1648 } 1649 1650 if (record.opts.overwrite) 1651 record.opts.tail_synthesize = true; 1652 1653 if (rec->evlist->nr_entries == 0 && 1654 perf_evlist__add_default(rec->evlist) < 0) { 1655 pr_err("Not enough memory for event selector list\n"); 1656 goto out; 1657 } 1658 1659 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 1660 rec->opts.no_inherit = true; 1661 1662 err = target__validate(&rec->opts.target); 1663 if (err) { 1664 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1665 ui__warning("%s", errbuf); 1666 } 1667 1668 err = target__parse_uid(&rec->opts.target); 1669 if (err) { 1670 int saved_errno = errno; 1671 1672 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1673 ui__error("%s", errbuf); 1674 1675 err = -saved_errno; 1676 goto out; 1677 } 1678 1679 err = -ENOMEM; 1680 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 1681 usage_with_options(record_usage, record_options); 1682 1683 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 1684 if (err) 1685 goto out; 1686 1687 /* 1688 * We take all buildids when the file contains 1689 * AUX area tracing data because we do not decode the 1690 * trace because it would take too long. 1691 */ 1692 if (rec->opts.full_auxtrace) 1693 rec->buildid_all = true; 1694 1695 if (record_opts__config(&rec->opts)) { 1696 err = -EINVAL; 1697 goto out; 1698 } 1699 1700 err = __cmd_record(&record, argc, argv); 1701 out: 1702 perf_evlist__delete(rec->evlist); 1703 symbol__exit(); 1704 auxtrace_record__free(rec->itr); 1705 return err; 1706 } 1707 1708 static void snapshot_sig_handler(int sig __maybe_unused) 1709 { 1710 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1711 trigger_hit(&auxtrace_snapshot_trigger); 1712 auxtrace_record__snapshot_started = 1; 1713 if (auxtrace_record__snapshot_start(record.itr)) 1714 trigger_error(&auxtrace_snapshot_trigger); 1715 } 1716 1717 if (trigger_is_ready(&switch_output_trigger)) 1718 trigger_hit(&switch_output_trigger); 1719 } 1720