// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	struct perf_data_file file;
	struct auxtrace_record *itr;
	struct perf_evlist *evlist;
	struct perf_session *session;
	const char *progname;
	int realtime_prio;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool timestamp_filename;
	struct switch_output switch_output;
	unsigned long long samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}
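/*
 * Find the valid data region [*start, *end) in a backward (overwrite)
 * ring buffer: walk event headers forward from 'head' until the walk
 * wraps past the buffer size or reaches a zeroed header.
 */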
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head, old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}
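	/*
	 * The region may wrap past the end of the ring buffer; if so,
	 * write the part up to the end of the buffer first, then the
	 * remainder from the start of the buffer.
	 */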
	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}
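/*
 * Called from the main record loop once the SIGUSR2-armed snapshot
 * trigger has fired: drain AUX data from every map, then let the
 * auxtrace backend finish the snapshot, updating the trigger state.
 */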
static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		       err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		       str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}
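/*
 * Re-process the recorded data at the end of the run so that
 * build_id__mark_dso_hit() (our .sample handler) can mark the DSOs
 * that were actually hit, for inclusion in the header's build-id table.
 */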
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						rec->opts.sample_address,
						rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which means the newly created perf.data file
		 * doesn't contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}
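/*
 * Synthesize the non-sample events: features, attrs and tracing data
 * (pipe mode), time conversion, AUX trace info, kernel/module mmaps and
 * the existing threads. With --tail-synthesize this runs at the end of
 * the record instead of at the start.
 */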
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_features(
			tool, session, rec->evlist, process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
		       errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one: ignore a
			 * positive number of returned events and EINTR.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s) "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap	 = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout    = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			      &record.switch_output.set, "signal,size,time",
			      "Switch output when receive SIGUSR2 or cross size,time threshold",
			      "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead.
		 * Still generate buildids if they are required
		 * explicitly, using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *          disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace; decoding it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}