1 /* 2 * builtin-record.c 3 * 4 * Builtin record command: Record the profile of a workload 5 * (or a CPU, or a PID) into the perf.data output file - for 6 * later analysis via perf report. 7 */ 8 #define _FILE_OFFSET_BITS 64 9 10 #include "builtin.h" 11 12 #include "perf.h" 13 14 #include "util/build-id.h" 15 #include "util/util.h" 16 #include "util/parse-options.h" 17 #include "util/parse-events.h" 18 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/symbol.h" 27 #include "util/cpumap.h" 28 #include "util/thread_map.h" 29 30 #include <unistd.h> 31 #include <sched.h> 32 #include <sys/mman.h> 33 34 enum write_mode_t { 35 WRITE_FORCE, 36 WRITE_APPEND 37 }; 38 39 struct perf_record { 40 struct perf_tool tool; 41 struct perf_record_opts opts; 42 u64 bytes_written; 43 const char *output_name; 44 struct perf_evlist *evlist; 45 struct perf_session *session; 46 const char *progname; 47 int output; 48 unsigned int page_size; 49 int realtime_prio; 50 enum write_mode_t write_mode; 51 bool no_buildid; 52 bool no_buildid_cache; 53 bool force; 54 bool file_new; 55 bool append_file; 56 long samples; 57 off_t post_processing_offset; 58 }; 59 60 static void advance_output(struct perf_record *rec, size_t size) 61 { 62 rec->bytes_written += size; 63 } 64 65 static int write_output(struct perf_record *rec, void *buf, size_t size) 66 { 67 while (size) { 68 int ret = write(rec->output, buf, size); 69 70 if (ret < 0) { 71 pr_err("failed to write\n"); 72 return -1; 73 } 74 75 size -= ret; 76 buf += ret; 77 78 rec->bytes_written += ret; 79 } 80 81 return 0; 82 } 83 84 static int process_synthesized_event(struct perf_tool *tool, 85 union perf_event *event, 86 struct perf_sample *sample __maybe_unused, 87 struct machine *machine __maybe_unused) 88 { 89 struct perf_record *rec = container_of(tool, struct perf_record, tool); 90 if (write_output(rec, event, event->header.size) < 0) 91 return -1; 92 93 return 0; 94 } 95 96 static int perf_record__mmap_read(struct perf_record *rec, 97 struct perf_mmap *md) 98 { 99 unsigned int head = perf_mmap__read_head(md); 100 unsigned int old = md->prev; 101 unsigned char *data = md->base + rec->page_size; 102 unsigned long size; 103 void *buf; 104 int rc = 0; 105 106 if (old == head) 107 return 0; 108 109 rec->samples++; 110 111 size = head - old; 112 113 if ((old & md->mask) + size != (head & md->mask)) { 114 buf = &data[old & md->mask]; 115 size = md->mask + 1 - (old & md->mask); 116 old += size; 117 118 if (write_output(rec, buf, size) < 0) { 119 rc = -1; 120 goto out; 121 } 122 } 123 124 buf = &data[old & md->mask]; 125 size = head - old; 126 old += size; 127 128 if (write_output(rec, buf, size) < 0) { 129 rc = -1; 130 goto out; 131 } 132 133 md->prev = old; 134 perf_mmap__write_tail(md, old); 135 136 out: 137 return rc; 138 } 139 140 static volatile int done = 0; 141 static volatile int signr = -1; 142 static volatile int child_finished = 0; 143 144 static void sig_handler(int sig) 145 { 146 if (sig == SIGCHLD) 147 child_finished = 1; 148 149 done = 1; 150 signr = sig; 151 } 152 153 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg) 154 { 155 struct perf_record *rec = arg; 156 int status; 157 158 if (rec->evlist->workload.pid > 0) { 159 if (!child_finished) 160 kill(rec->evlist->workload.pid, SIGTERM); 161 162 wait(&status); 163 if (WIFSIGNALED(status)) 164 psignal(WTERMSIG(status), rec->progname); 165 } 166 167 if (signr == -1 || signr == SIGUSR1) 168 return; 169 170 signal(signr, SIG_DFL); 171 kill(getpid(), signr); 172 } 173 174 static bool perf_evlist__equal(struct perf_evlist *evlist, 175 struct perf_evlist *other) 176 { 177 struct perf_evsel *pos, *pair; 178 179 if (evlist->nr_entries != other->nr_entries) 180 return false; 181 182 pair = perf_evlist__first(other); 183 184 list_for_each_entry(pos, &evlist->entries, node) { 185 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) 186 return false; 187 pair = perf_evsel__next(pair); 188 } 189 190 return true; 191 } 192 193 static int perf_record__open(struct perf_record *rec) 194 { 195 struct perf_evsel *pos; 196 struct perf_evlist *evlist = rec->evlist; 197 struct perf_session *session = rec->session; 198 struct perf_record_opts *opts = &rec->opts; 199 int rc = 0; 200 201 perf_evlist__config_attrs(evlist, opts); 202 203 if (opts->group) 204 perf_evlist__set_leader(evlist); 205 206 list_for_each_entry(pos, &evlist->entries, node) { 207 struct perf_event_attr *attr = &pos->attr; 208 /* 209 * Check if parse_single_tracepoint_event has already asked for 210 * PERF_SAMPLE_TIME. 211 * 212 * XXX this is kludgy but short term fix for problems introduced by 213 * eac23d1c that broke 'perf script' by having different sample_types 214 * when using multiple tracepoint events when we use a perf binary 215 * that tries to use sample_id_all on an older kernel. 216 * 217 * We need to move counter creation to perf_session, support 218 * different sample_types, etc. 219 */ 220 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; 221 222 fallback_missing_features: 223 if (opts->exclude_guest_missing) 224 attr->exclude_guest = attr->exclude_host = 0; 225 retry_sample_id: 226 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; 227 try_again: 228 if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { 229 int err = errno; 230 231 if (err == EPERM || err == EACCES) { 232 ui__error_paranoid(); 233 rc = -err; 234 goto out; 235 } else if (err == ENODEV && opts->target.cpu_list) { 236 pr_err("No such device - did you specify" 237 " an out-of-range profile CPU?\n"); 238 rc = -err; 239 goto out; 240 } else if (err == EINVAL) { 241 if (!opts->exclude_guest_missing && 242 (attr->exclude_guest || attr->exclude_host)) { 243 pr_debug("Old kernel, cannot exclude " 244 "guest or host samples.\n"); 245 opts->exclude_guest_missing = true; 246 goto fallback_missing_features; 247 } else if (!opts->sample_id_all_missing) { 248 /* 249 * Old kernel, no attr->sample_id_type_all field 250 */ 251 opts->sample_id_all_missing = true; 252 if (!opts->sample_time && !opts->raw_samples && !time_needed) 253 attr->sample_type &= ~PERF_SAMPLE_TIME; 254 255 goto retry_sample_id; 256 } 257 } 258 259 /* 260 * If it's cycles then fall back to hrtimer 261 * based cpu-clock-tick sw counter, which 262 * is always available even if no PMU support. 263 * 264 * PPC returns ENXIO until 2.6.37 (behavior changed 265 * with commit b0a873e). 266 */ 267 if ((err == ENOENT || err == ENXIO) 268 && attr->type == PERF_TYPE_HARDWARE 269 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 270 271 if (verbose) 272 ui__warning("The cycles event is not supported, " 273 "trying to fall back to cpu-clock-ticks\n"); 274 attr->type = PERF_TYPE_SOFTWARE; 275 attr->config = PERF_COUNT_SW_CPU_CLOCK; 276 if (pos->name) { 277 free(pos->name); 278 pos->name = NULL; 279 } 280 goto try_again; 281 } 282 283 if (err == ENOENT) { 284 ui__error("The %s event is not supported.\n", 285 perf_evsel__name(pos)); 286 rc = -err; 287 goto out; 288 } 289 290 printf("\n"); 291 error("sys_perf_event_open() syscall returned with %d " 292 "(%s) for event %s. /bin/dmesg may provide " 293 "additional information.\n", 294 err, strerror(err), perf_evsel__name(pos)); 295 296 #if defined(__i386__) || defined(__x86_64__) 297 if (attr->type == PERF_TYPE_HARDWARE && 298 err == EOPNOTSUPP) { 299 pr_err("No hardware sampling interrupt available." 300 " No APIC? If so then you can boot the kernel" 301 " with the \"lapic\" boot parameter to" 302 " force-enable it.\n"); 303 rc = -err; 304 goto out; 305 } 306 #endif 307 308 pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 309 rc = -err; 310 goto out; 311 } 312 } 313 314 if (perf_evlist__apply_filters(evlist)) { 315 error("failed to set filter with %d (%s)\n", errno, 316 strerror(errno)); 317 rc = -1; 318 goto out; 319 } 320 321 if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { 322 if (errno == EPERM) { 323 pr_err("Permission error mapping pages.\n" 324 "Consider increasing " 325 "/proc/sys/kernel/perf_event_mlock_kb,\n" 326 "or try again with a smaller value of -m/--mmap_pages.\n" 327 "(current value: %d)\n", opts->mmap_pages); 328 rc = -errno; 329 } else if (!is_power_of_2(opts->mmap_pages)) { 330 pr_err("--mmap_pages/-m value must be a power of two."); 331 rc = -EINVAL; 332 } else { 333 pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno)); 334 rc = -errno; 335 } 336 goto out; 337 } 338 339 if (rec->file_new) 340 session->evlist = evlist; 341 else { 342 if (!perf_evlist__equal(session->evlist, evlist)) { 343 fprintf(stderr, "incompatible append\n"); 344 rc = -1; 345 goto out; 346 } 347 } 348 349 perf_session__set_id_hdr_size(session); 350 out: 351 return rc; 352 } 353 354 static int process_buildids(struct perf_record *rec) 355 { 356 u64 size = lseek(rec->output, 0, SEEK_CUR); 357 358 if (size == 0) 359 return 0; 360 361 rec->session->fd = rec->output; 362 return __perf_session__process_events(rec->session, rec->post_processing_offset, 363 size - rec->post_processing_offset, 364 size, &build_id__mark_dso_hit_ops); 365 } 366 367 static void perf_record__exit(int status, void *arg) 368 { 369 struct perf_record *rec = arg; 370 371 if (status != 0) 372 return; 373 374 if (!rec->opts.pipe_output) { 375 rec->session->header.data_size += rec->bytes_written; 376 377 if (!rec->no_buildid) 378 process_buildids(rec); 379 perf_session__write_header(rec->session, rec->evlist, 380 rec->output, true); 381 perf_session__delete(rec->session); 382 perf_evlist__delete(rec->evlist); 383 symbol__exit(); 384 } 385 } 386 387 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 388 { 389 int err; 390 struct perf_tool *tool = data; 391 392 if (machine__is_host(machine)) 393 return; 394 395 /* 396 *As for guest kernel when processing subcommand record&report, 397 *we arrange module mmap prior to guest kernel mmap and trigger 398 *a preload dso because default guest module symbols are loaded 399 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 400 *method is used to avoid symbol missing when the first addr is 401 *in module instead of in guest kernel. 402 */ 403 err = perf_event__synthesize_modules(tool, process_synthesized_event, 404 machine); 405 if (err < 0) 406 pr_err("Couldn't record guest kernel [%d]'s reference" 407 " relocation symbol.\n", machine->pid); 408 409 /* 410 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 411 * have no _text sometimes. 412 */ 413 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 414 machine, "_text"); 415 if (err < 0) 416 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 417 machine, "_stext"); 418 if (err < 0) 419 pr_err("Couldn't record guest kernel [%d]'s reference" 420 " relocation symbol.\n", machine->pid); 421 } 422 423 static struct perf_event_header finished_round_event = { 424 .size = sizeof(struct perf_event_header), 425 .type = PERF_RECORD_FINISHED_ROUND, 426 }; 427 428 static int perf_record__mmap_read_all(struct perf_record *rec) 429 { 430 int i; 431 int rc = 0; 432 433 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 434 if (rec->evlist->mmap[i].base) { 435 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) { 436 rc = -1; 437 goto out; 438 } 439 } 440 } 441 442 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA)) 443 rc = write_output(rec, &finished_round_event, 444 sizeof(finished_round_event)); 445 446 out: 447 return rc; 448 } 449 450 static int __cmd_record(struct perf_record *rec, int argc, const char **argv) 451 { 452 struct stat st; 453 int flags; 454 int err, output, feat; 455 unsigned long waking = 0; 456 const bool forks = argc > 0; 457 struct machine *machine; 458 struct perf_tool *tool = &rec->tool; 459 struct perf_record_opts *opts = &rec->opts; 460 struct perf_evlist *evsel_list = rec->evlist; 461 const char *output_name = rec->output_name; 462 struct perf_session *session; 463 464 rec->progname = argv[0]; 465 466 rec->page_size = sysconf(_SC_PAGE_SIZE); 467 468 on_exit(perf_record__sig_exit, rec); 469 signal(SIGCHLD, sig_handler); 470 signal(SIGINT, sig_handler); 471 signal(SIGUSR1, sig_handler); 472 473 if (!output_name) { 474 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) 475 opts->pipe_output = true; 476 else 477 rec->output_name = output_name = "perf.data"; 478 } 479 if (output_name) { 480 if (!strcmp(output_name, "-")) 481 opts->pipe_output = true; 482 else if (!stat(output_name, &st) && st.st_size) { 483 if (rec->write_mode == WRITE_FORCE) { 484 char oldname[PATH_MAX]; 485 snprintf(oldname, sizeof(oldname), "%s.old", 486 output_name); 487 unlink(oldname); 488 rename(output_name, oldname); 489 } 490 } else if (rec->write_mode == WRITE_APPEND) { 491 rec->write_mode = WRITE_FORCE; 492 } 493 } 494 495 flags = O_CREAT|O_RDWR; 496 if (rec->write_mode == WRITE_APPEND) 497 rec->file_new = 0; 498 else 499 flags |= O_TRUNC; 500 501 if (opts->pipe_output) 502 output = STDOUT_FILENO; 503 else 504 output = open(output_name, flags, S_IRUSR | S_IWUSR); 505 if (output < 0) { 506 perror("failed to create output file"); 507 return -1; 508 } 509 510 rec->output = output; 511 512 session = perf_session__new(output_name, O_WRONLY, 513 rec->write_mode == WRITE_FORCE, false, NULL); 514 if (session == NULL) { 515 pr_err("Not enough memory for reading perf file header\n"); 516 return -1; 517 } 518 519 rec->session = session; 520 521 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 522 perf_header__set_feat(&session->header, feat); 523 524 if (rec->no_buildid) 525 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 526 527 if (!have_tracepoints(&evsel_list->entries)) 528 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 529 530 if (!rec->opts.branch_stack) 531 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 532 533 if (!rec->file_new) { 534 err = perf_session__read_header(session, output); 535 if (err < 0) 536 goto out_delete_session; 537 } 538 539 if (forks) { 540 err = perf_evlist__prepare_workload(evsel_list, opts, argv); 541 if (err < 0) { 542 pr_err("Couldn't run the workload!\n"); 543 goto out_delete_session; 544 } 545 } 546 547 if (perf_record__open(rec) != 0) { 548 err = -1; 549 goto out_delete_session; 550 } 551 552 /* 553 * perf_session__delete(session) will be called at perf_record__exit() 554 */ 555 on_exit(perf_record__exit, rec); 556 557 if (opts->pipe_output) { 558 err = perf_header__write_pipe(output); 559 if (err < 0) 560 goto out_delete_session; 561 } else if (rec->file_new) { 562 err = perf_session__write_header(session, evsel_list, 563 output, false); 564 if (err < 0) 565 goto out_delete_session; 566 } 567 568 if (!rec->no_buildid 569 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 570 pr_err("Couldn't generate buildids. " 571 "Use --no-buildid to profile anyway.\n"); 572 err = -1; 573 goto out_delete_session; 574 } 575 576 rec->post_processing_offset = lseek(output, 0, SEEK_CUR); 577 578 machine = perf_session__find_host_machine(session); 579 if (!machine) { 580 pr_err("Couldn't find native kernel information.\n"); 581 err = -1; 582 goto out_delete_session; 583 } 584 585 if (opts->pipe_output) { 586 err = perf_event__synthesize_attrs(tool, session, 587 process_synthesized_event); 588 if (err < 0) { 589 pr_err("Couldn't synthesize attrs.\n"); 590 goto out_delete_session; 591 } 592 593 err = perf_event__synthesize_event_types(tool, process_synthesized_event, 594 machine); 595 if (err < 0) { 596 pr_err("Couldn't synthesize event_types.\n"); 597 goto out_delete_session; 598 } 599 600 if (have_tracepoints(&evsel_list->entries)) { 601 /* 602 * FIXME err <= 0 here actually means that 603 * there were no tracepoints so its not really 604 * an error, just that we don't need to 605 * synthesize anything. We really have to 606 * return this more properly and also 607 * propagate errors that now are calling die() 608 */ 609 err = perf_event__synthesize_tracing_data(tool, output, evsel_list, 610 process_synthesized_event); 611 if (err <= 0) { 612 pr_err("Couldn't record tracing data.\n"); 613 goto out_delete_session; 614 } 615 advance_output(rec, err); 616 } 617 } 618 619 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 620 machine, "_text"); 621 if (err < 0) 622 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 623 machine, "_stext"); 624 if (err < 0) 625 pr_err("Couldn't record kernel reference relocation symbol\n" 626 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 627 "Check /proc/kallsyms permission or run as root.\n"); 628 629 err = perf_event__synthesize_modules(tool, process_synthesized_event, 630 machine); 631 if (err < 0) 632 pr_err("Couldn't record kernel module information.\n" 633 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 634 "Check /proc/modules permission or run as root.\n"); 635 636 if (perf_guest) 637 perf_session__process_machines(session, tool, 638 perf_event__synthesize_guest_os); 639 640 if (!opts->target.system_wide) 641 err = perf_event__synthesize_thread_map(tool, evsel_list->threads, 642 process_synthesized_event, 643 machine); 644 else 645 err = perf_event__synthesize_threads(tool, process_synthesized_event, 646 machine); 647 648 if (err != 0) 649 goto out_delete_session; 650 651 if (rec->realtime_prio) { 652 struct sched_param param; 653 654 param.sched_priority = rec->realtime_prio; 655 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 656 pr_err("Could not set realtime priority.\n"); 657 err = -1; 658 goto out_delete_session; 659 } 660 } 661 662 perf_evlist__enable(evsel_list); 663 664 /* 665 * Let the child rip 666 */ 667 if (forks) 668 perf_evlist__start_workload(evsel_list); 669 670 for (;;) { 671 int hits = rec->samples; 672 673 if (perf_record__mmap_read_all(rec) < 0) { 674 err = -1; 675 goto out_delete_session; 676 } 677 678 if (hits == rec->samples) { 679 if (done) 680 break; 681 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); 682 waking++; 683 } 684 685 if (done) 686 perf_evlist__disable(evsel_list); 687 } 688 689 if (quiet || signr == SIGUSR1) 690 return 0; 691 692 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 693 694 /* 695 * Approximate RIP event size: 24 bytes. 696 */ 697 fprintf(stderr, 698 "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", 699 (double)rec->bytes_written / 1024.0 / 1024.0, 700 output_name, 701 rec->bytes_written / 24); 702 703 return 0; 704 705 out_delete_session: 706 perf_session__delete(session); 707 return err; 708 } 709 710 #define BRANCH_OPT(n, m) \ 711 { .name = n, .mode = (m) } 712 713 #define BRANCH_END { .name = NULL } 714 715 struct branch_mode { 716 const char *name; 717 int mode; 718 }; 719 720 static const struct branch_mode branch_modes[] = { 721 BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), 722 BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), 723 BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), 724 BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), 725 BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), 726 BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), 727 BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), 728 BRANCH_END 729 }; 730 731 static int 732 parse_branch_stack(const struct option *opt, const char *str, int unset) 733 { 734 #define ONLY_PLM \ 735 (PERF_SAMPLE_BRANCH_USER |\ 736 PERF_SAMPLE_BRANCH_KERNEL |\ 737 PERF_SAMPLE_BRANCH_HV) 738 739 uint64_t *mode = (uint64_t *)opt->value; 740 const struct branch_mode *br; 741 char *s, *os = NULL, *p; 742 int ret = -1; 743 744 if (unset) 745 return 0; 746 747 /* 748 * cannot set it twice, -b + --branch-filter for instance 749 */ 750 if (*mode) 751 return -1; 752 753 /* str may be NULL in case no arg is passed to -b */ 754 if (str) { 755 /* because str is read-only */ 756 s = os = strdup(str); 757 if (!s) 758 return -1; 759 760 for (;;) { 761 p = strchr(s, ','); 762 if (p) 763 *p = '\0'; 764 765 for (br = branch_modes; br->name; br++) { 766 if (!strcasecmp(s, br->name)) 767 break; 768 } 769 if (!br->name) { 770 ui__warning("unknown branch filter %s," 771 " check man page\n", s); 772 goto error; 773 } 774 775 *mode |= br->mode; 776 777 if (!p) 778 break; 779 780 s = p + 1; 781 } 782 } 783 ret = 0; 784 785 /* default to any branch */ 786 if ((*mode & ~ONLY_PLM) == 0) { 787 *mode = PERF_SAMPLE_BRANCH_ANY; 788 } 789 error: 790 free(os); 791 return ret; 792 } 793 794 #ifdef LIBUNWIND_SUPPORT 795 static int get_stack_size(char *str, unsigned long *_size) 796 { 797 char *endptr; 798 unsigned long size; 799 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); 800 801 size = strtoul(str, &endptr, 0); 802 803 do { 804 if (*endptr) 805 break; 806 807 size = round_up(size, sizeof(u64)); 808 if (!size || size > max_size) 809 break; 810 811 *_size = size; 812 return 0; 813 814 } while (0); 815 816 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", 817 max_size, str); 818 return -1; 819 } 820 #endif /* LIBUNWIND_SUPPORT */ 821 822 static int 823 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, 824 int unset) 825 { 826 struct perf_record *rec = (struct perf_record *)opt->value; 827 char *tok, *name, *saveptr = NULL; 828 char *buf; 829 int ret = -1; 830 831 /* --no-call-graph */ 832 if (unset) 833 return 0; 834 835 /* We specified default option if none is provided. */ 836 BUG_ON(!arg); 837 838 /* We need buffer that we know we can write to. */ 839 buf = malloc(strlen(arg) + 1); 840 if (!buf) 841 return -ENOMEM; 842 843 strcpy(buf, arg); 844 845 tok = strtok_r((char *)buf, ",", &saveptr); 846 name = tok ? : (char *)buf; 847 848 do { 849 /* Framepointer style */ 850 if (!strncmp(name, "fp", sizeof("fp"))) { 851 if (!strtok_r(NULL, ",", &saveptr)) { 852 rec->opts.call_graph = CALLCHAIN_FP; 853 ret = 0; 854 } else 855 pr_err("callchain: No more arguments " 856 "needed for -g fp\n"); 857 break; 858 859 #ifdef LIBUNWIND_SUPPORT 860 /* Dwarf style */ 861 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { 862 const unsigned long default_stack_dump_size = 8192; 863 864 ret = 0; 865 rec->opts.call_graph = CALLCHAIN_DWARF; 866 rec->opts.stack_dump_size = default_stack_dump_size; 867 868 tok = strtok_r(NULL, ",", &saveptr); 869 if (tok) { 870 unsigned long size = 0; 871 872 ret = get_stack_size(tok, &size); 873 rec->opts.stack_dump_size = size; 874 } 875 876 if (!ret) 877 pr_debug("callchain: stack dump size %d\n", 878 rec->opts.stack_dump_size); 879 #endif /* LIBUNWIND_SUPPORT */ 880 } else { 881 pr_err("callchain: Unknown -g option " 882 "value: %s\n", arg); 883 break; 884 } 885 886 } while (0); 887 888 free(buf); 889 890 if (!ret) 891 pr_debug("callchain: type %d\n", rec->opts.call_graph); 892 893 return ret; 894 } 895 896 static const char * const record_usage[] = { 897 "perf record [<options>] [<command>]", 898 "perf record [<options>] -- <command> [<options>]", 899 NULL 900 }; 901 902 /* 903 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new 904 * because we need to have access to it in perf_record__exit, that is called 905 * after cmd_record() exits, but since record_options need to be accessible to 906 * builtin-script, leave it here. 907 * 908 * At least we don't ouch it in all the other functions here directly. 909 * 910 * Just say no to tons of global variables, sigh. 911 */ 912 static struct perf_record record = { 913 .opts = { 914 .mmap_pages = UINT_MAX, 915 .user_freq = UINT_MAX, 916 .user_interval = ULLONG_MAX, 917 .freq = 4000, 918 .target = { 919 .uses_mmap = true, 920 }, 921 }, 922 .write_mode = WRITE_FORCE, 923 .file_new = true, 924 }; 925 926 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " 927 928 #ifdef LIBUNWIND_SUPPORT 929 static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; 930 #else 931 static const char callchain_help[] = CALLCHAIN_HELP "[fp]"; 932 #endif 933 934 /* 935 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 936 * with it and switch to use the library functions in perf_evlist that came 937 * from builtin-record.c, i.e. use perf_record_opts, 938 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 939 * using pipes, etc. 940 */ 941 const struct option record_options[] = { 942 OPT_CALLBACK('e', "event", &record.evlist, "event", 943 "event selector. use 'perf list' to list available events", 944 parse_events_option), 945 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 946 "event filter", parse_filter), 947 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 948 "record events on existing process id"), 949 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 950 "record events on existing thread id"), 951 OPT_INTEGER('r', "realtime", &record.realtime_prio, 952 "collect data with this RT SCHED_FIFO priority"), 953 OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay, 954 "collect data without buffering"), 955 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 956 "collect raw sample records from all opened counters"), 957 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 958 "system-wide collection from all CPUs"), 959 OPT_BOOLEAN('A', "append", &record.append_file, 960 "append to the output file to do incremental profiling"), 961 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 962 "list of cpus to monitor"), 963 OPT_BOOLEAN('f', "force", &record.force, 964 "overwrite existing data file (deprecated)"), 965 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 966 OPT_STRING('o', "output", &record.output_name, "file", 967 "output file name"), 968 OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, 969 "child tasks do not inherit counters"), 970 OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), 971 OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages, 972 "number of mmap data pages"), 973 OPT_BOOLEAN(0, "group", &record.opts.group, 974 "put the counters into a counter group"), 975 OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", 976 callchain_help, &parse_callchain_opt, 977 "fp"), 978 OPT_INCR('v', "verbose", &verbose, 979 "be more verbose (show counter open errors, etc)"), 980 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 981 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 982 "per thread counts"), 983 OPT_BOOLEAN('d', "data", &record.opts.sample_address, 984 "Sample addresses"), 985 OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), 986 OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"), 987 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 988 "don't sample"), 989 OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, 990 "do not update the buildid cache"), 991 OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, 992 "do not collect buildids in perf.data"), 993 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 994 "monitor event in cgroup name only", 995 parse_cgroups), 996 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 997 "user to profile"), 998 999 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 1000 "branch any", "sample any taken branches", 1001 parse_branch_stack), 1002 1003 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 1004 "branch filter mask", "branch stack filter modes", 1005 parse_branch_stack), 1006 OPT_END() 1007 }; 1008 1009 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) 1010 { 1011 int err = -ENOMEM; 1012 struct perf_evsel *pos; 1013 struct perf_evlist *evsel_list; 1014 struct perf_record *rec = &record; 1015 char errbuf[BUFSIZ]; 1016 1017 evsel_list = perf_evlist__new(NULL, NULL); 1018 if (evsel_list == NULL) 1019 return -ENOMEM; 1020 1021 rec->evlist = evsel_list; 1022 1023 argc = parse_options(argc, argv, record_options, record_usage, 1024 PARSE_OPT_STOP_AT_NON_OPTION); 1025 if (!argc && perf_target__none(&rec->opts.target)) 1026 usage_with_options(record_usage, record_options); 1027 1028 if (rec->force && rec->append_file) { 1029 ui__error("Can't overwrite and append at the same time." 1030 " You need to choose between -f and -A"); 1031 usage_with_options(record_usage, record_options); 1032 } else if (rec->append_file) { 1033 rec->write_mode = WRITE_APPEND; 1034 } else { 1035 rec->write_mode = WRITE_FORCE; 1036 } 1037 1038 if (nr_cgroups && !rec->opts.target.system_wide) { 1039 ui__error("cgroup monitoring only available in" 1040 " system-wide mode\n"); 1041 usage_with_options(record_usage, record_options); 1042 } 1043 1044 symbol__init(); 1045 1046 if (symbol_conf.kptr_restrict) 1047 pr_warning( 1048 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1049 "check /proc/sys/kernel/kptr_restrict.\n\n" 1050 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1051 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1052 "Samples in kernel modules won't be resolved at all.\n\n" 1053 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1054 "even with a suitable vmlinux or kallsyms file.\n\n"); 1055 1056 if (rec->no_buildid_cache || rec->no_buildid) 1057 disable_buildid_cache(); 1058 1059 if (evsel_list->nr_entries == 0 && 1060 perf_evlist__add_default(evsel_list) < 0) { 1061 pr_err("Not enough memory for event selector list\n"); 1062 goto out_symbol_exit; 1063 } 1064 1065 err = perf_target__validate(&rec->opts.target); 1066 if (err) { 1067 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1068 ui__warning("%s", errbuf); 1069 } 1070 1071 err = perf_target__parse_uid(&rec->opts.target); 1072 if (err) { 1073 int saved_errno = errno; 1074 1075 perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1076 ui__error("%s", errbuf); 1077 1078 err = -saved_errno; 1079 goto out_free_fd; 1080 } 1081 1082 err = -ENOMEM; 1083 if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0) 1084 usage_with_options(record_usage, record_options); 1085 1086 list_for_each_entry(pos, &evsel_list->entries, node) { 1087 if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos))) 1088 goto out_free_fd; 1089 } 1090 1091 if (rec->opts.user_interval != ULLONG_MAX) 1092 rec->opts.default_interval = rec->opts.user_interval; 1093 if (rec->opts.user_freq != UINT_MAX) 1094 rec->opts.freq = rec->opts.user_freq; 1095 1096 /* 1097 * User specified count overrides default frequency. 1098 */ 1099 if (rec->opts.default_interval) 1100 rec->opts.freq = 0; 1101 else if (rec->opts.freq) { 1102 rec->opts.default_interval = rec->opts.freq; 1103 } else { 1104 ui__error("frequency and count are zero, aborting\n"); 1105 err = -EINVAL; 1106 goto out_free_fd; 1107 } 1108 1109 err = __cmd_record(&record, argc, argv); 1110 out_free_fd: 1111 perf_evlist__delete_maps(evsel_list); 1112 out_symbol_exit: 1113 symbol__exit(); 1114 return err; 1115 } 1116