1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "perf.h" 12 13 #include "util/build-id.h" 14 #include "util/util.h" 15 #include <subcmd/parse-options.h> 16 #include "util/parse-events.h" 17 #include "util/config.h" 18 19 #include "util/callchain.h" 20 #include "util/cgroup.h" 21 #include "util/header.h" 22 #include "util/event.h" 23 #include "util/evlist.h" 24 #include "util/evsel.h" 25 #include "util/debug.h" 26 #include "util/session.h" 27 #include "util/tool.h" 28 #include "util/symbol.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/llvm-utils.h" 38 #include "util/bpf-loader.h" 39 #include "util/trigger.h" 40 #include "util/perf-hooks.h" 41 #include "util/cpu-set-sched.h" 42 #include "util/time-utils.h" 43 #include "util/units.h" 44 #include "util/bpf-event.h" 45 #include "asm/bug.h" 46 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <locale.h> 50 #include <poll.h> 51 #include <unistd.h> 52 #include <sched.h> 53 #include <signal.h> 54 #include <sys/mman.h> 55 #include <sys/wait.h> 56 #include <linux/time64.h> 57 58 struct switch_output { 59 bool enabled; 60 bool signal; 61 unsigned long size; 62 unsigned long time; 63 const char *str; 64 bool set; 65 char **filenames; 66 int num_files; 67 int cur_file; 68 }; 69 70 struct record { 71 struct perf_tool tool; 72 struct record_opts opts; 73 u64 bytes_written; 74 struct perf_data data; 75 struct auxtrace_record *itr; 76 struct perf_evlist *evlist; 77 struct perf_session *session; 78 int realtime_prio; 79 bool no_buildid; 80 bool no_buildid_set; 81 bool no_buildid_cache; 82 bool no_buildid_cache_set; 83 bool buildid_all; 84 bool timestamp_filename; 85 bool timestamp_boundary; 86 struct switch_output switch_output; 87 unsigned long long samples; 88 cpu_set_t affinity_mask; 89 }; 90 91 static volatile int auxtrace_record__snapshot_started; 92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 93 static DEFINE_TRIGGER(switch_output_trigger); 94 95 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 96 "SYS", "NODE", "CPU" 97 }; 98 99 static bool switch_output_signal(struct record *rec) 100 { 101 return rec->switch_output.signal && 102 trigger_is_ready(&switch_output_trigger); 103 } 104 105 static bool switch_output_size(struct record *rec) 106 { 107 return rec->switch_output.size && 108 trigger_is_ready(&switch_output_trigger) && 109 (rec->bytes_written >= rec->switch_output.size); 110 } 111 112 static bool switch_output_time(struct record *rec) 113 { 114 return rec->switch_output.time && 115 trigger_is_ready(&switch_output_trigger); 116 } 117 118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, 119 void *bf, size_t size) 120 { 121 struct perf_data_file *file = &rec->session->data->file; 122 123 if (perf_data_file__write(file, bf, size) < 0) { 124 pr_err("failed to write perf data, error: %m\n"); 125 return -1; 126 } 127 128 rec->bytes_written += size; 129 130 if (switch_output_size(rec)) 131 trigger_hit(&switch_output_trigger); 132 133 return 0; 134 } 135 136 #ifdef HAVE_AIO_SUPPORT 137 static int record__aio_write(struct aiocb *cblock, int trace_fd, 138 
void *buf, size_t size, off_t off) 139 { 140 int rc; 141 142 cblock->aio_fildes = trace_fd; 143 cblock->aio_buf = buf; 144 cblock->aio_nbytes = size; 145 cblock->aio_offset = off; 146 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 147 148 do { 149 rc = aio_write(cblock); 150 if (rc == 0) { 151 break; 152 } else if (errno != EAGAIN) { 153 cblock->aio_fildes = -1; 154 pr_err("failed to queue perf data, error: %m\n"); 155 break; 156 } 157 } while (1); 158 159 return rc; 160 } 161 162 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) 163 { 164 void *rem_buf; 165 off_t rem_off; 166 size_t rem_size; 167 int rc, aio_errno; 168 ssize_t aio_ret, written; 169 170 aio_errno = aio_error(cblock); 171 if (aio_errno == EINPROGRESS) 172 return 0; 173 174 written = aio_ret = aio_return(cblock); 175 if (aio_ret < 0) { 176 if (aio_errno != EINTR) 177 pr_err("failed to write perf data, error: %m\n"); 178 written = 0; 179 } 180 181 rem_size = cblock->aio_nbytes - written; 182 183 if (rem_size == 0) { 184 cblock->aio_fildes = -1; 185 /* 186 * md->refcount is incremented in perf_mmap__push() for 187 * every enqueued aio write request so decrement it because 188 * the request is now complete. 189 */ 190 perf_mmap__put(md); 191 rc = 1; 192 } else { 193 /* 194 * aio write request may require restart with the 195 * reminder if the kernel didn't write whole 196 * chunk at once. 197 */ 198 rem_off = cblock->aio_offset + written; 199 rem_buf = (void *)(cblock->aio_buf + written); 200 record__aio_write(cblock, cblock->aio_fildes, 201 rem_buf, rem_size, rem_off); 202 rc = 0; 203 } 204 205 return rc; 206 } 207 208 static int record__aio_sync(struct perf_mmap *md, bool sync_all) 209 { 210 struct aiocb **aiocb = md->aio.aiocb; 211 struct aiocb *cblocks = md->aio.cblocks; 212 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 213 int i, do_suspend; 214 215 do { 216 do_suspend = 0; 217 for (i = 0; i < md->aio.nr_cblocks; ++i) { 218 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 219 if (sync_all) 220 aiocb[i] = NULL; 221 else 222 return i; 223 } else { 224 /* 225 * Started aio write is not complete yet 226 * so it has to be waited before the 227 * next allocation. 
228 */ 229 aiocb[i] = &cblocks[i]; 230 do_suspend = 1; 231 } 232 } 233 if (!do_suspend) 234 return -1; 235 236 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 237 if (!(errno == EAGAIN || errno == EINTR)) 238 pr_err("failed to sync perf data, error: %m\n"); 239 } 240 } while (1); 241 } 242 243 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) 244 { 245 struct record *rec = to; 246 int ret, trace_fd = rec->session->data->file.fd; 247 248 rec->samples++; 249 250 ret = record__aio_write(cblock, trace_fd, bf, size, off); 251 if (!ret) { 252 rec->bytes_written += size; 253 if (switch_output_size(rec)) 254 trigger_hit(&switch_output_trigger); 255 } 256 257 return ret; 258 } 259 260 static off_t record__aio_get_pos(int trace_fd) 261 { 262 return lseek(trace_fd, 0, SEEK_CUR); 263 } 264 265 static void record__aio_set_pos(int trace_fd, off_t pos) 266 { 267 lseek(trace_fd, pos, SEEK_SET); 268 } 269 270 static void record__aio_mmap_read_sync(struct record *rec) 271 { 272 int i; 273 struct perf_evlist *evlist = rec->evlist; 274 struct perf_mmap *maps = evlist->mmap; 275 276 if (!rec->opts.nr_cblocks) 277 return; 278 279 for (i = 0; i < evlist->nr_mmaps; i++) { 280 struct perf_mmap *map = &maps[i]; 281 282 if (map->base) 283 record__aio_sync(map, true); 284 } 285 } 286 287 static int nr_cblocks_default = 1; 288 static int nr_cblocks_max = 4; 289 290 static int record__aio_parse(const struct option *opt, 291 const char *str, 292 int unset) 293 { 294 struct record_opts *opts = (struct record_opts *)opt->value; 295 296 if (unset) { 297 opts->nr_cblocks = 0; 298 } else { 299 if (str) 300 opts->nr_cblocks = strtol(str, NULL, 0); 301 if (!opts->nr_cblocks) 302 opts->nr_cblocks = nr_cblocks_default; 303 } 304 305 return 0; 306 } 307 #else /* HAVE_AIO_SUPPORT */ 308 static int nr_cblocks_max = 0; 309 310 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused) 311 { 312 return -1; 313 } 314 315 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused, 316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused) 317 { 318 return -1; 319 } 320 321 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 322 { 323 return -1; 324 } 325 326 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 327 { 328 } 329 330 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 331 { 332 } 333 #endif 334 335 static int record__aio_enabled(struct record *rec) 336 { 337 return rec->opts.nr_cblocks > 0; 338 } 339 340 #define MMAP_FLUSH_DEFAULT 1 341 static int record__mmap_flush_parse(const struct option *opt, 342 const char *str, 343 int unset) 344 { 345 int flush_max; 346 struct record_opts *opts = (struct record_opts *)opt->value; 347 static struct parse_tag tags[] = { 348 { .tag = 'B', .mult = 1 }, 349 { .tag = 'K', .mult = 1 << 10 }, 350 { .tag = 'M', .mult = 1 << 20 }, 351 { .tag = 'G', .mult = 1 << 30 }, 352 { .tag = 0 }, 353 }; 354 355 if (unset) 356 return 0; 357 358 if (str) { 359 opts->mmap_flush = parse_tag_value(str, tags); 360 if (opts->mmap_flush == (int)-1) 361 opts->mmap_flush = strtol(str, NULL, 0); 362 } 363 364 if (!opts->mmap_flush) 365 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 366 367 flush_max = perf_evlist__mmap_size(opts->mmap_pages); 368 flush_max /= 4; 369 if (opts->mmap_flush > flush_max) 370 opts->mmap_flush = flush_max; 371 372 return 0; 373 } 374 375 static int 
process_synthesized_event(struct perf_tool *tool, 376 union perf_event *event, 377 struct perf_sample *sample __maybe_unused, 378 struct machine *machine __maybe_unused) 379 { 380 struct record *rec = container_of(tool, struct record, tool); 381 return record__write(rec, NULL, event, event->header.size); 382 } 383 384 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) 385 { 386 struct record *rec = to; 387 388 rec->samples++; 389 return record__write(rec, map, bf, size); 390 } 391 392 static volatile int done; 393 static volatile int signr = -1; 394 static volatile int child_finished; 395 396 static void sig_handler(int sig) 397 { 398 if (sig == SIGCHLD) 399 child_finished = 1; 400 else 401 signr = sig; 402 403 done = 1; 404 } 405 406 static void sigsegv_handler(int sig) 407 { 408 perf_hooks__recover(); 409 sighandler_dump_stack(sig); 410 } 411 412 static void record__sig_exit(void) 413 { 414 if (signr == -1) 415 return; 416 417 signal(signr, SIG_DFL); 418 raise(signr); 419 } 420 421 #ifdef HAVE_AUXTRACE_SUPPORT 422 423 static int record__process_auxtrace(struct perf_tool *tool, 424 struct perf_mmap *map, 425 union perf_event *event, void *data1, 426 size_t len1, void *data2, size_t len2) 427 { 428 struct record *rec = container_of(tool, struct record, tool); 429 struct perf_data *data = &rec->data; 430 size_t padding; 431 u8 pad[8] = {0}; 432 433 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { 434 off_t file_offset; 435 int fd = perf_data__fd(data); 436 int err; 437 438 file_offset = lseek(fd, 0, SEEK_CUR); 439 if (file_offset == -1) 440 return -1; 441 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 442 event, file_offset); 443 if (err) 444 return err; 445 } 446 447 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 448 padding = (len1 + len2) & 7; 449 if (padding) 450 padding = 8 - padding; 451 452 record__write(rec, map, event, event->header.size); 453 record__write(rec, map, data1, len1); 454 if (len2) 455 record__write(rec, map, data2, len2); 456 record__write(rec, map, &pad, padding); 457 458 return 0; 459 } 460 461 static int record__auxtrace_mmap_read(struct record *rec, 462 struct perf_mmap *map) 463 { 464 int ret; 465 466 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 467 record__process_auxtrace); 468 if (ret < 0) 469 return ret; 470 471 if (ret) 472 rec->samples++; 473 474 return 0; 475 } 476 477 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 478 struct perf_mmap *map) 479 { 480 int ret; 481 482 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 483 record__process_auxtrace, 484 rec->opts.auxtrace_snapshot_size); 485 if (ret < 0) 486 return ret; 487 488 if (ret) 489 rec->samples++; 490 491 return 0; 492 } 493 494 static int record__auxtrace_read_snapshot_all(struct record *rec) 495 { 496 int i; 497 int rc = 0; 498 499 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 500 struct perf_mmap *map = &rec->evlist->mmap[i]; 501 502 if (!map->auxtrace_mmap.base) 503 continue; 504 505 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 506 rc = -1; 507 goto out; 508 } 509 } 510 out: 511 return rc; 512 } 513 514 static void record__read_auxtrace_snapshot(struct record *rec) 515 { 516 pr_debug("Recording AUX area tracing snapshot\n"); 517 if (record__auxtrace_read_snapshot_all(rec) < 0) { 518 trigger_error(&auxtrace_snapshot_trigger); 519 } else { 520 if (auxtrace_record__snapshot_finish(rec->itr)) 521 trigger_error(&auxtrace_snapshot_trigger); 522 else 523 
trigger_ready(&auxtrace_snapshot_trigger); 524 } 525 } 526 527 static int record__auxtrace_init(struct record *rec) 528 { 529 int err; 530 531 if (!rec->itr) { 532 rec->itr = auxtrace_record__init(rec->evlist, &err); 533 if (err) 534 return err; 535 } 536 537 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 538 rec->opts.auxtrace_snapshot_opts); 539 if (err) 540 return err; 541 542 return auxtrace_parse_filters(rec->evlist); 543 } 544 545 #else 546 547 static inline 548 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 549 struct perf_mmap *map __maybe_unused) 550 { 551 return 0; 552 } 553 554 static inline 555 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 556 { 557 } 558 559 static inline 560 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 561 { 562 return 0; 563 } 564 565 static int record__auxtrace_init(struct record *rec __maybe_unused) 566 { 567 return 0; 568 } 569 570 #endif 571 572 static int record__mmap_evlist(struct record *rec, 573 struct perf_evlist *evlist) 574 { 575 struct record_opts *opts = &rec->opts; 576 char msg[512]; 577 578 if (opts->affinity != PERF_AFFINITY_SYS) 579 cpu__setup_cpunode_map(); 580 581 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 582 opts->auxtrace_mmap_pages, 583 opts->auxtrace_snapshot_mode, 584 opts->nr_cblocks, opts->affinity, 585 opts->mmap_flush) < 0) { 586 if (errno == EPERM) { 587 pr_err("Permission error mapping pages.\n" 588 "Consider increasing " 589 "/proc/sys/kernel/perf_event_mlock_kb,\n" 590 "or try again with a smaller value of -m/--mmap_pages.\n" 591 "(current value: %u,%u)\n", 592 opts->mmap_pages, opts->auxtrace_mmap_pages); 593 return -errno; 594 } else { 595 pr_err("failed to mmap with %d (%s)\n", errno, 596 str_error_r(errno, msg, sizeof(msg))); 597 if (errno) 598 return -errno; 599 else 600 return -EINVAL; 601 } 602 } 603 return 0; 604 } 605 606 static int record__mmap(struct record *rec) 607 { 608 return record__mmap_evlist(rec, rec->evlist); 609 } 610 611 static int record__open(struct record *rec) 612 { 613 char msg[BUFSIZ]; 614 struct perf_evsel *pos; 615 struct perf_evlist *evlist = rec->evlist; 616 struct perf_session *session = rec->session; 617 struct record_opts *opts = &rec->opts; 618 int rc = 0; 619 620 /* 621 * For initial_delay we need to add a dummy event so that we can track 622 * PERF_RECORD_MMAP while we wait for the initial delay to enable the 623 * real events, the ones asked by the user. 
624 */ 625 if (opts->initial_delay) { 626 if (perf_evlist__add_dummy(evlist)) 627 return -ENOMEM; 628 629 pos = perf_evlist__first(evlist); 630 pos->tracking = 0; 631 pos = perf_evlist__last(evlist); 632 pos->tracking = 1; 633 pos->attr.enable_on_exec = 1; 634 } 635 636 perf_evlist__config(evlist, opts, &callchain_param); 637 638 evlist__for_each_entry(evlist, pos) { 639 try_again: 640 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 641 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 642 if (verbose > 0) 643 ui__warning("%s\n", msg); 644 goto try_again; 645 } 646 if ((errno == EINVAL || errno == EBADF) && 647 pos->leader != pos && 648 pos->weak_group) { 649 pos = perf_evlist__reset_weak_group(evlist, pos); 650 goto try_again; 651 } 652 rc = -errno; 653 perf_evsel__open_strerror(pos, &opts->target, 654 errno, msg, sizeof(msg)); 655 ui__error("%s\n", msg); 656 goto out; 657 } 658 659 pos->supported = true; 660 } 661 662 if (perf_evlist__apply_filters(evlist, &pos)) { 663 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 664 pos->filter, perf_evsel__name(pos), errno, 665 str_error_r(errno, msg, sizeof(msg))); 666 rc = -1; 667 goto out; 668 } 669 670 rc = record__mmap(rec); 671 if (rc) 672 goto out; 673 674 session->evlist = evlist; 675 perf_session__set_id_hdr_size(session); 676 out: 677 return rc; 678 } 679 680 static int process_sample_event(struct perf_tool *tool, 681 union perf_event *event, 682 struct perf_sample *sample, 683 struct perf_evsel *evsel, 684 struct machine *machine) 685 { 686 struct record *rec = container_of(tool, struct record, tool); 687 688 if (rec->evlist->first_sample_time == 0) 689 rec->evlist->first_sample_time = sample->time; 690 691 rec->evlist->last_sample_time = sample->time; 692 693 if (rec->buildid_all) 694 return 0; 695 696 rec->samples++; 697 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 698 } 699 700 static int process_buildids(struct record *rec) 701 { 702 struct perf_session *session = rec->session; 703 704 if (perf_data__size(&rec->data) == 0) 705 return 0; 706 707 /* 708 * During this process, it'll load kernel map and replace the 709 * dso->long_name to a real pathname it found. In this case 710 * we prefer the vmlinux path like 711 * /lib/modules/3.16.4/build/vmlinux 712 * 713 * rather than build-id path (in debug directory). 714 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 715 */ 716 symbol_conf.ignore_vmlinux_buildid = true; 717 718 /* 719 * If --buildid-all is given, it marks all DSO regardless of hits, 720 * so no need to process samples. But if timestamp_boundary is enabled, 721 * it still needs to walk on all samples to get the timestamps of 722 * first/last samples. 723 */ 724 if (rec->buildid_all && !rec->timestamp_boundary) 725 rec->tool.sample = NULL; 726 727 return perf_session__process_events(session); 728 } 729 730 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 731 { 732 int err; 733 struct perf_tool *tool = data; 734 /* 735 *As for guest kernel when processing subcommand record&report, 736 *we arrange module mmap prior to guest kernel mmap and trigger 737 *a preload dso because default guest module symbols are loaded 738 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 739 *method is used to avoid symbol missing when the first addr is 740 *in module instead of in guest kernel. 
741 */ 742 err = perf_event__synthesize_modules(tool, process_synthesized_event, 743 machine); 744 if (err < 0) 745 pr_err("Couldn't record guest kernel [%d]'s reference" 746 " relocation symbol.\n", machine->pid); 747 748 /* 749 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 750 * have no _text sometimes. 751 */ 752 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 753 machine); 754 if (err < 0) 755 pr_err("Couldn't record guest kernel [%d]'s reference" 756 " relocation symbol.\n", machine->pid); 757 } 758 759 static struct perf_event_header finished_round_event = { 760 .size = sizeof(struct perf_event_header), 761 .type = PERF_RECORD_FINISHED_ROUND, 762 }; 763 764 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 765 { 766 if (rec->opts.affinity != PERF_AFFINITY_SYS && 767 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 768 CPU_ZERO(&rec->affinity_mask); 769 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 770 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 771 } 772 } 773 774 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 775 bool overwrite, bool synch) 776 { 777 u64 bytes_written = rec->bytes_written; 778 int i; 779 int rc = 0; 780 struct perf_mmap *maps; 781 int trace_fd = rec->data.file.fd; 782 off_t off; 783 784 if (!evlist) 785 return 0; 786 787 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; 788 if (!maps) 789 return 0; 790 791 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 792 return 0; 793 794 if (record__aio_enabled(rec)) 795 off = record__aio_get_pos(trace_fd); 796 797 for (i = 0; i < evlist->nr_mmaps; i++) { 798 u64 flush = 0; 799 struct perf_mmap *map = &maps[i]; 800 801 if (map->base) { 802 record__adjust_affinity(rec, map); 803 if (synch) { 804 flush = map->flush; 805 map->flush = 1; 806 } 807 if (!record__aio_enabled(rec)) { 808 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 809 if (synch) 810 map->flush = flush; 811 rc = -1; 812 goto out; 813 } 814 } else { 815 int idx; 816 /* 817 * Call record__aio_sync() to wait till map->data buffer 818 * becomes available after previous aio write request. 819 */ 820 idx = record__aio_sync(map, false); 821 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) { 822 record__aio_set_pos(trace_fd, off); 823 if (synch) 824 map->flush = flush; 825 rc = -1; 826 goto out; 827 } 828 } 829 if (synch) 830 map->flush = flush; 831 } 832 833 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 834 record__auxtrace_mmap_read(rec, map) != 0) { 835 rc = -1; 836 goto out; 837 } 838 } 839 840 if (record__aio_enabled(rec)) 841 record__aio_set_pos(trace_fd, off); 842 843 /* 844 * Mark the round finished in case we wrote 845 * at least one event. 
846 */ 847 if (bytes_written != rec->bytes_written) 848 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 849 850 if (overwrite) 851 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 852 out: 853 return rc; 854 } 855 856 static int record__mmap_read_all(struct record *rec, bool synch) 857 { 858 int err; 859 860 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 861 if (err) 862 return err; 863 864 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 865 } 866 867 static void record__init_features(struct record *rec) 868 { 869 struct perf_session *session = rec->session; 870 int feat; 871 872 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 873 perf_header__set_feat(&session->header, feat); 874 875 if (rec->no_buildid) 876 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 877 878 if (!have_tracepoints(&rec->evlist->entries)) 879 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 880 881 if (!rec->opts.branch_stack) 882 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 883 884 if (!rec->opts.full_auxtrace) 885 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 886 887 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 888 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 889 890 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 891 892 perf_header__clear_feat(&session->header, HEADER_STAT); 893 } 894 895 static void 896 record__finish_output(struct record *rec) 897 { 898 struct perf_data *data = &rec->data; 899 int fd = perf_data__fd(data); 900 901 if (data->is_pipe) 902 return; 903 904 rec->session->header.data_size += rec->bytes_written; 905 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 906 907 if (!rec->no_buildid) { 908 process_buildids(rec); 909 910 if (rec->buildid_all) 911 dsos__hit_all(rec->session); 912 } 913 perf_session__write_header(rec->session, rec->evlist, fd, true); 914 915 return; 916 } 917 918 static int record__synthesize_workload(struct record *rec, bool tail) 919 { 920 int err; 921 struct thread_map *thread_map; 922 923 if (rec->opts.tail_synthesize != tail) 924 return 0; 925 926 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 927 if (thread_map == NULL) 928 return -1; 929 930 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 931 process_synthesized_event, 932 &rec->session->machines.host, 933 rec->opts.sample_address); 934 thread_map__put(thread_map); 935 return err; 936 } 937 938 static int record__synthesize(struct record *rec, bool tail); 939 940 static int 941 record__switch_output(struct record *rec, bool at_exit) 942 { 943 struct perf_data *data = &rec->data; 944 int fd, err; 945 char *new_filename; 946 947 /* Same Size: "2015122520103046"*/ 948 char timestamp[] = "InvalidTimestamp"; 949 950 record__aio_mmap_read_sync(rec); 951 952 record__synthesize(rec, true); 953 if (target__none(&rec->opts.target)) 954 record__synthesize_workload(rec, true); 955 956 rec->samples = 0; 957 record__finish_output(rec); 958 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 959 if (err) { 960 pr_err("Failed to get current timestamp\n"); 961 return -EINVAL; 962 } 963 964 fd = perf_data__switch(data, timestamp, 965 rec->session->header.data_offset, 966 at_exit, &new_filename); 967 if (fd >= 0 && !at_exit) { 968 rec->bytes_written = 0; 969 rec->session->header.data_size = 0; 970 } 971 972 if (!quiet) 973 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 974 data->path, 
timestamp); 975 976 if (rec->switch_output.num_files) { 977 int n = rec->switch_output.cur_file + 1; 978 979 if (n >= rec->switch_output.num_files) 980 n = 0; 981 rec->switch_output.cur_file = n; 982 if (rec->switch_output.filenames[n]) { 983 remove(rec->switch_output.filenames[n]); 984 free(rec->switch_output.filenames[n]); 985 } 986 rec->switch_output.filenames[n] = new_filename; 987 } else { 988 free(new_filename); 989 } 990 991 /* Output tracking events */ 992 if (!at_exit) { 993 record__synthesize(rec, false); 994 995 /* 996 * In 'perf record --switch-output' without -a, 997 * record__synthesize() in record__switch_output() won't 998 * generate tracking events because there's no thread_map 999 * in evlist. Which causes newly created perf.data doesn't 1000 * contain map and comm information. 1001 * Create a fake thread_map and directly call 1002 * perf_event__synthesize_thread_map() for those events. 1003 */ 1004 if (target__none(&rec->opts.target)) 1005 record__synthesize_workload(rec, false); 1006 } 1007 return fd; 1008 } 1009 1010 static volatile int workload_exec_errno; 1011 1012 /* 1013 * perf_evlist__prepare_workload will send a SIGUSR1 1014 * if the fork fails, since we asked by setting its 1015 * want_signal to true. 1016 */ 1017 static void workload_exec_failed_signal(int signo __maybe_unused, 1018 siginfo_t *info, 1019 void *ucontext __maybe_unused) 1020 { 1021 workload_exec_errno = info->si_value.sival_int; 1022 done = 1; 1023 child_finished = 1; 1024 } 1025 1026 static void snapshot_sig_handler(int sig); 1027 static void alarm_sig_handler(int sig); 1028 1029 int __weak 1030 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 1031 struct perf_tool *tool __maybe_unused, 1032 perf_event__handler_t process __maybe_unused, 1033 struct machine *machine __maybe_unused) 1034 { 1035 return 0; 1036 } 1037 1038 static const struct perf_event_mmap_page * 1039 perf_evlist__pick_pc(struct perf_evlist *evlist) 1040 { 1041 if (evlist) { 1042 if (evlist->mmap && evlist->mmap[0].base) 1043 return evlist->mmap[0].base; 1044 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) 1045 return evlist->overwrite_mmap[0].base; 1046 } 1047 return NULL; 1048 } 1049 1050 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1051 { 1052 const struct perf_event_mmap_page *pc; 1053 1054 pc = perf_evlist__pick_pc(rec->evlist); 1055 if (pc) 1056 return pc; 1057 return NULL; 1058 } 1059 1060 static int record__synthesize(struct record *rec, bool tail) 1061 { 1062 struct perf_session *session = rec->session; 1063 struct machine *machine = &session->machines.host; 1064 struct perf_data *data = &rec->data; 1065 struct record_opts *opts = &rec->opts; 1066 struct perf_tool *tool = &rec->tool; 1067 int fd = perf_data__fd(data); 1068 int err = 0; 1069 1070 if (rec->opts.tail_synthesize != tail) 1071 return 0; 1072 1073 if (data->is_pipe) { 1074 /* 1075 * We need to synthesize events first, because some 1076 * features works on top of them (on report side). 
1077 */ 1078 err = perf_event__synthesize_attrs(tool, rec->evlist, 1079 process_synthesized_event); 1080 if (err < 0) { 1081 pr_err("Couldn't synthesize attrs.\n"); 1082 goto out; 1083 } 1084 1085 err = perf_event__synthesize_features(tool, session, rec->evlist, 1086 process_synthesized_event); 1087 if (err < 0) { 1088 pr_err("Couldn't synthesize features.\n"); 1089 return err; 1090 } 1091 1092 if (have_tracepoints(&rec->evlist->entries)) { 1093 /* 1094 * FIXME err <= 0 here actually means that 1095 * there were no tracepoints so its not really 1096 * an error, just that we don't need to 1097 * synthesize anything. We really have to 1098 * return this more properly and also 1099 * propagate errors that now are calling die() 1100 */ 1101 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 1102 process_synthesized_event); 1103 if (err <= 0) { 1104 pr_err("Couldn't record tracing data.\n"); 1105 goto out; 1106 } 1107 rec->bytes_written += err; 1108 } 1109 } 1110 1111 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1112 process_synthesized_event, machine); 1113 if (err) 1114 goto out; 1115 1116 if (rec->opts.full_auxtrace) { 1117 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1118 session, process_synthesized_event); 1119 if (err) 1120 goto out; 1121 } 1122 1123 if (!perf_evlist__exclude_kernel(rec->evlist)) { 1124 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1125 machine); 1126 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1127 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1128 "Check /proc/kallsyms permission or run as root.\n"); 1129 1130 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1131 machine); 1132 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1133 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 1134 "Check /proc/modules permission or run as root.\n"); 1135 } 1136 1137 if (perf_guest) { 1138 machines__process_guests(&session->machines, 1139 perf_event__synthesize_guest_os, tool); 1140 } 1141 1142 err = perf_event__synthesize_extra_attr(&rec->tool, 1143 rec->evlist, 1144 process_synthesized_event, 1145 data->is_pipe); 1146 if (err) 1147 goto out; 1148 1149 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads, 1150 process_synthesized_event, 1151 NULL); 1152 if (err < 0) { 1153 pr_err("Couldn't synthesize thread map.\n"); 1154 return err; 1155 } 1156 1157 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus, 1158 process_synthesized_event, NULL); 1159 if (err < 0) { 1160 pr_err("Couldn't synthesize cpu map.\n"); 1161 return err; 1162 } 1163 1164 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1165 machine, opts); 1166 if (err < 0) 1167 pr_warning("Couldn't synthesize bpf events.\n"); 1168 1169 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 1170 process_synthesized_event, opts->sample_address, 1171 1); 1172 out: 1173 return err; 1174 } 1175 1176 static int __cmd_record(struct record *rec, int argc, const char **argv) 1177 { 1178 int err; 1179 int status = 0; 1180 unsigned long waking = 0; 1181 const bool forks = argc > 0; 1182 struct perf_tool *tool = &rec->tool; 1183 struct record_opts *opts = &rec->opts; 1184 struct perf_data *data = &rec->data; 1185 struct perf_session *session; 1186 bool disabled = false, draining = false; 1187 struct perf_evlist *sb_evlist = NULL; 1188 int fd; 1189 1190 atexit(record__sig_exit); 1191 signal(SIGCHLD, sig_handler); 1192 signal(SIGINT, sig_handler); 1193 signal(SIGTERM, sig_handler); 1194 signal(SIGSEGV, sigsegv_handler); 1195 1196 if (rec->opts.record_namespaces) 1197 tool->namespace_events = true; 1198 1199 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1200 signal(SIGUSR2, snapshot_sig_handler); 1201 if (rec->opts.auxtrace_snapshot_mode) 1202 trigger_on(&auxtrace_snapshot_trigger); 1203 if (rec->switch_output.enabled) 1204 trigger_on(&switch_output_trigger); 1205 } else { 1206 signal(SIGUSR2, SIG_IGN); 1207 } 1208 1209 session = perf_session__new(data, false, tool); 1210 if (session == NULL) { 1211 pr_err("Perf session creation failed.\n"); 1212 return -1; 1213 } 1214 1215 fd = perf_data__fd(data); 1216 rec->session = session; 1217 1218 record__init_features(rec); 1219 1220 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1221 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; 1222 1223 if (forks) { 1224 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 1225 argv, data->is_pipe, 1226 workload_exec_failed_signal); 1227 if (err < 0) { 1228 pr_err("Couldn't run the workload!\n"); 1229 status = err; 1230 goto out_delete_session; 1231 } 1232 } 1233 1234 /* 1235 * If we have just single event and are sending data 1236 * through pipe, we need to force the ids allocation, 1237 * because we synthesize event name through the pipe 1238 * and need the id for that. 
1239 */ 1240 if (data->is_pipe && rec->evlist->nr_entries == 1) 1241 rec->opts.sample_id = true; 1242 1243 if (record__open(rec) != 0) { 1244 err = -1; 1245 goto out_child; 1246 } 1247 1248 err = bpf__apply_obj_config(); 1249 if (err) { 1250 char errbuf[BUFSIZ]; 1251 1252 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 1253 pr_err("ERROR: Apply config to BPF failed: %s\n", 1254 errbuf); 1255 goto out_child; 1256 } 1257 1258 /* 1259 * Normally perf_session__new would do this, but it doesn't have the 1260 * evlist. 1261 */ 1262 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 1263 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 1264 rec->tool.ordered_events = false; 1265 } 1266 1267 if (!rec->evlist->nr_groups) 1268 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 1269 1270 if (data->is_pipe) { 1271 err = perf_header__write_pipe(fd); 1272 if (err < 0) 1273 goto out_child; 1274 } else { 1275 err = perf_session__write_header(session, rec->evlist, fd, false); 1276 if (err < 0) 1277 goto out_child; 1278 } 1279 1280 if (!rec->no_buildid 1281 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 1282 pr_err("Couldn't generate buildids. " 1283 "Use --no-buildid to profile anyway.\n"); 1284 err = -1; 1285 goto out_child; 1286 } 1287 1288 if (!opts->no_bpf_event) 1289 bpf_event__add_sb_event(&sb_evlist, &session->header.env); 1290 1291 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { 1292 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1293 opts->no_bpf_event = true; 1294 } 1295 1296 err = record__synthesize(rec, false); 1297 if (err < 0) 1298 goto out_child; 1299 1300 if (rec->realtime_prio) { 1301 struct sched_param param; 1302 1303 param.sched_priority = rec->realtime_prio; 1304 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1305 pr_err("Could not set realtime priority.\n"); 1306 err = -1; 1307 goto out_child; 1308 } 1309 } 1310 1311 /* 1312 * When perf is starting the traced process, all the events 1313 * (apart from group members) have enable_on_exec=1 set, 1314 * so don't spoil it by prematurely enabling them. 1315 */ 1316 if (!target__none(&opts->target) && !opts->initial_delay) 1317 perf_evlist__enable(rec->evlist); 1318 1319 /* 1320 * Let the child rip 1321 */ 1322 if (forks) { 1323 struct machine *machine = &session->machines.host; 1324 union perf_event *event; 1325 pid_t tgid; 1326 1327 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 1328 if (event == NULL) { 1329 err = -ENOMEM; 1330 goto out_child; 1331 } 1332 1333 /* 1334 * Some H/W events are generated before COMM event 1335 * which is emitted during exec(), so perf script 1336 * cannot see a correct process name for those events. 1337 * Synthesize COMM event to prevent it. 1338 */ 1339 tgid = perf_event__synthesize_comm(tool, event, 1340 rec->evlist->workload.pid, 1341 process_synthesized_event, 1342 machine); 1343 free(event); 1344 1345 if (tgid == -1) 1346 goto out_child; 1347 1348 event = malloc(sizeof(event->namespaces) + 1349 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1350 machine->id_hdr_size); 1351 if (event == NULL) { 1352 err = -ENOMEM; 1353 goto out_child; 1354 } 1355 1356 /* 1357 * Synthesize NAMESPACES event for the command specified. 
1358 */ 1359 perf_event__synthesize_namespaces(tool, event, 1360 rec->evlist->workload.pid, 1361 tgid, process_synthesized_event, 1362 machine); 1363 free(event); 1364 1365 perf_evlist__start_workload(rec->evlist); 1366 } 1367 1368 if (opts->initial_delay) { 1369 usleep(opts->initial_delay * USEC_PER_MSEC); 1370 perf_evlist__enable(rec->evlist); 1371 } 1372 1373 trigger_ready(&auxtrace_snapshot_trigger); 1374 trigger_ready(&switch_output_trigger); 1375 perf_hooks__invoke_record_start(); 1376 for (;;) { 1377 unsigned long long hits = rec->samples; 1378 1379 /* 1380 * rec->evlist->bkw_mmap_state is possible to be 1381 * BKW_MMAP_EMPTY here: when done == true and 1382 * hits != rec->samples in previous round. 1383 * 1384 * perf_evlist__toggle_bkw_mmap ensure we never 1385 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1386 */ 1387 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1388 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1389 1390 if (record__mmap_read_all(rec, false) < 0) { 1391 trigger_error(&auxtrace_snapshot_trigger); 1392 trigger_error(&switch_output_trigger); 1393 err = -1; 1394 goto out_child; 1395 } 1396 1397 if (auxtrace_record__snapshot_started) { 1398 auxtrace_record__snapshot_started = 0; 1399 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1400 record__read_auxtrace_snapshot(rec); 1401 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1402 pr_err("AUX area tracing snapshot failed\n"); 1403 err = -1; 1404 goto out_child; 1405 } 1406 } 1407 1408 if (trigger_is_hit(&switch_output_trigger)) { 1409 /* 1410 * If switch_output_trigger is hit, the data in 1411 * overwritable ring buffer should have been collected, 1412 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1413 * 1414 * If SIGUSR2 raise after or during record__mmap_read_all(), 1415 * record__mmap_read_all() didn't collect data from 1416 * overwritable ring buffer. Read again. 1417 */ 1418 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1419 continue; 1420 trigger_ready(&switch_output_trigger); 1421 1422 /* 1423 * Reenable events in overwrite ring buffer after 1424 * record__mmap_read_all(): we should have collected 1425 * data from it. 1426 */ 1427 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1428 1429 if (!quiet) 1430 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1431 waking); 1432 waking = 0; 1433 fd = record__switch_output(rec, false); 1434 if (fd < 0) { 1435 pr_err("Failed to switch to new file\n"); 1436 trigger_error(&switch_output_trigger); 1437 err = fd; 1438 goto out_child; 1439 } 1440 1441 /* re-arm the alarm */ 1442 if (rec->switch_output.time) 1443 alarm(rec->switch_output.time); 1444 } 1445 1446 if (hits == rec->samples) { 1447 if (done || draining) 1448 break; 1449 err = perf_evlist__poll(rec->evlist, -1); 1450 /* 1451 * Propagate error, only if there's any. Ignore positive 1452 * number of returned events and interrupt error. 1453 */ 1454 if (err > 0 || (err < 0 && errno == EINTR)) 1455 err = 0; 1456 waking++; 1457 1458 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1459 draining = true; 1460 } 1461 1462 /* 1463 * When perf is starting the traced process, at the end events 1464 * die with the process and we wait for that. Thus no need to 1465 * disable events in this case. 
1466 */ 1467 if (done && !disabled && !target__none(&opts->target)) { 1468 trigger_off(&auxtrace_snapshot_trigger); 1469 perf_evlist__disable(rec->evlist); 1470 disabled = true; 1471 } 1472 } 1473 trigger_off(&auxtrace_snapshot_trigger); 1474 trigger_off(&switch_output_trigger); 1475 1476 if (forks && workload_exec_errno) { 1477 char msg[STRERR_BUFSIZE]; 1478 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1479 pr_err("Workload failed: %s\n", emsg); 1480 err = -1; 1481 goto out_child; 1482 } 1483 1484 if (!quiet) 1485 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1486 1487 if (target__none(&rec->opts.target)) 1488 record__synthesize_workload(rec, true); 1489 1490 out_child: 1491 record__mmap_read_all(rec, true); 1492 record__aio_mmap_read_sync(rec); 1493 1494 if (forks) { 1495 int exit_status; 1496 1497 if (!child_finished) 1498 kill(rec->evlist->workload.pid, SIGTERM); 1499 1500 wait(&exit_status); 1501 1502 if (err < 0) 1503 status = err; 1504 else if (WIFEXITED(exit_status)) 1505 status = WEXITSTATUS(exit_status); 1506 else if (WIFSIGNALED(exit_status)) 1507 signr = WTERMSIG(exit_status); 1508 } else 1509 status = err; 1510 1511 record__synthesize(rec, true); 1512 /* this will be recalculated during process_buildids() */ 1513 rec->samples = 0; 1514 1515 if (!err) { 1516 if (!rec->timestamp_filename) { 1517 record__finish_output(rec); 1518 } else { 1519 fd = record__switch_output(rec, true); 1520 if (fd < 0) { 1521 status = fd; 1522 goto out_delete_session; 1523 } 1524 } 1525 } 1526 1527 perf_hooks__invoke_record_end(); 1528 1529 if (!err && !quiet) { 1530 char samples[128]; 1531 const char *postfix = rec->timestamp_filename ? 1532 ".<timestamp>" : ""; 1533 1534 if (rec->samples && !rec->opts.full_auxtrace) 1535 scnprintf(samples, sizeof(samples), 1536 " (%" PRIu64 " samples)", rec->samples); 1537 else 1538 samples[0] = '\0'; 1539 1540 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1541 perf_data__size(data) / 1024.0 / 1024.0, 1542 data->path, postfix, samples); 1543 } 1544 1545 out_delete_session: 1546 perf_session__delete(session); 1547 1548 if (!opts->no_bpf_event) 1549 perf_evlist__stop_sb_thread(sb_evlist); 1550 return status; 1551 } 1552 1553 static void callchain_debug(struct callchain_param *callchain) 1554 { 1555 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1556 1557 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1558 1559 if (callchain->record_mode == CALLCHAIN_DWARF) 1560 pr_debug("callchain: stack dump size %d\n", 1561 callchain->dump_size); 1562 } 1563 1564 int record_opts__parse_callchain(struct record_opts *record, 1565 struct callchain_param *callchain, 1566 const char *arg, bool unset) 1567 { 1568 int ret; 1569 callchain->enabled = !unset; 1570 1571 /* --no-call-graph */ 1572 if (unset) { 1573 callchain->record_mode = CALLCHAIN_NONE; 1574 pr_debug("callchain: disabled\n"); 1575 return 0; 1576 } 1577 1578 ret = parse_callchain_record_opt(arg, callchain); 1579 if (!ret) { 1580 /* Enable data address sampling for DWARF unwind. 
*/ 1581 if (callchain->record_mode == CALLCHAIN_DWARF) 1582 record->sample_address = true; 1583 callchain_debug(callchain); 1584 } 1585 1586 return ret; 1587 } 1588 1589 int record_parse_callchain_opt(const struct option *opt, 1590 const char *arg, 1591 int unset) 1592 { 1593 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1594 } 1595 1596 int record_callchain_opt(const struct option *opt, 1597 const char *arg __maybe_unused, 1598 int unset __maybe_unused) 1599 { 1600 struct callchain_param *callchain = opt->value; 1601 1602 callchain->enabled = true; 1603 1604 if (callchain->record_mode == CALLCHAIN_NONE) 1605 callchain->record_mode = CALLCHAIN_FP; 1606 1607 callchain_debug(callchain); 1608 return 0; 1609 } 1610 1611 static int perf_record_config(const char *var, const char *value, void *cb) 1612 { 1613 struct record *rec = cb; 1614 1615 if (!strcmp(var, "record.build-id")) { 1616 if (!strcmp(value, "cache")) 1617 rec->no_buildid_cache = false; 1618 else if (!strcmp(value, "no-cache")) 1619 rec->no_buildid_cache = true; 1620 else if (!strcmp(value, "skip")) 1621 rec->no_buildid = true; 1622 else 1623 return -1; 1624 return 0; 1625 } 1626 if (!strcmp(var, "record.call-graph")) { 1627 var = "call-graph.record-mode"; 1628 return perf_default_config(var, value, cb); 1629 } 1630 #ifdef HAVE_AIO_SUPPORT 1631 if (!strcmp(var, "record.aio")) { 1632 rec->opts.nr_cblocks = strtol(value, NULL, 0); 1633 if (!rec->opts.nr_cblocks) 1634 rec->opts.nr_cblocks = nr_cblocks_default; 1635 } 1636 #endif 1637 1638 return 0; 1639 } 1640 1641 struct clockid_map { 1642 const char *name; 1643 int clockid; 1644 }; 1645 1646 #define CLOCKID_MAP(n, c) \ 1647 { .name = n, .clockid = (c), } 1648 1649 #define CLOCKID_END { .name = NULL, } 1650 1651 1652 /* 1653 * Add the missing ones, we need to build on many distros... 
1654 */ 1655 #ifndef CLOCK_MONOTONIC_RAW 1656 #define CLOCK_MONOTONIC_RAW 4 1657 #endif 1658 #ifndef CLOCK_BOOTTIME 1659 #define CLOCK_BOOTTIME 7 1660 #endif 1661 #ifndef CLOCK_TAI 1662 #define CLOCK_TAI 11 1663 #endif 1664 1665 static const struct clockid_map clockids[] = { 1666 /* available for all events, NMI safe */ 1667 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1668 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1669 1670 /* available for some events */ 1671 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1672 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1673 CLOCKID_MAP("tai", CLOCK_TAI), 1674 1675 /* available for the lazy */ 1676 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1677 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1678 CLOCKID_MAP("real", CLOCK_REALTIME), 1679 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1680 1681 CLOCKID_END, 1682 }; 1683 1684 static int get_clockid_res(clockid_t clk_id, u64 *res_ns) 1685 { 1686 struct timespec res; 1687 1688 *res_ns = 0; 1689 if (!clock_getres(clk_id, &res)) 1690 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; 1691 else 1692 pr_warning("WARNING: Failed to determine specified clock resolution.\n"); 1693 1694 return 0; 1695 } 1696 1697 static int parse_clockid(const struct option *opt, const char *str, int unset) 1698 { 1699 struct record_opts *opts = (struct record_opts *)opt->value; 1700 const struct clockid_map *cm; 1701 const char *ostr = str; 1702 1703 if (unset) { 1704 opts->use_clockid = 0; 1705 return 0; 1706 } 1707 1708 /* no arg passed */ 1709 if (!str) 1710 return 0; 1711 1712 /* no setting it twice */ 1713 if (opts->use_clockid) 1714 return -1; 1715 1716 opts->use_clockid = true; 1717 1718 /* if its a number, we're done */ 1719 if (sscanf(str, "%d", &opts->clockid) == 1) 1720 return get_clockid_res(opts->clockid, &opts->clockid_res_ns); 1721 1722 /* allow a "CLOCK_" prefix to the name */ 1723 if (!strncasecmp(str, "CLOCK_", 6)) 1724 str += 6; 1725 1726 for (cm = clockids; cm->name; cm++) { 1727 if (!strcasecmp(str, cm->name)) { 1728 opts->clockid = cm->clockid; 1729 return get_clockid_res(opts->clockid, 1730 &opts->clockid_res_ns); 1731 } 1732 } 1733 1734 opts->use_clockid = false; 1735 ui__warning("unknown clockid %s, check man page\n", ostr); 1736 return -1; 1737 } 1738 1739 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 1740 { 1741 struct record_opts *opts = (struct record_opts *)opt->value; 1742 1743 if (unset || !str) 1744 return 0; 1745 1746 if (!strcasecmp(str, "node")) 1747 opts->affinity = PERF_AFFINITY_NODE; 1748 else if (!strcasecmp(str, "cpu")) 1749 opts->affinity = PERF_AFFINITY_CPU; 1750 1751 return 0; 1752 } 1753 1754 static int record__parse_mmap_pages(const struct option *opt, 1755 const char *str, 1756 int unset __maybe_unused) 1757 { 1758 struct record_opts *opts = opt->value; 1759 char *s, *p; 1760 unsigned int mmap_pages; 1761 int ret; 1762 1763 if (!str) 1764 return -EINVAL; 1765 1766 s = strdup(str); 1767 if (!s) 1768 return -ENOMEM; 1769 1770 p = strchr(s, ','); 1771 if (p) 1772 *p = '\0'; 1773 1774 if (*s) { 1775 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1776 if (ret) 1777 goto out_free; 1778 opts->mmap_pages = mmap_pages; 1779 } 1780 1781 if (!p) { 1782 ret = 0; 1783 goto out_free; 1784 } 1785 1786 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1787 if (ret) 1788 goto out_free; 1789 1790 opts->auxtrace_mmap_pages = mmap_pages; 1791 1792 out_free: 1793 free(s); 1794 return ret; 1795 } 1796 1797 static void switch_output_size_warn(struct record *rec) 1798 { 
1799 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1800 struct switch_output *s = &rec->switch_output; 1801 1802 wakeup_size /= 2; 1803 1804 if (s->size < wakeup_size) { 1805 char buf[100]; 1806 1807 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1808 pr_warning("WARNING: switch-output data size lower than " 1809 "wakeup kernel buffer size (%s) " 1810 "expect bigger perf.data sizes\n", buf); 1811 } 1812 } 1813 1814 static int switch_output_setup(struct record *rec) 1815 { 1816 struct switch_output *s = &rec->switch_output; 1817 static struct parse_tag tags_size[] = { 1818 { .tag = 'B', .mult = 1 }, 1819 { .tag = 'K', .mult = 1 << 10 }, 1820 { .tag = 'M', .mult = 1 << 20 }, 1821 { .tag = 'G', .mult = 1 << 30 }, 1822 { .tag = 0 }, 1823 }; 1824 static struct parse_tag tags_time[] = { 1825 { .tag = 's', .mult = 1 }, 1826 { .tag = 'm', .mult = 60 }, 1827 { .tag = 'h', .mult = 60*60 }, 1828 { .tag = 'd', .mult = 60*60*24 }, 1829 { .tag = 0 }, 1830 }; 1831 unsigned long val; 1832 1833 if (!s->set) 1834 return 0; 1835 1836 if (!strcmp(s->str, "signal")) { 1837 s->signal = true; 1838 pr_debug("switch-output with SIGUSR2 signal\n"); 1839 goto enabled; 1840 } 1841 1842 val = parse_tag_value(s->str, tags_size); 1843 if (val != (unsigned long) -1) { 1844 s->size = val; 1845 pr_debug("switch-output with %s size threshold\n", s->str); 1846 goto enabled; 1847 } 1848 1849 val = parse_tag_value(s->str, tags_time); 1850 if (val != (unsigned long) -1) { 1851 s->time = val; 1852 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 1853 s->str, s->time); 1854 goto enabled; 1855 } 1856 1857 return -1; 1858 1859 enabled: 1860 rec->timestamp_filename = true; 1861 s->enabled = true; 1862 1863 if (s->size && !rec->opts.no_buffering) 1864 switch_output_size_warn(rec); 1865 1866 return 0; 1867 } 1868 1869 static const char * const __record_usage[] = { 1870 "perf record [<options>] [<command>]", 1871 "perf record [<options>] -- <command> [<options>]", 1872 NULL 1873 }; 1874 const char * const *record_usage = __record_usage; 1875 1876 /* 1877 * XXX Ideally would be local to cmd_record() and passed to a record__new 1878 * because we need to have access to it in record__exit, that is called 1879 * after cmd_record() exits, but since record_options need to be accessible to 1880 * builtin-script, leave it here. 1881 * 1882 * At least we don't ouch it in all the other functions here directly. 1883 * 1884 * Just say no to tons of global variables, sigh. 1885 */ 1886 static struct record record = { 1887 .opts = { 1888 .sample_time = true, 1889 .mmap_pages = UINT_MAX, 1890 .user_freq = UINT_MAX, 1891 .user_interval = ULLONG_MAX, 1892 .freq = 4000, 1893 .target = { 1894 .uses_mmap = true, 1895 .default_per_cpu = true, 1896 }, 1897 .mmap_flush = MMAP_FLUSH_DEFAULT, 1898 }, 1899 .tool = { 1900 .sample = process_sample_event, 1901 .fork = perf_event__process_fork, 1902 .exit = perf_event__process_exit, 1903 .comm = perf_event__process_comm, 1904 .namespaces = perf_event__process_namespaces, 1905 .mmap = perf_event__process_mmap, 1906 .mmap2 = perf_event__process_mmap2, 1907 .ordered_events = true, 1908 }, 1909 }; 1910 1911 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 1912 "\n\t\t\t\tDefault: fp"; 1913 1914 static bool dry_run; 1915 1916 /* 1917 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 1918 * with it and switch to use the library functions in perf_evlist that came 1919 * from builtin-record.c, i.e. 
use record_opts, 1920 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 1921 * using pipes, etc. 1922 */ 1923 static struct option __record_options[] = { 1924 OPT_CALLBACK('e', "event", &record.evlist, "event", 1925 "event selector. use 'perf list' to list available events", 1926 parse_events_option), 1927 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 1928 "event filter", parse_filter), 1929 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 1930 NULL, "don't record events from perf itself", 1931 exclude_perf), 1932 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 1933 "record events on existing process id"), 1934 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 1935 "record events on existing thread id"), 1936 OPT_INTEGER('r', "realtime", &record.realtime_prio, 1937 "collect data with this RT SCHED_FIFO priority"), 1938 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 1939 "collect data without buffering"), 1940 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 1941 "collect raw sample records from all opened counters"), 1942 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 1943 "system-wide collection from all CPUs"), 1944 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1945 "list of cpus to monitor"), 1946 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1947 OPT_STRING('o', "output", &record.data.path, "file", 1948 "output file name"), 1949 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1950 &record.opts.no_inherit_set, 1951 "child tasks do not inherit counters"), 1952 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1953 "synthesize non-sample events at the end of output"), 1954 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1955 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), 1956 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 1957 "Fail if the specified frequency can't be used"), 1958 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 1959 "profile at this frequency", 1960 record__parse_freq), 1961 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 1962 "number of mmap data pages and AUX area tracing mmap pages", 1963 record__parse_mmap_pages), 1964 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 1965 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 1966 record__mmap_flush_parse), 1967 OPT_BOOLEAN(0, "group", &record.opts.group, 1968 "put the counters into a counter group"), 1969 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 1970 NULL, "enables call-graph recording" , 1971 &record_callchain_opt), 1972 OPT_CALLBACK(0, "call-graph", &record.opts, 1973 "record_mode[,record_size]", record_callchain_help, 1974 &record_parse_callchain_opt), 1975 OPT_INCR('v', "verbose", &verbose, 1976 "be more verbose (show counter open errors, etc)"), 1977 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 1978 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 1979 "per thread counts"), 1980 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 1981 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 1982 "Record the sample physical addresses"), 1983 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 1984 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 1985 &record.opts.sample_time_set, 1986 "Record the sample timestamps"), 1987 
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 1988 "Record the sample period"), 1989 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 1990 "don't sample"), 1991 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 1992 &record.no_buildid_cache_set, 1993 "do not update the buildid cache"), 1994 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 1995 &record.no_buildid_set, 1996 "do not collect buildids in perf.data"), 1997 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 1998 "monitor event in cgroup name only", 1999 parse_cgroups), 2000 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 2001 "ms to wait before starting measurement after program start"), 2002 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2003 "user to profile"), 2004 2005 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2006 "branch any", "sample any taken branches", 2007 parse_branch_stack), 2008 2009 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2010 "branch filter mask", "branch stack filter modes", 2011 parse_branch_stack), 2012 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2013 "sample by weight (on special events only)"), 2014 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2015 "sample transaction flags (special events only)"), 2016 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2017 "use per-thread mmaps"), 2018 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2019 "sample selected machine registers on interrupt," 2020 " use -I ? to list register names", parse_regs), 2021 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2022 "sample selected machine registers on interrupt," 2023 " use -I ? 
to list register names", parse_regs), 2024 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2025 "Record running/enabled time of read (:S) events"), 2026 OPT_CALLBACK('k', "clockid", &record.opts, 2027 "clockid", "clockid to use for events, see clock_gettime()", 2028 parse_clockid), 2029 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2030 "opts", "AUX area tracing Snapshot Mode", ""), 2031 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2032 "per thread proc mmap processing timeout in ms"), 2033 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2034 "Record namespaces events"), 2035 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2036 "Record context switch events"), 2037 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2038 "Configure all used events to run in kernel space.", 2039 PARSE_OPT_EXCLUSIVE), 2040 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2041 "Configure all used events to run in user space.", 2042 PARSE_OPT_EXCLUSIVE), 2043 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2044 "clang binary to use for compiling BPF scriptlets"), 2045 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2046 "options passed to clang when compiling BPF scriptlets"), 2047 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2048 "file", "vmlinux pathname"), 2049 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2050 "Record build-id of all DSOs regardless of hits"), 2051 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2052 "append timestamp to output filename"), 2053 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2054 "Record timestamp boundary (time of first/last samples)"), 2055 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2056 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2057 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 2058 "signal"), 2059 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2060 "Limit number of switch output generated files"), 2061 OPT_BOOLEAN(0, "dry-run", &dry_run, 2062 "Parse options then exit"), 2063 #ifdef HAVE_AIO_SUPPORT 2064 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2065 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2066 record__aio_parse), 2067 #endif 2068 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2069 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2070 record__parse_affinity), 2071 OPT_END() 2072 }; 2073 2074 struct option *record_options = __record_options; 2075 2076 int cmd_record(int argc, const char **argv) 2077 { 2078 int err; 2079 struct record *rec = &record; 2080 char errbuf[BUFSIZ]; 2081 2082 setlocale(LC_ALL, ""); 2083 2084 #ifndef HAVE_LIBBPF_SUPPORT 2085 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2086 set_nobuild('\0', "clang-path", true); 2087 set_nobuild('\0', "clang-opt", true); 2088 # undef set_nobuild 2089 #endif 2090 2091 #ifndef HAVE_BPF_PROLOGUE 2092 # if !defined (HAVE_DWARF_SUPPORT) 2093 # define REASON "NO_DWARF=1" 2094 # elif !defined (HAVE_LIBBPF_SUPPORT) 2095 # define REASON "NO_LIBBPF=1" 2096 # else 2097 # define REASON "this architecture doesn't support BPF prologue" 2098 # endif 2099 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2100 set_nobuild('\0', "vmlinux", true); 2101 # 

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	CPU_ZERO(&rec->affinity_mask);
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames)
			return -EINVAL;
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;
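
	/*
	 * For instance (illustrative example, assuming Intel PT support),
	 * an AUX area address filter such as
	 *
	 *	perf record -e intel_pt//u --filter 'filter main @ /bin/ls' -- ls
	 *
	 * relies on this alias-aware lookup when auxtrace_parse_filters()
	 * resolves 'main' within the named DSO.
	 */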

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * requested using:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, as decoding it would take too
	 * long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	if (verbose > 0)
		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}