1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/mmap.h" 24 #include "util/target.h" 25 #include "util/session.h" 26 #include "util/tool.h" 27 #include "util/symbol.h" 28 #include "util/record.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/perf_api_probe.h" 38 #include "util/llvm-utils.h" 39 #include "util/bpf-loader.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/clockid.h" 50 #include "util/pmu-hybrid.h" 51 #include "util/evlist-hybrid.h" 52 #include "asm/bug.h" 53 #include "perf.h" 54 55 #include <errno.h> 56 #include <inttypes.h> 57 #include <locale.h> 58 #include <poll.h> 59 #include <pthread.h> 60 #include <unistd.h> 61 #include <sched.h> 62 #include <signal.h> 63 #ifdef HAVE_EVENTFD_SUPPORT 64 #include <sys/eventfd.h> 65 #endif 66 #include <sys/mman.h> 67 #include <sys/wait.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <fcntl.h> 71 #include <linux/err.h> 72 #include <linux/string.h> 73 #include <linux/time64.h> 74 #include <linux/zalloc.h> 75 #include <linux/bitmap.h> 76 #include <sys/time.h> 77 78 struct switch_output { 79 bool enabled; 80 bool signal; 81 unsigned long size; 82 unsigned long time; 83 const char *str; 84 bool set; 85 char **filenames; 86 int num_files; 87 int cur_file; 88 }; 89 90 struct record { 91 struct perf_tool tool; 92 struct record_opts opts; 93 u64 bytes_written; 94 struct perf_data data; 95 struct auxtrace_record *itr; 96 struct evlist *evlist; 97 struct perf_session *session; 98 struct evlist *sb_evlist; 99 pthread_t thread_id; 100 int realtime_prio; 101 bool switch_output_event_set; 102 bool no_buildid; 103 bool no_buildid_set; 104 bool no_buildid_cache; 105 bool no_buildid_cache_set; 106 bool buildid_all; 107 bool buildid_mmap; 108 bool timestamp_filename; 109 bool timestamp_boundary; 110 struct switch_output switch_output; 111 unsigned long long samples; 112 struct mmap_cpu_mask affinity_mask; 113 unsigned long output_max_size; /* = 0: unlimited */ 114 struct perf_debuginfod debuginfod; 115 }; 116 117 static volatile int done; 118 119 static volatile int auxtrace_record__snapshot_started; 120 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 121 static DEFINE_TRIGGER(switch_output_trigger); 122 123 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 124 "SYS", "NODE", "CPU" 125 }; 126 127 static bool switch_output_signal(struct record *rec) 128 { 129 return rec->switch_output.signal && 130 trigger_is_ready(&switch_output_trigger); 131 } 132 133 static bool switch_output_size(struct record *rec) 134 { 135 return 
rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (rec->bytes_written >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free map->aio.data[]
	 * buffer to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from the perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released earlier than the aio write request started on the
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after
		 * started aio request completion, or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
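	 *
	 * record__aio_sync(map, false) returns the index of a free
	 * aio.cblocks[]/aio.data[] slot, so at most nr_cblocks asynchronous
	 * writes (see record__aio_parse()) are in flight per mmap at any
	 * time.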
349 */ 350 351 idx = record__aio_sync(map, false); 352 aio.data = map->aio.data[idx]; 353 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 354 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 355 return ret; 356 357 rec->samples++; 358 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 359 if (!ret) { 360 *off += aio.size; 361 rec->bytes_written += aio.size; 362 if (switch_output_size(rec)) 363 trigger_hit(&switch_output_trigger); 364 } else { 365 /* 366 * Decrement map->refcount incremented in record__aio_pushfn() 367 * back if record__aio_write() operation failed to start, otherwise 368 * map->refcount is decremented in record__aio_complete() after 369 * aio write operation finishes successfully. 370 */ 371 perf_mmap__put(&map->core); 372 } 373 374 return ret; 375 } 376 377 static off_t record__aio_get_pos(int trace_fd) 378 { 379 return lseek(trace_fd, 0, SEEK_CUR); 380 } 381 382 static void record__aio_set_pos(int trace_fd, off_t pos) 383 { 384 lseek(trace_fd, pos, SEEK_SET); 385 } 386 387 static void record__aio_mmap_read_sync(struct record *rec) 388 { 389 int i; 390 struct evlist *evlist = rec->evlist; 391 struct mmap *maps = evlist->mmap; 392 393 if (!record__aio_enabled(rec)) 394 return; 395 396 for (i = 0; i < evlist->core.nr_mmaps; i++) { 397 struct mmap *map = &maps[i]; 398 399 if (map->core.base) 400 record__aio_sync(map, true); 401 } 402 } 403 404 static int nr_cblocks_default = 1; 405 static int nr_cblocks_max = 4; 406 407 static int record__aio_parse(const struct option *opt, 408 const char *str, 409 int unset) 410 { 411 struct record_opts *opts = (struct record_opts *)opt->value; 412 413 if (unset) { 414 opts->nr_cblocks = 0; 415 } else { 416 if (str) 417 opts->nr_cblocks = strtol(str, NULL, 0); 418 if (!opts->nr_cblocks) 419 opts->nr_cblocks = nr_cblocks_default; 420 } 421 422 return 0; 423 } 424 #else /* HAVE_AIO_SUPPORT */ 425 static int nr_cblocks_max = 0; 426 427 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 428 off_t *off __maybe_unused) 429 { 430 return -1; 431 } 432 433 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 434 { 435 return -1; 436 } 437 438 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 439 { 440 } 441 442 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 443 { 444 } 445 #endif 446 447 static int record__aio_enabled(struct record *rec) 448 { 449 return rec->opts.nr_cblocks > 0; 450 } 451 452 #define MMAP_FLUSH_DEFAULT 1 453 static int record__mmap_flush_parse(const struct option *opt, 454 const char *str, 455 int unset) 456 { 457 int flush_max; 458 struct record_opts *opts = (struct record_opts *)opt->value; 459 static struct parse_tag tags[] = { 460 { .tag = 'B', .mult = 1 }, 461 { .tag = 'K', .mult = 1 << 10 }, 462 { .tag = 'M', .mult = 1 << 20 }, 463 { .tag = 'G', .mult = 1 << 30 }, 464 { .tag = 0 }, 465 }; 466 467 if (unset) 468 return 0; 469 470 if (str) { 471 opts->mmap_flush = parse_tag_value(str, tags); 472 if (opts->mmap_flush == (int)-1) 473 opts->mmap_flush = strtol(str, NULL, 0); 474 } 475 476 if (!opts->mmap_flush) 477 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 478 479 flush_max = evlist__mmap_size(opts->mmap_pages); 480 flush_max /= 4; 481 if (opts->mmap_flush > flush_max) 482 opts->mmap_flush = flush_max; 483 484 return 0; 485 } 486 487 #ifdef HAVE_ZSTD_SUPPORT 488 static unsigned int comp_level_default = 1; 489 490 static int record__parse_comp_level(const struct option 
*opt, const char *str, int unset) 491 { 492 struct record_opts *opts = opt->value; 493 494 if (unset) { 495 opts->comp_level = 0; 496 } else { 497 if (str) 498 opts->comp_level = strtol(str, NULL, 0); 499 if (!opts->comp_level) 500 opts->comp_level = comp_level_default; 501 } 502 503 return 0; 504 } 505 #endif 506 static unsigned int comp_level_max = 22; 507 508 static int record__comp_enabled(struct record *rec) 509 { 510 return rec->opts.comp_level > 0; 511 } 512 513 static int process_synthesized_event(struct perf_tool *tool, 514 union perf_event *event, 515 struct perf_sample *sample __maybe_unused, 516 struct machine *machine __maybe_unused) 517 { 518 struct record *rec = container_of(tool, struct record, tool); 519 return record__write(rec, NULL, event, event->header.size); 520 } 521 522 static int process_locked_synthesized_event(struct perf_tool *tool, 523 union perf_event *event, 524 struct perf_sample *sample __maybe_unused, 525 struct machine *machine __maybe_unused) 526 { 527 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; 528 int ret; 529 530 pthread_mutex_lock(&synth_lock); 531 ret = process_synthesized_event(tool, event, sample, machine); 532 pthread_mutex_unlock(&synth_lock); 533 return ret; 534 } 535 536 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 537 { 538 struct record *rec = to; 539 540 if (record__comp_enabled(rec)) { 541 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size); 542 bf = map->data; 543 } 544 545 rec->samples++; 546 return record__write(rec, map, bf, size); 547 } 548 549 static volatile int signr = -1; 550 static volatile int child_finished; 551 #ifdef HAVE_EVENTFD_SUPPORT 552 static int done_fd = -1; 553 #endif 554 555 static void sig_handler(int sig) 556 { 557 if (sig == SIGCHLD) 558 child_finished = 1; 559 else 560 signr = sig; 561 562 done = 1; 563 #ifdef HAVE_EVENTFD_SUPPORT 564 { 565 u64 tmp = 1; 566 /* 567 * It is possible for this signal handler to run after done is checked 568 * in the main loop, but before the perf counter fds are polled. If this 569 * happens, the poll() will continue to wait even though done is set, 570 * and will only break out if either another signal is received, or the 571 * counters are ready for read. To ensure the poll() doesn't sleep when 572 * done is set, use an eventfd (done_fd) to wake up the poll(). 
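	 * The eventfd is created with EFD_NONBLOCK and added to the evlist's
	 * pollfd set via evlist__add_wakeup_eventfd() in __cmd_record(), so
	 * the write() below makes it readable and evlist__poll() returns
	 * right away.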
573 */ 574 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 575 pr_err("failed to signal wakeup fd, error: %m\n"); 576 } 577 #endif // HAVE_EVENTFD_SUPPORT 578 } 579 580 static void sigsegv_handler(int sig) 581 { 582 perf_hooks__recover(); 583 sighandler_dump_stack(sig); 584 } 585 586 static void record__sig_exit(void) 587 { 588 if (signr == -1) 589 return; 590 591 signal(signr, SIG_DFL); 592 raise(signr); 593 } 594 595 #ifdef HAVE_AUXTRACE_SUPPORT 596 597 static int record__process_auxtrace(struct perf_tool *tool, 598 struct mmap *map, 599 union perf_event *event, void *data1, 600 size_t len1, void *data2, size_t len2) 601 { 602 struct record *rec = container_of(tool, struct record, tool); 603 struct perf_data *data = &rec->data; 604 size_t padding; 605 u8 pad[8] = {0}; 606 607 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 608 off_t file_offset; 609 int fd = perf_data__fd(data); 610 int err; 611 612 file_offset = lseek(fd, 0, SEEK_CUR); 613 if (file_offset == -1) 614 return -1; 615 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 616 event, file_offset); 617 if (err) 618 return err; 619 } 620 621 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 622 padding = (len1 + len2) & 7; 623 if (padding) 624 padding = 8 - padding; 625 626 record__write(rec, map, event, event->header.size); 627 record__write(rec, map, data1, len1); 628 if (len2) 629 record__write(rec, map, data2, len2); 630 record__write(rec, map, &pad, padding); 631 632 return 0; 633 } 634 635 static int record__auxtrace_mmap_read(struct record *rec, 636 struct mmap *map) 637 { 638 int ret; 639 640 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 641 record__process_auxtrace); 642 if (ret < 0) 643 return ret; 644 645 if (ret) 646 rec->samples++; 647 648 return 0; 649 } 650 651 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 652 struct mmap *map) 653 { 654 int ret; 655 656 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 657 record__process_auxtrace, 658 rec->opts.auxtrace_snapshot_size); 659 if (ret < 0) 660 return ret; 661 662 if (ret) 663 rec->samples++; 664 665 return 0; 666 } 667 668 static int record__auxtrace_read_snapshot_all(struct record *rec) 669 { 670 int i; 671 int rc = 0; 672 673 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 674 struct mmap *map = &rec->evlist->mmap[i]; 675 676 if (!map->auxtrace_mmap.base) 677 continue; 678 679 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 680 rc = -1; 681 goto out; 682 } 683 } 684 out: 685 return rc; 686 } 687 688 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 689 { 690 pr_debug("Recording AUX area tracing snapshot\n"); 691 if (record__auxtrace_read_snapshot_all(rec) < 0) { 692 trigger_error(&auxtrace_snapshot_trigger); 693 } else { 694 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 695 trigger_error(&auxtrace_snapshot_trigger); 696 else 697 trigger_ready(&auxtrace_snapshot_trigger); 698 } 699 } 700 701 static int record__auxtrace_snapshot_exit(struct record *rec) 702 { 703 if (trigger_is_error(&auxtrace_snapshot_trigger)) 704 return 0; 705 706 if (!auxtrace_record__snapshot_started && 707 auxtrace_record__snapshot_start(rec->itr)) 708 return -1; 709 710 record__read_auxtrace_snapshot(rec, true); 711 if (trigger_is_error(&auxtrace_snapshot_trigger)) 712 return -1; 713 714 return 0; 715 } 716 717 static int record__auxtrace_init(struct record *rec) 718 { 719 int err; 720 721 if (!rec->itr) { 722 rec->itr = 
auxtrace_record__init(rec->evlist, &err); 723 if (err) 724 return err; 725 } 726 727 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 728 rec->opts.auxtrace_snapshot_opts); 729 if (err) 730 return err; 731 732 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 733 rec->opts.auxtrace_sample_opts); 734 if (err) 735 return err; 736 737 auxtrace_regroup_aux_output(rec->evlist); 738 739 return auxtrace_parse_filters(rec->evlist); 740 } 741 742 #else 743 744 static inline 745 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 746 struct mmap *map __maybe_unused) 747 { 748 return 0; 749 } 750 751 static inline 752 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 753 bool on_exit __maybe_unused) 754 { 755 } 756 757 static inline 758 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 759 { 760 return 0; 761 } 762 763 static inline 764 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 765 { 766 return 0; 767 } 768 769 static int record__auxtrace_init(struct record *rec __maybe_unused) 770 { 771 return 0; 772 } 773 774 #endif 775 776 static int record__config_text_poke(struct evlist *evlist) 777 { 778 struct evsel *evsel; 779 int err; 780 781 /* Nothing to do if text poke is already configured */ 782 evlist__for_each_entry(evlist, evsel) { 783 if (evsel->core.attr.text_poke) 784 return 0; 785 } 786 787 err = parse_events(evlist, "dummy:u", NULL); 788 if (err) 789 return err; 790 791 evsel = evlist__last(evlist); 792 793 evsel->core.attr.freq = 0; 794 evsel->core.attr.sample_period = 1; 795 evsel->core.attr.text_poke = 1; 796 evsel->core.attr.ksymbol = 1; 797 798 evsel->core.system_wide = true; 799 evsel->no_aux_samples = true; 800 evsel->immediate = true; 801 802 /* Text poke must be collected on all CPUs */ 803 perf_cpu_map__put(evsel->core.own_cpus); 804 evsel->core.own_cpus = perf_cpu_map__new(NULL); 805 perf_cpu_map__put(evsel->core.cpus); 806 evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); 807 808 evsel__set_sample_bit(evsel, TIME); 809 810 return 0; 811 } 812 813 static bool record__kcore_readable(struct machine *machine) 814 { 815 char kcore[PATH_MAX]; 816 int fd; 817 818 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 819 820 fd = open(kcore, O_RDONLY); 821 if (fd < 0) 822 return false; 823 824 close(fd); 825 826 return true; 827 } 828 829 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 830 { 831 char from_dir[PATH_MAX]; 832 char kcore_dir[PATH_MAX]; 833 int ret; 834 835 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 836 837 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 838 if (ret) 839 return ret; 840 841 return kcore_copy(from_dir, kcore_dir); 842 } 843 844 static int record__mmap_evlist(struct record *rec, 845 struct evlist *evlist) 846 { 847 struct record_opts *opts = &rec->opts; 848 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 849 opts->auxtrace_sample_mode; 850 char msg[512]; 851 852 if (opts->affinity != PERF_AFFINITY_SYS) 853 cpu__setup_cpunode_map(); 854 855 if (evlist__mmap_ex(evlist, opts->mmap_pages, 856 opts->auxtrace_mmap_pages, 857 auxtrace_overwrite, 858 opts->nr_cblocks, opts->affinity, 859 opts->mmap_flush, opts->comp_level) < 0) { 860 if (errno == EPERM) { 861 pr_err("Permission error mapping pages.\n" 862 "Consider increasing " 863 "/proc/sys/kernel/perf_event_mlock_kb,\n" 864 "or try again with a smaller value of -m/--mmap_pages.\n" 865 
"(current value: %u,%u)\n", 866 opts->mmap_pages, opts->auxtrace_mmap_pages); 867 return -errno; 868 } else { 869 pr_err("failed to mmap with %d (%s)\n", errno, 870 str_error_r(errno, msg, sizeof(msg))); 871 if (errno) 872 return -errno; 873 else 874 return -EINVAL; 875 } 876 } 877 return 0; 878 } 879 880 static int record__mmap(struct record *rec) 881 { 882 return record__mmap_evlist(rec, rec->evlist); 883 } 884 885 static int record__open(struct record *rec) 886 { 887 char msg[BUFSIZ]; 888 struct evsel *pos; 889 struct evlist *evlist = rec->evlist; 890 struct perf_session *session = rec->session; 891 struct record_opts *opts = &rec->opts; 892 int rc = 0; 893 894 /* 895 * For initial_delay, system wide or a hybrid system, we need to add a 896 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 897 * of waiting or event synthesis. 898 */ 899 if (opts->initial_delay || target__has_cpu(&opts->target) || 900 perf_pmu__has_hybrid()) { 901 pos = evlist__get_tracking_event(evlist); 902 if (!evsel__is_dummy_event(pos)) { 903 /* Set up dummy event. */ 904 if (evlist__add_dummy(evlist)) 905 return -ENOMEM; 906 pos = evlist__last(evlist); 907 evlist__set_tracking_event(evlist, pos); 908 } 909 910 /* 911 * Enable the dummy event when the process is forked for 912 * initial_delay, immediately for system wide. 913 */ 914 if (opts->initial_delay && !pos->immediate && 915 !target__has_cpu(&opts->target)) 916 pos->core.attr.enable_on_exec = 1; 917 else 918 pos->immediate = 1; 919 } 920 921 evlist__config(evlist, opts, &callchain_param); 922 923 evlist__for_each_entry(evlist, pos) { 924 try_again: 925 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 926 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 927 if (verbose > 0) 928 ui__warning("%s\n", msg); 929 goto try_again; 930 } 931 if ((errno == EINVAL || errno == EBADF) && 932 pos->core.leader != &pos->core && 933 pos->weak_group) { 934 pos = evlist__reset_weak_group(evlist, pos, true); 935 goto try_again; 936 } 937 rc = -errno; 938 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 939 ui__error("%s\n", msg); 940 goto out; 941 } 942 943 pos->supported = true; 944 } 945 946 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 947 pr_warning( 948 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 949 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 950 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 951 "file is not found in the buildid cache or in the vmlinux path.\n\n" 952 "Samples in kernel modules won't be resolved at all.\n\n" 953 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a preload dso, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This method is used to avoid missing symbols
	 * when the first addr is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
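	 * Either one can serve as the reference relocation symbol for the
	 * guest kernel mmap event synthesized below.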
1052 */ 1053 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1054 machine); 1055 if (err < 0) 1056 pr_err("Couldn't record guest kernel [%d]'s reference" 1057 " relocation symbol.\n", machine->pid); 1058 } 1059 1060 static struct perf_event_header finished_round_event = { 1061 .size = sizeof(struct perf_event_header), 1062 .type = PERF_RECORD_FINISHED_ROUND, 1063 }; 1064 1065 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1066 { 1067 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1068 !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits, 1069 rec->affinity_mask.nbits)) { 1070 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits); 1071 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits, 1072 map->affinity_mask.bits, rec->affinity_mask.nbits); 1073 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask), 1074 (cpu_set_t *)rec->affinity_mask.bits); 1075 if (verbose == 2) 1076 mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread"); 1077 } 1078 } 1079 1080 static size_t process_comp_header(void *record, size_t increment) 1081 { 1082 struct perf_record_compressed *event = record; 1083 size_t size = sizeof(*event); 1084 1085 if (increment) { 1086 event->header.size += increment; 1087 return increment; 1088 } 1089 1090 event->header.type = PERF_RECORD_COMPRESSED; 1091 event->header.size = size; 1092 1093 return size; 1094 } 1095 1096 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 1097 void *src, size_t src_size) 1098 { 1099 size_t compressed; 1100 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1101 1102 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, 1103 max_record_size, process_comp_header); 1104 1105 session->bytes_transferred += src_size; 1106 session->bytes_compressed += compressed; 1107 1108 return compressed; 1109 } 1110 1111 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1112 bool overwrite, bool synch) 1113 { 1114 u64 bytes_written = rec->bytes_written; 1115 int i; 1116 int rc = 0; 1117 struct mmap *maps; 1118 int trace_fd = rec->data.file.fd; 1119 off_t off = 0; 1120 1121 if (!evlist) 1122 return 0; 1123 1124 maps = overwrite ? 
evlist->overwrite_mmap : evlist->mmap; 1125 if (!maps) 1126 return 0; 1127 1128 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1129 return 0; 1130 1131 if (record__aio_enabled(rec)) 1132 off = record__aio_get_pos(trace_fd); 1133 1134 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1135 u64 flush = 0; 1136 struct mmap *map = &maps[i]; 1137 1138 if (map->core.base) { 1139 record__adjust_affinity(rec, map); 1140 if (synch) { 1141 flush = map->core.flush; 1142 map->core.flush = 1; 1143 } 1144 if (!record__aio_enabled(rec)) { 1145 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1146 if (synch) 1147 map->core.flush = flush; 1148 rc = -1; 1149 goto out; 1150 } 1151 } else { 1152 if (record__aio_push(rec, map, &off) < 0) { 1153 record__aio_set_pos(trace_fd, off); 1154 if (synch) 1155 map->core.flush = flush; 1156 rc = -1; 1157 goto out; 1158 } 1159 } 1160 if (synch) 1161 map->core.flush = flush; 1162 } 1163 1164 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1165 !rec->opts.auxtrace_sample_mode && 1166 record__auxtrace_mmap_read(rec, map) != 0) { 1167 rc = -1; 1168 goto out; 1169 } 1170 } 1171 1172 if (record__aio_enabled(rec)) 1173 record__aio_set_pos(trace_fd, off); 1174 1175 /* 1176 * Mark the round finished in case we wrote 1177 * at least one event. 1178 */ 1179 if (bytes_written != rec->bytes_written) 1180 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1181 1182 if (overwrite) 1183 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1184 out: 1185 return rc; 1186 } 1187 1188 static int record__mmap_read_all(struct record *rec, bool synch) 1189 { 1190 int err; 1191 1192 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1193 if (err) 1194 return err; 1195 1196 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1197 } 1198 1199 static void record__init_features(struct record *rec) 1200 { 1201 struct perf_session *session = rec->session; 1202 int feat; 1203 1204 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1205 perf_header__set_feat(&session->header, feat); 1206 1207 if (rec->no_buildid) 1208 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1209 1210 if (!have_tracepoints(&rec->evlist->core.entries)) 1211 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1212 1213 if (!rec->opts.branch_stack) 1214 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1215 1216 if (!rec->opts.full_auxtrace) 1217 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1218 1219 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1220 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1221 1222 if (!rec->opts.use_clockid) 1223 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1224 1225 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1226 if (!record__comp_enabled(rec)) 1227 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1228 1229 perf_header__clear_feat(&session->header, HEADER_STAT); 1230 } 1231 1232 static void 1233 record__finish_output(struct record *rec) 1234 { 1235 struct perf_data *data = &rec->data; 1236 int fd = perf_data__fd(data); 1237 1238 if (data->is_pipe) 1239 return; 1240 1241 rec->session->header.data_size += rec->bytes_written; 1242 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1243 1244 if (!rec->no_buildid) { 1245 process_buildids(rec); 1246 1247 if (rec->buildid_all) 1248 dsos__hit_all(rec->session); 1249 } 1250 perf_session__write_header(rec->session, 
rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which causes the newly created perf.data
		 * to contain no map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
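 * The signal carries the child's errno in si_value.sival_int, which
 * workload_exec_failed_signal() stores in workload_exec_errno so that
 * __cmd_record() can report why the workload failed.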
1355 */ 1356 static void workload_exec_failed_signal(int signo __maybe_unused, 1357 siginfo_t *info, 1358 void *ucontext __maybe_unused) 1359 { 1360 workload_exec_errno = info->si_value.sival_int; 1361 done = 1; 1362 child_finished = 1; 1363 } 1364 1365 static void snapshot_sig_handler(int sig); 1366 static void alarm_sig_handler(int sig); 1367 1368 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1369 { 1370 if (evlist) { 1371 if (evlist->mmap && evlist->mmap[0].core.base) 1372 return evlist->mmap[0].core.base; 1373 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1374 return evlist->overwrite_mmap[0].core.base; 1375 } 1376 return NULL; 1377 } 1378 1379 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1380 { 1381 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1382 if (pc) 1383 return pc; 1384 return NULL; 1385 } 1386 1387 static int record__synthesize(struct record *rec, bool tail) 1388 { 1389 struct perf_session *session = rec->session; 1390 struct machine *machine = &session->machines.host; 1391 struct perf_data *data = &rec->data; 1392 struct record_opts *opts = &rec->opts; 1393 struct perf_tool *tool = &rec->tool; 1394 int err = 0; 1395 event_op f = process_synthesized_event; 1396 1397 if (rec->opts.tail_synthesize != tail) 1398 return 0; 1399 1400 if (data->is_pipe) { 1401 err = perf_event__synthesize_for_pipe(tool, session, data, 1402 process_synthesized_event); 1403 if (err < 0) 1404 goto out; 1405 1406 rec->bytes_written += err; 1407 } 1408 1409 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1410 process_synthesized_event, machine); 1411 if (err) 1412 goto out; 1413 1414 /* Synthesize id_index before auxtrace_info */ 1415 if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { 1416 err = perf_event__synthesize_id_index(tool, 1417 process_synthesized_event, 1418 session->evlist, machine); 1419 if (err) 1420 goto out; 1421 } 1422 1423 if (rec->opts.full_auxtrace) { 1424 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1425 session, process_synthesized_event); 1426 if (err) 1427 goto out; 1428 } 1429 1430 if (!evlist__exclude_kernel(rec->evlist)) { 1431 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1432 machine); 1433 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1434 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1435 "Check /proc/kallsyms permission or run as root.\n"); 1436 1437 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1438 machine); 1439 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1440 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 1441 "Check /proc/modules permission or run as root.\n"); 1442 } 1443 1444 if (perf_guest) { 1445 machines__process_guests(&session->machines, 1446 perf_event__synthesize_guest_os, tool); 1447 } 1448 1449 err = perf_event__synthesize_extra_attr(&rec->tool, 1450 rec->evlist, 1451 process_synthesized_event, 1452 data->is_pipe); 1453 if (err) 1454 goto out; 1455 1456 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1457 process_synthesized_event, 1458 NULL); 1459 if (err < 0) { 1460 pr_err("Couldn't synthesize thread map.\n"); 1461 return err; 1462 } 1463 1464 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus, 1465 process_synthesized_event, NULL); 1466 if (err < 0) { 1467 pr_err("Couldn't synthesize cpu map.\n"); 1468 return err; 1469 } 1470 1471 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1472 machine, opts); 1473 if (err < 0) 1474 pr_warning("Couldn't synthesize bpf events.\n"); 1475 1476 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 1477 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 1478 machine); 1479 if (err < 0) 1480 pr_warning("Couldn't synthesize cgroup events.\n"); 1481 } 1482 1483 if (rec->opts.nr_threads_synthesize > 1) { 1484 perf_set_multithreaded(); 1485 f = process_locked_synthesized_event; 1486 } 1487 1488 if (rec->opts.synth & PERF_SYNTH_TASK) { 1489 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1490 1491 err = __machine__synthesize_threads(machine, tool, &opts->target, 1492 rec->evlist->core.threads, 1493 f, needs_mmap, opts->sample_address, 1494 rec->opts.nr_threads_synthesize); 1495 } 1496 1497 if (rec->opts.nr_threads_synthesize > 1) 1498 perf_set_singlethreaded(); 1499 1500 out: 1501 return err; 1502 } 1503 1504 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 1505 { 1506 struct record *rec = data; 1507 pthread_kill(rec->thread_id, SIGUSR2); 1508 return 0; 1509 } 1510 1511 static int record__setup_sb_evlist(struct record *rec) 1512 { 1513 struct record_opts *opts = &rec->opts; 1514 1515 if (rec->sb_evlist != NULL) { 1516 /* 1517 * We get here if --switch-output-event populated the 1518 * sb_evlist, so associate a callback that will send a SIGUSR2 1519 * to the main thread. 
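		 * SIGUSR2 is handled by snapshot_sig_handler(), which is
		 * installed in __cmd_record() when switch-output (or auxtrace
		 * snapshotting) is enabled.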
1520 */ 1521 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 1522 rec->thread_id = pthread_self(); 1523 } 1524 #ifdef HAVE_LIBBPF_SUPPORT 1525 if (!opts->no_bpf_event) { 1526 if (rec->sb_evlist == NULL) { 1527 rec->sb_evlist = evlist__new(); 1528 1529 if (rec->sb_evlist == NULL) { 1530 pr_err("Couldn't create side band evlist.\n."); 1531 return -1; 1532 } 1533 } 1534 1535 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 1536 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 1537 return -1; 1538 } 1539 } 1540 #endif 1541 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 1542 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1543 opts->no_bpf_event = true; 1544 } 1545 1546 return 0; 1547 } 1548 1549 static int record__init_clock(struct record *rec) 1550 { 1551 struct perf_session *session = rec->session; 1552 struct timespec ref_clockid; 1553 struct timeval ref_tod; 1554 u64 ref; 1555 1556 if (!rec->opts.use_clockid) 1557 return 0; 1558 1559 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1560 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 1561 1562 session->header.env.clock.clockid = rec->opts.clockid; 1563 1564 if (gettimeofday(&ref_tod, NULL) != 0) { 1565 pr_err("gettimeofday failed, cannot set reference time.\n"); 1566 return -1; 1567 } 1568 1569 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 1570 pr_err("clock_gettime failed, cannot set reference time.\n"); 1571 return -1; 1572 } 1573 1574 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 1575 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 1576 1577 session->header.env.clock.tod_ns = ref; 1578 1579 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 1580 (u64) ref_clockid.tv_nsec; 1581 1582 session->header.env.clock.clockid_ns = ref; 1583 return 0; 1584 } 1585 1586 static void hit_auxtrace_snapshot_trigger(struct record *rec) 1587 { 1588 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1589 trigger_hit(&auxtrace_snapshot_trigger); 1590 auxtrace_record__snapshot_started = 1; 1591 if (auxtrace_record__snapshot_start(rec->itr)) 1592 trigger_error(&auxtrace_snapshot_trigger); 1593 } 1594 } 1595 1596 static void record__uniquify_name(struct record *rec) 1597 { 1598 struct evsel *pos; 1599 struct evlist *evlist = rec->evlist; 1600 char *new_name; 1601 int ret; 1602 1603 if (!perf_pmu__has_hybrid()) 1604 return; 1605 1606 evlist__for_each_entry(evlist, pos) { 1607 if (!evsel__is_hybrid(pos)) 1608 continue; 1609 1610 if (strchr(pos->name, '/')) 1611 continue; 1612 1613 ret = asprintf(&new_name, "%s/%s/", 1614 pos->pmu_name, pos->name); 1615 if (ret) { 1616 free(pos->name); 1617 pos->name = new_name; 1618 } 1619 } 1620 } 1621 1622 static int __cmd_record(struct record *rec, int argc, const char **argv) 1623 { 1624 int err; 1625 int status = 0; 1626 unsigned long waking = 0; 1627 const bool forks = argc > 0; 1628 struct perf_tool *tool = &rec->tool; 1629 struct record_opts *opts = &rec->opts; 1630 struct perf_data *data = &rec->data; 1631 struct perf_session *session; 1632 bool disabled = false, draining = false; 1633 int fd; 1634 float ratio = 0; 1635 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 1636 1637 atexit(record__sig_exit); 1638 signal(SIGCHLD, sig_handler); 1639 signal(SIGINT, sig_handler); 1640 signal(SIGTERM, sig_handler); 1641 signal(SIGSEGV, sigsegv_handler); 1642 1643 if (rec->opts.record_namespaces) 1644 tool->namespace_events = true; 1645 1646 if 
(rec->opts.record_cgroup) { 1647 #ifdef HAVE_FILE_HANDLE 1648 tool->cgroup_events = true; 1649 #else 1650 pr_err("cgroup tracking is not supported\n"); 1651 return -1; 1652 #endif 1653 } 1654 1655 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1656 signal(SIGUSR2, snapshot_sig_handler); 1657 if (rec->opts.auxtrace_snapshot_mode) 1658 trigger_on(&auxtrace_snapshot_trigger); 1659 if (rec->switch_output.enabled) 1660 trigger_on(&switch_output_trigger); 1661 } else { 1662 signal(SIGUSR2, SIG_IGN); 1663 } 1664 1665 session = perf_session__new(data, tool); 1666 if (IS_ERR(session)) { 1667 pr_err("Perf session creation failed.\n"); 1668 return PTR_ERR(session); 1669 } 1670 1671 fd = perf_data__fd(data); 1672 rec->session = session; 1673 1674 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 1675 pr_err("Compression initialization failed.\n"); 1676 return -1; 1677 } 1678 #ifdef HAVE_EVENTFD_SUPPORT 1679 done_fd = eventfd(0, EFD_NONBLOCK); 1680 if (done_fd < 0) { 1681 pr_err("Failed to create wakeup eventfd, error: %m\n"); 1682 status = -1; 1683 goto out_delete_session; 1684 } 1685 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 1686 if (err < 0) { 1687 pr_err("Failed to add wakeup eventfd to poll list\n"); 1688 status = err; 1689 goto out_delete_session; 1690 } 1691 #endif // HAVE_EVENTFD_SUPPORT 1692 1693 session->header.env.comp_type = PERF_COMP_ZSTD; 1694 session->header.env.comp_level = rec->opts.comp_level; 1695 1696 if (rec->opts.kcore && 1697 !record__kcore_readable(&session->machines.host)) { 1698 pr_err("ERROR: kcore is not readable.\n"); 1699 return -1; 1700 } 1701 1702 if (record__init_clock(rec)) 1703 return -1; 1704 1705 record__init_features(rec); 1706 1707 if (forks) { 1708 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 1709 workload_exec_failed_signal); 1710 if (err < 0) { 1711 pr_err("Couldn't run the workload!\n"); 1712 status = err; 1713 goto out_delete_session; 1714 } 1715 } 1716 1717 /* 1718 * If we have just single event and are sending data 1719 * through pipe, we need to force the ids allocation, 1720 * because we synthesize event name through the pipe 1721 * and need the id for that. 1722 */ 1723 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 1724 rec->opts.sample_id = true; 1725 1726 record__uniquify_name(rec); 1727 1728 if (record__open(rec) != 0) { 1729 err = -1; 1730 goto out_child; 1731 } 1732 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 1733 1734 if (rec->opts.kcore) { 1735 err = record__kcore_copy(&session->machines.host, data); 1736 if (err) { 1737 pr_err("ERROR: Failed to copy kcore\n"); 1738 goto out_child; 1739 } 1740 } 1741 1742 err = bpf__apply_obj_config(); 1743 if (err) { 1744 char errbuf[BUFSIZ]; 1745 1746 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 1747 pr_err("ERROR: Apply config to BPF failed: %s\n", 1748 errbuf); 1749 goto out_child; 1750 } 1751 1752 /* 1753 * Normally perf_session__new would do this, but it doesn't have the 1754 * evlist. 
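	 * Ordered event processing relies on sample_id_all so that every
	 * record carries a timestamp to sort on; without it, fall back to
	 * unordered processing as done below.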
1755 */ 1756 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 1757 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 1758 rec->tool.ordered_events = false; 1759 } 1760 1761 if (!rec->evlist->core.nr_groups) 1762 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 1763 1764 if (data->is_pipe) { 1765 err = perf_header__write_pipe(fd); 1766 if (err < 0) 1767 goto out_child; 1768 } else { 1769 err = perf_session__write_header(session, rec->evlist, fd, false); 1770 if (err < 0) 1771 goto out_child; 1772 } 1773 1774 err = -1; 1775 if (!rec->no_buildid 1776 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 1777 pr_err("Couldn't generate buildids. " 1778 "Use --no-buildid to profile anyway.\n"); 1779 goto out_child; 1780 } 1781 1782 err = record__setup_sb_evlist(rec); 1783 if (err) 1784 goto out_child; 1785 1786 err = record__synthesize(rec, false); 1787 if (err < 0) 1788 goto out_child; 1789 1790 if (rec->realtime_prio) { 1791 struct sched_param param; 1792 1793 param.sched_priority = rec->realtime_prio; 1794 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1795 pr_err("Could not set realtime priority.\n"); 1796 err = -1; 1797 goto out_child; 1798 } 1799 } 1800 1801 /* 1802 * When perf is starting the traced process, all the events 1803 * (apart from group members) have enable_on_exec=1 set, 1804 * so don't spoil it by prematurely enabling them. 1805 */ 1806 if (!target__none(&opts->target) && !opts->initial_delay) 1807 evlist__enable(rec->evlist); 1808 1809 /* 1810 * Let the child rip 1811 */ 1812 if (forks) { 1813 struct machine *machine = &session->machines.host; 1814 union perf_event *event; 1815 pid_t tgid; 1816 1817 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 1818 if (event == NULL) { 1819 err = -ENOMEM; 1820 goto out_child; 1821 } 1822 1823 /* 1824 * Some H/W events are generated before COMM event 1825 * which is emitted during exec(), so perf script 1826 * cannot see a correct process name for those events. 1827 * Synthesize COMM event to prevent it. 1828 */ 1829 tgid = perf_event__synthesize_comm(tool, event, 1830 rec->evlist->workload.pid, 1831 process_synthesized_event, 1832 machine); 1833 free(event); 1834 1835 if (tgid == -1) 1836 goto out_child; 1837 1838 event = malloc(sizeof(event->namespaces) + 1839 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1840 machine->id_hdr_size); 1841 if (event == NULL) { 1842 err = -ENOMEM; 1843 goto out_child; 1844 } 1845 1846 /* 1847 * Synthesize NAMESPACES event for the command specified. 1848 */ 1849 perf_event__synthesize_namespaces(tool, event, 1850 rec->evlist->workload.pid, 1851 tgid, process_synthesized_event, 1852 machine); 1853 free(event); 1854 1855 evlist__start_workload(rec->evlist); 1856 } 1857 1858 if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1859 goto out_child; 1860 1861 if (opts->initial_delay) { 1862 pr_info(EVLIST_DISABLED_MSG); 1863 if (opts->initial_delay > 0) { 1864 usleep(opts->initial_delay * USEC_PER_MSEC); 1865 evlist__enable(rec->evlist); 1866 pr_info(EVLIST_ENABLED_MSG); 1867 } 1868 } 1869 1870 trigger_ready(&auxtrace_snapshot_trigger); 1871 trigger_ready(&switch_output_trigger); 1872 perf_hooks__invoke_record_start(); 1873 for (;;) { 1874 unsigned long long hits = rec->samples; 1875 1876 /* 1877 * rec->evlist->bkw_mmap_state is possible to be 1878 * BKW_MMAP_EMPTY here: when done == true and 1879 * hits != rec->samples in previous round. 
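		 * (i.e. the previous pass already flushed the overwritable
		 * ring buffer and marked it BKW_MMAP_EMPTY).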
		 *
		 * evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in the
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from the
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Re-enable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
			switch (cmd) {
			case EVLIST_CTL_CMD_SNAPSHOT:
				hit_auxtrace_snapshot_trigger(rec);
				evlist__ctlfd_ack(rec->evlist);
				break;
			case EVLIST_CTL_CMD_STOP:
				done = 1;
				break;
			case EVLIST_CTL_CMD_ACK:
			case EVLIST_CTL_CMD_UNSUPPORTED:
			case EVLIST_CTL_CMD_ENABLE:
			case EVLIST_CTL_CMD_DISABLE:
			case EVLIST_CTL_CMD_EVLIST:
			case EVLIST_CTL_CMD_PING:
			default:
				break;
			}
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
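		 * For CPU or system-wide targets, though, the events must be
		 * disabled explicitly once done is set, which is what the
		 * check below does (exactly once, guarded by 'disabled').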
1983 */ 1984 if (done && !disabled && !target__none(&opts->target)) { 1985 trigger_off(&auxtrace_snapshot_trigger); 1986 evlist__disable(rec->evlist); 1987 disabled = true; 1988 } 1989 } 1990 1991 trigger_off(&auxtrace_snapshot_trigger); 1992 trigger_off(&switch_output_trigger); 1993 1994 if (opts->auxtrace_snapshot_on_exit) 1995 record__auxtrace_snapshot_exit(rec); 1996 1997 if (forks && workload_exec_errno) { 1998 char msg[STRERR_BUFSIZE], strevsels[2048]; 1999 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2000 2001 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2002 2003 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2004 strevsels, argv[0], emsg); 2005 err = -1; 2006 goto out_child; 2007 } 2008 2009 if (!quiet) 2010 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 2011 2012 if (target__none(&rec->opts.target)) 2013 record__synthesize_workload(rec, true); 2014 2015 out_child: 2016 evlist__finalize_ctlfd(rec->evlist); 2017 record__mmap_read_all(rec, true); 2018 record__aio_mmap_read_sync(rec); 2019 2020 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2021 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2022 session->header.env.comp_ratio = ratio + 0.5; 2023 } 2024 2025 if (forks) { 2026 int exit_status; 2027 2028 if (!child_finished) 2029 kill(rec->evlist->workload.pid, SIGTERM); 2030 2031 wait(&exit_status); 2032 2033 if (err < 0) 2034 status = err; 2035 else if (WIFEXITED(exit_status)) 2036 status = WEXITSTATUS(exit_status); 2037 else if (WIFSIGNALED(exit_status)) 2038 signr = WTERMSIG(exit_status); 2039 } else 2040 status = err; 2041 2042 record__synthesize(rec, true); 2043 /* this will be recalculated during process_buildids() */ 2044 rec->samples = 0; 2045 2046 if (!err) { 2047 if (!rec->timestamp_filename) { 2048 record__finish_output(rec); 2049 } else { 2050 fd = record__switch_output(rec, true); 2051 if (fd < 0) { 2052 status = fd; 2053 goto out_delete_session; 2054 } 2055 } 2056 } 2057 2058 perf_hooks__invoke_record_end(); 2059 2060 if (!err && !quiet) { 2061 char samples[128]; 2062 const char *postfix = rec->timestamp_filename ? 
2063 ".<timestamp>" : ""; 2064 2065 if (rec->samples && !rec->opts.full_auxtrace) 2066 scnprintf(samples, sizeof(samples), 2067 " (%" PRIu64 " samples)", rec->samples); 2068 else 2069 samples[0] = '\0'; 2070 2071 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2072 perf_data__size(data) / 1024.0 / 1024.0, 2073 data->path, postfix, samples); 2074 if (ratio) { 2075 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2076 rec->session->bytes_transferred / 1024.0 / 1024.0, 2077 ratio); 2078 } 2079 fprintf(stderr, " ]\n"); 2080 } 2081 2082 out_delete_session: 2083 #ifdef HAVE_EVENTFD_SUPPORT 2084 if (done_fd >= 0) 2085 close(done_fd); 2086 #endif 2087 zstd_fini(&session->zstd_data); 2088 perf_session__delete(session); 2089 2090 if (!opts->no_bpf_event) 2091 evlist__stop_sb_thread(rec->sb_evlist); 2092 return status; 2093 } 2094 2095 static void callchain_debug(struct callchain_param *callchain) 2096 { 2097 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2098 2099 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2100 2101 if (callchain->record_mode == CALLCHAIN_DWARF) 2102 pr_debug("callchain: stack dump size %d\n", 2103 callchain->dump_size); 2104 } 2105 2106 int record_opts__parse_callchain(struct record_opts *record, 2107 struct callchain_param *callchain, 2108 const char *arg, bool unset) 2109 { 2110 int ret; 2111 callchain->enabled = !unset; 2112 2113 /* --no-call-graph */ 2114 if (unset) { 2115 callchain->record_mode = CALLCHAIN_NONE; 2116 pr_debug("callchain: disabled\n"); 2117 return 0; 2118 } 2119 2120 ret = parse_callchain_record_opt(arg, callchain); 2121 if (!ret) { 2122 /* Enable data address sampling for DWARF unwind. */ 2123 if (callchain->record_mode == CALLCHAIN_DWARF) 2124 record->sample_address = true; 2125 callchain_debug(callchain); 2126 } 2127 2128 return ret; 2129 } 2130 2131 int record_parse_callchain_opt(const struct option *opt, 2132 const char *arg, 2133 int unset) 2134 { 2135 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2136 } 2137 2138 int record_callchain_opt(const struct option *opt, 2139 const char *arg __maybe_unused, 2140 int unset __maybe_unused) 2141 { 2142 struct callchain_param *callchain = opt->value; 2143 2144 callchain->enabled = true; 2145 2146 if (callchain->record_mode == CALLCHAIN_NONE) 2147 callchain->record_mode = CALLCHAIN_FP; 2148 2149 callchain_debug(callchain); 2150 return 0; 2151 } 2152 2153 static int perf_record_config(const char *var, const char *value, void *cb) 2154 { 2155 struct record *rec = cb; 2156 2157 if (!strcmp(var, "record.build-id")) { 2158 if (!strcmp(value, "cache")) 2159 rec->no_buildid_cache = false; 2160 else if (!strcmp(value, "no-cache")) 2161 rec->no_buildid_cache = true; 2162 else if (!strcmp(value, "skip")) 2163 rec->no_buildid = true; 2164 else if (!strcmp(value, "mmap")) 2165 rec->buildid_mmap = true; 2166 else 2167 return -1; 2168 return 0; 2169 } 2170 if (!strcmp(var, "record.call-graph")) { 2171 var = "call-graph.record-mode"; 2172 return perf_default_config(var, value, cb); 2173 } 2174 #ifdef HAVE_AIO_SUPPORT 2175 if (!strcmp(var, "record.aio")) { 2176 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2177 if (!rec->opts.nr_cblocks) 2178 rec->opts.nr_cblocks = nr_cblocks_default; 2179 } 2180 #endif 2181 if (!strcmp(var, "record.debuginfod")) { 2182 rec->debuginfod.urls = strdup(value); 2183 if (!rec->debuginfod.urls) 2184 return -ENOMEM; 2185 rec->debuginfod.set = true; 2186 } 2187 2188 return 0; 2189 } 2190 
2191 2192 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2193 { 2194 struct record_opts *opts = (struct record_opts *)opt->value; 2195 2196 if (unset || !str) 2197 return 0; 2198 2199 if (!strcasecmp(str, "node")) 2200 opts->affinity = PERF_AFFINITY_NODE; 2201 else if (!strcasecmp(str, "cpu")) 2202 opts->affinity = PERF_AFFINITY_CPU; 2203 2204 return 0; 2205 } 2206 2207 static int parse_output_max_size(const struct option *opt, 2208 const char *str, int unset) 2209 { 2210 unsigned long *s = (unsigned long *)opt->value; 2211 static struct parse_tag tags_size[] = { 2212 { .tag = 'B', .mult = 1 }, 2213 { .tag = 'K', .mult = 1 << 10 }, 2214 { .tag = 'M', .mult = 1 << 20 }, 2215 { .tag = 'G', .mult = 1 << 30 }, 2216 { .tag = 0 }, 2217 }; 2218 unsigned long val; 2219 2220 if (unset) { 2221 *s = 0; 2222 return 0; 2223 } 2224 2225 val = parse_tag_value(str, tags_size); 2226 if (val != (unsigned long) -1) { 2227 *s = val; 2228 return 0; 2229 } 2230 2231 return -1; 2232 } 2233 2234 static int record__parse_mmap_pages(const struct option *opt, 2235 const char *str, 2236 int unset __maybe_unused) 2237 { 2238 struct record_opts *opts = opt->value; 2239 char *s, *p; 2240 unsigned int mmap_pages; 2241 int ret; 2242 2243 if (!str) 2244 return -EINVAL; 2245 2246 s = strdup(str); 2247 if (!s) 2248 return -ENOMEM; 2249 2250 p = strchr(s, ','); 2251 if (p) 2252 *p = '\0'; 2253 2254 if (*s) { 2255 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2256 if (ret) 2257 goto out_free; 2258 opts->mmap_pages = mmap_pages; 2259 } 2260 2261 if (!p) { 2262 ret = 0; 2263 goto out_free; 2264 } 2265 2266 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 2267 if (ret) 2268 goto out_free; 2269 2270 opts->auxtrace_mmap_pages = mmap_pages; 2271 2272 out_free: 2273 free(s); 2274 return ret; 2275 } 2276 2277 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 2278 { 2279 } 2280 2281 static int parse_control_option(const struct option *opt, 2282 const char *str, 2283 int unset __maybe_unused) 2284 { 2285 struct record_opts *opts = opt->value; 2286 2287 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 2288 } 2289 2290 static void switch_output_size_warn(struct record *rec) 2291 { 2292 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 2293 struct switch_output *s = &rec->switch_output; 2294 2295 wakeup_size /= 2; 2296 2297 if (s->size < wakeup_size) { 2298 char buf[100]; 2299 2300 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 2301 pr_warning("WARNING: switch-output data size lower than " 2302 "wakeup kernel buffer size (%s) " 2303 "expect bigger perf.data sizes\n", buf); 2304 } 2305 } 2306 2307 static int switch_output_setup(struct record *rec) 2308 { 2309 struct switch_output *s = &rec->switch_output; 2310 static struct parse_tag tags_size[] = { 2311 { .tag = 'B', .mult = 1 }, 2312 { .tag = 'K', .mult = 1 << 10 }, 2313 { .tag = 'M', .mult = 1 << 20 }, 2314 { .tag = 'G', .mult = 1 << 30 }, 2315 { .tag = 0 }, 2316 }; 2317 static struct parse_tag tags_time[] = { 2318 { .tag = 's', .mult = 1 }, 2319 { .tag = 'm', .mult = 60 }, 2320 { .tag = 'h', .mult = 60*60 }, 2321 { .tag = 'd', .mult = 60*60*24 }, 2322 { .tag = 0 }, 2323 }; 2324 unsigned long val; 2325 2326 /* 2327 * If we're using --switch-output-events, then we imply its 2328 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 2329 * thread to its parent. 
2330 */ 2331 if (rec->switch_output_event_set) 2332 goto do_signal; 2333 2334 if (!s->set) 2335 return 0; 2336 2337 if (!strcmp(s->str, "signal")) { 2338 do_signal: 2339 s->signal = true; 2340 pr_debug("switch-output with SIGUSR2 signal\n"); 2341 goto enabled; 2342 } 2343 2344 val = parse_tag_value(s->str, tags_size); 2345 if (val != (unsigned long) -1) { 2346 s->size = val; 2347 pr_debug("switch-output with %s size threshold\n", s->str); 2348 goto enabled; 2349 } 2350 2351 val = parse_tag_value(s->str, tags_time); 2352 if (val != (unsigned long) -1) { 2353 s->time = val; 2354 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2355 s->str, s->time); 2356 goto enabled; 2357 } 2358 2359 return -1; 2360 2361 enabled: 2362 rec->timestamp_filename = true; 2363 s->enabled = true; 2364 2365 if (s->size && !rec->opts.no_buffering) 2366 switch_output_size_warn(rec); 2367 2368 return 0; 2369 } 2370 2371 static const char * const __record_usage[] = { 2372 "perf record [<options>] [<command>]", 2373 "perf record [<options>] -- <command> [<options>]", 2374 NULL 2375 }; 2376 const char * const *record_usage = __record_usage; 2377 2378 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 2379 struct perf_sample *sample, struct machine *machine) 2380 { 2381 /* 2382 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 2383 * no need to add them twice. 2384 */ 2385 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 2386 return 0; 2387 return perf_event__process_mmap(tool, event, sample, machine); 2388 } 2389 2390 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 2391 struct perf_sample *sample, struct machine *machine) 2392 { 2393 /* 2394 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 2395 * no need to add them twice. 2396 */ 2397 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 2398 return 0; 2399 2400 return perf_event__process_mmap2(tool, event, sample, machine); 2401 } 2402 2403 static int process_timestamp_boundary(struct perf_tool *tool, 2404 union perf_event *event __maybe_unused, 2405 struct perf_sample *sample, 2406 struct machine *machine __maybe_unused) 2407 { 2408 struct record *rec = container_of(tool, struct record, tool); 2409 2410 set_timestamp_boundary(rec, sample->time); 2411 return 0; 2412 } 2413 2414 static int parse_record_synth_option(const struct option *opt, 2415 const char *str, 2416 int unset __maybe_unused) 2417 { 2418 struct record_opts *opts = opt->value; 2419 char *p = strdup(str); 2420 2421 if (p == NULL) 2422 return -1; 2423 2424 opts->synth = parse_synth_opt(p); 2425 free(p); 2426 2427 if (opts->synth < 0) { 2428 pr_err("Invalid synth option: %s\n", str); 2429 return -1; 2430 } 2431 return 0; 2432 } 2433 2434 /* 2435 * XXX Ideally would be local to cmd_record() and passed to a record__new 2436 * because we need to have access to it in record__exit, that is called 2437 * after cmd_record() exits, but since record_options need to be accessible to 2438 * builtin-script, leave it here. 2439 * 2440 * At least we don't ouch it in all the other functions here directly. 2441 * 2442 * Just say no to tons of global variables, sigh. 
2443 */ 2444 static struct record record = { 2445 .opts = { 2446 .sample_time = true, 2447 .mmap_pages = UINT_MAX, 2448 .user_freq = UINT_MAX, 2449 .user_interval = ULLONG_MAX, 2450 .freq = 4000, 2451 .target = { 2452 .uses_mmap = true, 2453 .default_per_cpu = true, 2454 }, 2455 .mmap_flush = MMAP_FLUSH_DEFAULT, 2456 .nr_threads_synthesize = 1, 2457 .ctl_fd = -1, 2458 .ctl_fd_ack = -1, 2459 .synth = PERF_SYNTH_ALL, 2460 }, 2461 .tool = { 2462 .sample = process_sample_event, 2463 .fork = perf_event__process_fork, 2464 .exit = perf_event__process_exit, 2465 .comm = perf_event__process_comm, 2466 .namespaces = perf_event__process_namespaces, 2467 .mmap = build_id__process_mmap, 2468 .mmap2 = build_id__process_mmap2, 2469 .itrace_start = process_timestamp_boundary, 2470 .aux = process_timestamp_boundary, 2471 .ordered_events = true, 2472 }, 2473 }; 2474 2475 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 2476 "\n\t\t\t\tDefault: fp"; 2477 2478 static bool dry_run; 2479 2480 /* 2481 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 2482 * with it and switch to use the library functions in perf_evlist that came 2483 * from builtin-record.c, i.e. use record_opts, 2484 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 2485 * using pipes, etc. 2486 */ 2487 static struct option __record_options[] = { 2488 OPT_CALLBACK('e', "event", &record.evlist, "event", 2489 "event selector. use 'perf list' to list available events", 2490 parse_events_option), 2491 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 2492 "event filter", parse_filter), 2493 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 2494 NULL, "don't record events from perf itself", 2495 exclude_perf), 2496 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 2497 "record events on existing process id"), 2498 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 2499 "record events on existing thread id"), 2500 OPT_INTEGER('r', "realtime", &record.realtime_prio, 2501 "collect data with this RT SCHED_FIFO priority"), 2502 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 2503 "collect data without buffering"), 2504 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 2505 "collect raw sample records from all opened counters"), 2506 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 2507 "system-wide collection from all CPUs"), 2508 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 2509 "list of cpus to monitor"), 2510 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 2511 OPT_STRING('o', "output", &record.data.path, "file", 2512 "output file name"), 2513 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 2514 &record.opts.no_inherit_set, 2515 "child tasks do not inherit counters"), 2516 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 2517 "synthesize non-sample events at the end of output"), 2518 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 2519 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 2520 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 2521 "Fail if the specified frequency can't be used"), 2522 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 2523 "profile at this frequency", 2524 record__parse_freq), 2525 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 2526 "number of mmap data pages and AUX area tracing mmap pages", 2527 record__parse_mmap_pages), 2528 OPT_CALLBACK(0, 
"mmap-flush", &record.opts, "number", 2529 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 2530 record__mmap_flush_parse), 2531 OPT_BOOLEAN(0, "group", &record.opts.group, 2532 "put the counters into a counter group"), 2533 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 2534 NULL, "enables call-graph recording" , 2535 &record_callchain_opt), 2536 OPT_CALLBACK(0, "call-graph", &record.opts, 2537 "record_mode[,record_size]", record_callchain_help, 2538 &record_parse_callchain_opt), 2539 OPT_INCR('v', "verbose", &verbose, 2540 "be more verbose (show counter open errors, etc)"), 2541 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 2542 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 2543 "per thread counts"), 2544 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 2545 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 2546 "Record the sample physical addresses"), 2547 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 2548 "Record the sampled data address data page size"), 2549 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 2550 "Record the sampled code address (ip) page size"), 2551 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 2552 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 2553 &record.opts.sample_time_set, 2554 "Record the sample timestamps"), 2555 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 2556 "Record the sample period"), 2557 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 2558 "don't sample"), 2559 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 2560 &record.no_buildid_cache_set, 2561 "do not update the buildid cache"), 2562 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 2563 &record.no_buildid_set, 2564 "do not collect buildids in perf.data"), 2565 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 2566 "monitor event in cgroup name only", 2567 parse_cgroups), 2568 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 2569 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 2570 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 2571 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2572 "user to profile"), 2573 2574 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2575 "branch any", "sample any taken branches", 2576 parse_branch_stack), 2577 2578 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2579 "branch filter mask", "branch stack filter modes", 2580 parse_branch_stack), 2581 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2582 "sample by weight (on special events only)"), 2583 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2584 "sample transaction flags (special events only)"), 2585 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2586 "use per-thread mmaps"), 2587 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2588 "sample selected machine registers on interrupt," 2589 " use '-I?' to list register names", parse_intr_regs), 2590 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2591 "sample selected machine registers on interrupt," 2592 " use '--user-regs=?' 
to list register names", parse_user_regs), 2593 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2594 "Record running/enabled time of read (:S) events"), 2595 OPT_CALLBACK('k', "clockid", &record.opts, 2596 "clockid", "clockid to use for events, see clock_gettime()", 2597 parse_clockid), 2598 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2599 "opts", "AUX area tracing Snapshot Mode", ""), 2600 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 2601 "opts", "sample AUX area", ""), 2602 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2603 "per thread proc mmap processing timeout in ms"), 2604 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2605 "Record namespaces events"), 2606 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 2607 "Record cgroup events"), 2608 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 2609 &record.opts.record_switch_events_set, 2610 "Record context switch events"), 2611 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2612 "Configure all used events to run in kernel space.", 2613 PARSE_OPT_EXCLUSIVE), 2614 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2615 "Configure all used events to run in user space.", 2616 PARSE_OPT_EXCLUSIVE), 2617 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 2618 "collect kernel callchains"), 2619 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 2620 "collect user callchains"), 2621 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2622 "clang binary to use for compiling BPF scriptlets"), 2623 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2624 "options passed to clang when compiling BPF scriptlets"), 2625 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2626 "file", "vmlinux pathname"), 2627 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2628 "Record build-id of all DSOs regardless of hits"), 2629 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 2630 "Record build-id in map events"), 2631 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2632 "append timestamp to output filename"), 2633 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2634 "Record timestamp boundary (time of first/last samples)"), 2635 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2636 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2637 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 2638 "signal"), 2639 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 2640 "switch output event selector. 
use 'perf list' to list available events", 2641 parse_events_option_new_evlist), 2642 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2643 "Limit number of switch output generated files"), 2644 OPT_BOOLEAN(0, "dry-run", &dry_run, 2645 "Parse options then exit"), 2646 #ifdef HAVE_AIO_SUPPORT 2647 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2648 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2649 record__aio_parse), 2650 #endif 2651 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2652 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2653 record__parse_affinity), 2654 #ifdef HAVE_ZSTD_SUPPORT 2655 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, 2656 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 2657 record__parse_comp_level), 2658 #endif 2659 OPT_CALLBACK(0, "max-size", &record.output_max_size, 2660 "size", "Limit the maximum size of the output file", parse_output_max_size), 2661 OPT_UINTEGER(0, "num-thread-synthesize", 2662 &record.opts.nr_threads_synthesize, 2663 "number of threads to run for event synthesis"), 2664 #ifdef HAVE_LIBPFM 2665 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 2666 "libpfm4 event selector. use 'perf list' to list available events", 2667 parse_libpfm_events_option), 2668 #endif 2669 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 2670 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 2671 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 2672 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 2673 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 2674 parse_control_option), 2675 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 2676 "Fine-tune event synthesis: default=all", parse_record_synth_option), 2677 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 2678 &record.debuginfod.set, "debuginfod urls", 2679 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 2680 "system"), 2681 OPT_END() 2682 }; 2683 2684 struct option *record_options = __record_options; 2685 2686 int cmd_record(int argc, const char **argv) 2687 { 2688 int err; 2689 struct record *rec = &record; 2690 char errbuf[BUFSIZ]; 2691 2692 setlocale(LC_ALL, ""); 2693 2694 #ifndef HAVE_LIBBPF_SUPPORT 2695 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2696 set_nobuild('\0', "clang-path", true); 2697 set_nobuild('\0', "clang-opt", true); 2698 # undef set_nobuild 2699 #endif 2700 2701 #ifndef HAVE_BPF_PROLOGUE 2702 # if !defined (HAVE_DWARF_SUPPORT) 2703 # define REASON "NO_DWARF=1" 2704 # elif !defined (HAVE_LIBBPF_SUPPORT) 2705 # define REASON "NO_LIBBPF=1" 2706 # else 2707 # define REASON "this architecture doesn't support BPF prologue" 2708 # endif 2709 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2710 set_nobuild('\0', "vmlinux", true); 2711 # undef set_nobuild 2712 # undef REASON 2713 #endif 2714 2715 rec->opts.affinity = PERF_AFFINITY_SYS; 2716 2717 rec->evlist = evlist__new(); 2718 if (rec->evlist == NULL) 2719 return -ENOMEM; 2720 2721 err = perf_config(perf_record_config, rec); 2722 if (err) 2723 return err; 2724 2725 argc = parse_options(argc, 
argv, record_options, record_usage,
2726 				    PARSE_OPT_STOP_AT_NON_OPTION);
2727 	if (quiet)
2728 		perf_quiet_option();
2729 
2730 	err = symbol__validate_sym_arguments();
2731 	if (err)
2732 		return err;
2733 
2734 	perf_debuginfod_setup(&record.debuginfod);
2735 
2736 	/* Make system wide (-a) the default target. */
2737 	if (!argc && target__none(&rec->opts.target))
2738 		rec->opts.target.system_wide = true;
2739 
2740 	if (nr_cgroups && !rec->opts.target.system_wide) {
2741 		usage_with_options_msg(record_usage, record_options,
2742 			"cgroup monitoring only available in system-wide mode");
2743 
2744 	}
2745 
2746 	if (rec->buildid_mmap) {
2747 		if (!perf_can_record_build_id()) {
2748 			pr_err("Failed: no support for recording build id in mmap events, update your kernel.\n");
2749 			err = -EINVAL;
2750 			goto out_opts;
2751 		}
2752 		pr_debug("Enabling build id in mmap2 events.\n");
2753 		/* Enable mmap build id synthesizing. */
2754 		symbol_conf.buildid_mmap2 = true;
2755 		/* Enable perf_event_attr::build_id bit. */
2756 		rec->opts.build_id = true;
2757 		/* Disable build id cache. */
2758 		rec->no_buildid = true;
2759 	}
2760 
2761 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
2762 		pr_err("Kernel has no cgroup sampling support.\n");
2763 		err = -EINVAL;
2764 		goto out_opts;
2765 	}
2766 
2767 	if (rec->opts.kcore)
2768 		rec->data.is_dir = true;
2769 
2770 	if (rec->opts.comp_level != 0) {
2771 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2772 		rec->no_buildid = true;
2773 	}
2774 
2775 	if (rec->opts.record_switch_events &&
2776 	    !perf_can_record_switch_events()) {
2777 		ui__error("kernel does not support recording context switch events\n");
2778 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2779 		err = -EINVAL;
2780 		goto out_opts;
2781 	}
2782 
2783 	if (switch_output_setup(rec)) {
2784 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2785 		err = -EINVAL;
2786 		goto out_opts;
2787 	}
2788 
2789 	if (rec->switch_output.time) {
2790 		signal(SIGALRM, alarm_sig_handler);
2791 		alarm(rec->switch_output.time);
2792 	}
2793 
2794 	if (rec->switch_output.num_files) {
2795 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2796 						      sizeof(char *));
2797 		if (!rec->switch_output.filenames) {
2798 			err = -ENOMEM;
2799 			goto out_opts;
2800 		}
2801 	}
2802 
2803 	/*
2804 	 * Allow aliases to facilitate the lookup of symbols for address
2805 	 * filters. Refer to auxtrace_parse_filters().
2806 */ 2807 symbol_conf.allow_aliases = true; 2808 2809 symbol__init(NULL); 2810 2811 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 2812 rec->affinity_mask.nbits = cpu__max_cpu().cpu; 2813 rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); 2814 if (!rec->affinity_mask.bits) { 2815 pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); 2816 err = -ENOMEM; 2817 goto out_opts; 2818 } 2819 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits); 2820 } 2821 2822 err = record__auxtrace_init(rec); 2823 if (err) 2824 goto out; 2825 2826 if (dry_run) 2827 goto out; 2828 2829 err = bpf__setup_stdout(rec->evlist); 2830 if (err) { 2831 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2832 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2833 errbuf); 2834 goto out; 2835 } 2836 2837 err = -ENOMEM; 2838 2839 if (rec->no_buildid_cache || rec->no_buildid) { 2840 disable_buildid_cache(); 2841 } else if (rec->switch_output.enabled) { 2842 /* 2843 * In 'perf record --switch-output', disable buildid 2844 * generation by default to reduce data file switching 2845 * overhead. Still generate buildid if they are required 2846 * explicitly using 2847 * 2848 * perf record --switch-output --no-no-buildid \ 2849 * --no-no-buildid-cache 2850 * 2851 * Following code equals to: 2852 * 2853 * if ((rec->no_buildid || !rec->no_buildid_set) && 2854 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2855 * disable_buildid_cache(); 2856 */ 2857 bool disable = true; 2858 2859 if (rec->no_buildid_set && !rec->no_buildid) 2860 disable = false; 2861 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2862 disable = false; 2863 if (disable) { 2864 rec->no_buildid = true; 2865 rec->no_buildid_cache = true; 2866 disable_buildid_cache(); 2867 } 2868 } 2869 2870 if (record.opts.overwrite) 2871 record.opts.tail_synthesize = true; 2872 2873 if (rec->evlist->core.nr_entries == 0) { 2874 if (perf_pmu__has_hybrid()) { 2875 err = evlist__add_default_hybrid(rec->evlist, 2876 !record.opts.no_samples); 2877 } else { 2878 err = __evlist__add_default(rec->evlist, 2879 !record.opts.no_samples); 2880 } 2881 2882 if (err < 0) { 2883 pr_err("Not enough memory for event selector list\n"); 2884 goto out; 2885 } 2886 } 2887 2888 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 2889 rec->opts.no_inherit = true; 2890 2891 err = target__validate(&rec->opts.target); 2892 if (err) { 2893 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2894 ui__warning("%s\n", errbuf); 2895 } 2896 2897 err = target__parse_uid(&rec->opts.target); 2898 if (err) { 2899 int saved_errno = errno; 2900 2901 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2902 ui__error("%s", errbuf); 2903 2904 err = -saved_errno; 2905 goto out; 2906 } 2907 2908 /* Enable ignoring missing threads when -u/-p option is defined. 
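	 * Threads found via /proc for -u/-p targets can exit before
	 * perf_event_open() runs; with this flag set, such vanished threads
	 * are skipped instead of failing the whole record session.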
*/ 2909 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 2910 2911 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) { 2912 pr_err("failed to use cpu list %s\n", 2913 rec->opts.target.cpu_list); 2914 goto out; 2915 } 2916 2917 rec->opts.target.hybrid = perf_pmu__has_hybrid(); 2918 2919 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 2920 arch__add_leaf_frame_record_opts(&rec->opts); 2921 2922 err = -ENOMEM; 2923 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 2924 usage_with_options(record_usage, record_options); 2925 2926 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 2927 if (err) 2928 goto out; 2929 2930 /* 2931 * We take all buildids when the file contains 2932 * AUX area tracing data because we do not decode the 2933 * trace because it would take too long. 2934 */ 2935 if (rec->opts.full_auxtrace) 2936 rec->buildid_all = true; 2937 2938 if (rec->opts.text_poke) { 2939 err = record__config_text_poke(rec->evlist); 2940 if (err) { 2941 pr_err("record__config_text_poke failed, error %d\n", err); 2942 goto out; 2943 } 2944 } 2945 2946 if (record_opts__config(&rec->opts)) { 2947 err = -EINVAL; 2948 goto out; 2949 } 2950 2951 if (rec->opts.nr_cblocks > nr_cblocks_max) 2952 rec->opts.nr_cblocks = nr_cblocks_max; 2953 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2954 2955 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2956 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2957 2958 if (rec->opts.comp_level > comp_level_max) 2959 rec->opts.comp_level = comp_level_max; 2960 pr_debug("comp level: %d\n", rec->opts.comp_level); 2961 2962 err = __cmd_record(&record, argc, argv); 2963 out: 2964 bitmap_free(rec->affinity_mask.bits); 2965 evlist__delete(rec->evlist); 2966 symbol__exit(); 2967 auxtrace_record__free(rec->itr); 2968 out_opts: 2969 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 2970 return err; 2971 } 2972 2973 static void snapshot_sig_handler(int sig __maybe_unused) 2974 { 2975 struct record *rec = &record; 2976 2977 hit_auxtrace_snapshot_trigger(rec); 2978 2979 if (switch_output_signal(rec)) 2980 trigger_hit(&switch_output_trigger); 2981 } 2982 2983 static void alarm_sig_handler(int sig __maybe_unused) 2984 { 2985 struct record *rec = &record; 2986 2987 if (switch_output_time(rec)) 2988 trigger_hit(&switch_output_trigger); 2989 } 2990