1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/mmap.h" 24 #include "util/target.h" 25 #include "util/session.h" 26 #include "util/tool.h" 27 #include "util/symbol.h" 28 #include "util/record.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/perf_api_probe.h" 38 #include "util/llvm-utils.h" 39 #include "util/bpf-loader.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/clockid.h" 50 #include "util/pmu-hybrid.h" 51 #include "util/evlist-hybrid.h" 52 #include "asm/bug.h" 53 #include "perf.h" 54 55 #include <errno.h> 56 #include <inttypes.h> 57 #include <locale.h> 58 #include <poll.h> 59 #include <pthread.h> 60 #include <unistd.h> 61 #include <sched.h> 62 #include <signal.h> 63 #ifdef HAVE_EVENTFD_SUPPORT 64 #include <sys/eventfd.h> 65 #endif 66 #include <sys/mman.h> 67 #include <sys/wait.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <fcntl.h> 71 #include <linux/err.h> 72 #include <linux/string.h> 73 #include <linux/time64.h> 74 #include <linux/zalloc.h> 75 #include <linux/bitmap.h> 76 #include <sys/time.h> 77 78 struct switch_output { 79 bool enabled; 80 bool signal; 81 unsigned long size; 82 unsigned long time; 83 const char *str; 84 bool set; 85 char **filenames; 86 int num_files; 87 int cur_file; 88 }; 89 90 struct thread_mask { 91 struct mmap_cpu_mask maps; 92 struct mmap_cpu_mask affinity; 93 }; 94 95 struct record { 96 struct perf_tool tool; 97 struct record_opts opts; 98 u64 bytes_written; 99 struct perf_data data; 100 struct auxtrace_record *itr; 101 struct evlist *evlist; 102 struct perf_session *session; 103 struct evlist *sb_evlist; 104 pthread_t thread_id; 105 int realtime_prio; 106 bool switch_output_event_set; 107 bool no_buildid; 108 bool no_buildid_set; 109 bool no_buildid_cache; 110 bool no_buildid_cache_set; 111 bool buildid_all; 112 bool buildid_mmap; 113 bool timestamp_filename; 114 bool timestamp_boundary; 115 struct switch_output switch_output; 116 unsigned long long samples; 117 struct mmap_cpu_mask affinity_mask; 118 unsigned long output_max_size; /* = 0: unlimited */ 119 struct perf_debuginfod debuginfod; 120 int nr_threads; 121 struct thread_mask *thread_masks; 122 }; 123 124 static volatile int done; 125 126 static volatile int auxtrace_record__snapshot_started; 127 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 128 static DEFINE_TRIGGER(switch_output_trigger); 129 130 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 131 "SYS", "NODE", "CPU" 132 }; 133 134 static bool switch_output_signal(struct record *rec) 135 { 136 return 
rec->switch_output.signal && 137 trigger_is_ready(&switch_output_trigger); 138 } 139 140 static bool switch_output_size(struct record *rec) 141 { 142 return rec->switch_output.size && 143 trigger_is_ready(&switch_output_trigger) && 144 (rec->bytes_written >= rec->switch_output.size); 145 } 146 147 static bool switch_output_time(struct record *rec) 148 { 149 return rec->switch_output.time && 150 trigger_is_ready(&switch_output_trigger); 151 } 152 153 static bool record__output_max_size_exceeded(struct record *rec) 154 { 155 return rec->output_max_size && 156 (rec->bytes_written >= rec->output_max_size); 157 } 158 159 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 160 void *bf, size_t size) 161 { 162 struct perf_data_file *file = &rec->session->data->file; 163 164 if (perf_data_file__write(file, bf, size) < 0) { 165 pr_err("failed to write perf data, error: %m\n"); 166 return -1; 167 } 168 169 rec->bytes_written += size; 170 171 if (record__output_max_size_exceeded(rec) && !done) { 172 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 173 " stopping session ]\n", 174 rec->bytes_written >> 10); 175 done = 1; 176 } 177 178 if (switch_output_size(rec)) 179 trigger_hit(&switch_output_trigger); 180 181 return 0; 182 } 183 184 static int record__aio_enabled(struct record *rec); 185 static int record__comp_enabled(struct record *rec); 186 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 187 void *src, size_t src_size); 188 189 #ifdef HAVE_AIO_SUPPORT 190 static int record__aio_write(struct aiocb *cblock, int trace_fd, 191 void *buf, size_t size, off_t off) 192 { 193 int rc; 194 195 cblock->aio_fildes = trace_fd; 196 cblock->aio_buf = buf; 197 cblock->aio_nbytes = size; 198 cblock->aio_offset = off; 199 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 200 201 do { 202 rc = aio_write(cblock); 203 if (rc == 0) { 204 break; 205 } else if (errno != EAGAIN) { 206 cblock->aio_fildes = -1; 207 pr_err("failed to queue perf data, error: %m\n"); 208 break; 209 } 210 } while (1); 211 212 return rc; 213 } 214 215 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 216 { 217 void *rem_buf; 218 off_t rem_off; 219 size_t rem_size; 220 int rc, aio_errno; 221 ssize_t aio_ret, written; 222 223 aio_errno = aio_error(cblock); 224 if (aio_errno == EINPROGRESS) 225 return 0; 226 227 written = aio_ret = aio_return(cblock); 228 if (aio_ret < 0) { 229 if (aio_errno != EINTR) 230 pr_err("failed to write perf data, error: %m\n"); 231 written = 0; 232 } 233 234 rem_size = cblock->aio_nbytes - written; 235 236 if (rem_size == 0) { 237 cblock->aio_fildes = -1; 238 /* 239 * md->refcount is incremented in record__aio_pushfn() for 240 * every aio write request started in record__aio_push() so 241 * decrement it because the request is now complete. 242 */ 243 perf_mmap__put(&md->core); 244 rc = 1; 245 } else { 246 /* 247 * aio write request may require restart with the 248 * reminder if the kernel didn't write whole 249 * chunk at once. 
250 */ 251 rem_off = cblock->aio_offset + written; 252 rem_buf = (void *)(cblock->aio_buf + written); 253 record__aio_write(cblock, cblock->aio_fildes, 254 rem_buf, rem_size, rem_off); 255 rc = 0; 256 } 257 258 return rc; 259 } 260 261 static int record__aio_sync(struct mmap *md, bool sync_all) 262 { 263 struct aiocb **aiocb = md->aio.aiocb; 264 struct aiocb *cblocks = md->aio.cblocks; 265 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 266 int i, do_suspend; 267 268 do { 269 do_suspend = 0; 270 for (i = 0; i < md->aio.nr_cblocks; ++i) { 271 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 272 if (sync_all) 273 aiocb[i] = NULL; 274 else 275 return i; 276 } else { 277 /* 278 * Started aio write is not complete yet 279 * so it has to be waited before the 280 * next allocation. 281 */ 282 aiocb[i] = &cblocks[i]; 283 do_suspend = 1; 284 } 285 } 286 if (!do_suspend) 287 return -1; 288 289 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 290 if (!(errno == EAGAIN || errno == EINTR)) 291 pr_err("failed to sync perf data, error: %m\n"); 292 } 293 } while (1); 294 } 295 296 struct record_aio { 297 struct record *rec; 298 void *data; 299 size_t size; 300 }; 301 302 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 303 { 304 struct record_aio *aio = to; 305 306 /* 307 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 308 * to release space in the kernel buffer as fast as possible, calling 309 * perf_mmap__consume() from perf_mmap__push() function. 310 * 311 * That lets the kernel to proceed with storing more profiling data into 312 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 313 * 314 * Coping can be done in two steps in case the chunk of profiling data 315 * crosses the upper bound of the kernel buffer. In this case we first move 316 * part of data from map->start till the upper bound and then the reminder 317 * from the beginning of the kernel buffer till the end of the data chunk. 318 */ 319 320 if (record__comp_enabled(aio->rec)) { 321 size = zstd_compress(aio->rec->session, aio->data + aio->size, 322 mmap__mmap_len(map) - aio->size, 323 buf, size); 324 } else { 325 memcpy(aio->data + aio->size, buf, size); 326 } 327 328 if (!aio->size) { 329 /* 330 * Increment map->refcount to guard map->aio.data[] buffer 331 * from premature deallocation because map object can be 332 * released earlier than aio write request started on 333 * map->aio.data[] buffer is complete. 334 * 335 * perf_mmap__put() is done at record__aio_complete() 336 * after started aio request completion or at record__aio_push() 337 * if the request failed to start. 338 */ 339 perf_mmap__get(&map->core); 340 } 341 342 aio->size += size; 343 344 return size; 345 } 346 347 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 348 { 349 int ret, idx; 350 int trace_fd = rec->session->data->file.fd; 351 struct record_aio aio = { .rec = rec, .size = 0 }; 352 353 /* 354 * Call record__aio_sync() to wait till map->aio.data[] buffer 355 * becomes available after previous aio write operation. 
356 */ 357 358 idx = record__aio_sync(map, false); 359 aio.data = map->aio.data[idx]; 360 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 361 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 362 return ret; 363 364 rec->samples++; 365 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 366 if (!ret) { 367 *off += aio.size; 368 rec->bytes_written += aio.size; 369 if (switch_output_size(rec)) 370 trigger_hit(&switch_output_trigger); 371 } else { 372 /* 373 * Decrement map->refcount incremented in record__aio_pushfn() 374 * back if record__aio_write() operation failed to start, otherwise 375 * map->refcount is decremented in record__aio_complete() after 376 * aio write operation finishes successfully. 377 */ 378 perf_mmap__put(&map->core); 379 } 380 381 return ret; 382 } 383 384 static off_t record__aio_get_pos(int trace_fd) 385 { 386 return lseek(trace_fd, 0, SEEK_CUR); 387 } 388 389 static void record__aio_set_pos(int trace_fd, off_t pos) 390 { 391 lseek(trace_fd, pos, SEEK_SET); 392 } 393 394 static void record__aio_mmap_read_sync(struct record *rec) 395 { 396 int i; 397 struct evlist *evlist = rec->evlist; 398 struct mmap *maps = evlist->mmap; 399 400 if (!record__aio_enabled(rec)) 401 return; 402 403 for (i = 0; i < evlist->core.nr_mmaps; i++) { 404 struct mmap *map = &maps[i]; 405 406 if (map->core.base) 407 record__aio_sync(map, true); 408 } 409 } 410 411 static int nr_cblocks_default = 1; 412 static int nr_cblocks_max = 4; 413 414 static int record__aio_parse(const struct option *opt, 415 const char *str, 416 int unset) 417 { 418 struct record_opts *opts = (struct record_opts *)opt->value; 419 420 if (unset) { 421 opts->nr_cblocks = 0; 422 } else { 423 if (str) 424 opts->nr_cblocks = strtol(str, NULL, 0); 425 if (!opts->nr_cblocks) 426 opts->nr_cblocks = nr_cblocks_default; 427 } 428 429 return 0; 430 } 431 #else /* HAVE_AIO_SUPPORT */ 432 static int nr_cblocks_max = 0; 433 434 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 435 off_t *off __maybe_unused) 436 { 437 return -1; 438 } 439 440 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 441 { 442 return -1; 443 } 444 445 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 446 { 447 } 448 449 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 450 { 451 } 452 #endif 453 454 static int record__aio_enabled(struct record *rec) 455 { 456 return rec->opts.nr_cblocks > 0; 457 } 458 459 #define MMAP_FLUSH_DEFAULT 1 460 static int record__mmap_flush_parse(const struct option *opt, 461 const char *str, 462 int unset) 463 { 464 int flush_max; 465 struct record_opts *opts = (struct record_opts *)opt->value; 466 static struct parse_tag tags[] = { 467 { .tag = 'B', .mult = 1 }, 468 { .tag = 'K', .mult = 1 << 10 }, 469 { .tag = 'M', .mult = 1 << 20 }, 470 { .tag = 'G', .mult = 1 << 30 }, 471 { .tag = 0 }, 472 }; 473 474 if (unset) 475 return 0; 476 477 if (str) { 478 opts->mmap_flush = parse_tag_value(str, tags); 479 if (opts->mmap_flush == (int)-1) 480 opts->mmap_flush = strtol(str, NULL, 0); 481 } 482 483 if (!opts->mmap_flush) 484 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 485 486 flush_max = evlist__mmap_size(opts->mmap_pages); 487 flush_max /= 4; 488 if (opts->mmap_flush > flush_max) 489 opts->mmap_flush = flush_max; 490 491 return 0; 492 } 493 494 #ifdef HAVE_ZSTD_SUPPORT 495 static unsigned int comp_level_default = 1; 496 497 static int record__parse_comp_level(const struct option 
*opt, const char *str, int unset) 498 { 499 struct record_opts *opts = opt->value; 500 501 if (unset) { 502 opts->comp_level = 0; 503 } else { 504 if (str) 505 opts->comp_level = strtol(str, NULL, 0); 506 if (!opts->comp_level) 507 opts->comp_level = comp_level_default; 508 } 509 510 return 0; 511 } 512 #endif 513 static unsigned int comp_level_max = 22; 514 515 static int record__comp_enabled(struct record *rec) 516 { 517 return rec->opts.comp_level > 0; 518 } 519 520 static int process_synthesized_event(struct perf_tool *tool, 521 union perf_event *event, 522 struct perf_sample *sample __maybe_unused, 523 struct machine *machine __maybe_unused) 524 { 525 struct record *rec = container_of(tool, struct record, tool); 526 return record__write(rec, NULL, event, event->header.size); 527 } 528 529 static int process_locked_synthesized_event(struct perf_tool *tool, 530 union perf_event *event, 531 struct perf_sample *sample __maybe_unused, 532 struct machine *machine __maybe_unused) 533 { 534 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; 535 int ret; 536 537 pthread_mutex_lock(&synth_lock); 538 ret = process_synthesized_event(tool, event, sample, machine); 539 pthread_mutex_unlock(&synth_lock); 540 return ret; 541 } 542 543 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 544 { 545 struct record *rec = to; 546 547 if (record__comp_enabled(rec)) { 548 size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size); 549 bf = map->data; 550 } 551 552 rec->samples++; 553 return record__write(rec, map, bf, size); 554 } 555 556 static volatile int signr = -1; 557 static volatile int child_finished; 558 #ifdef HAVE_EVENTFD_SUPPORT 559 static int done_fd = -1; 560 #endif 561 562 static void sig_handler(int sig) 563 { 564 if (sig == SIGCHLD) 565 child_finished = 1; 566 else 567 signr = sig; 568 569 done = 1; 570 #ifdef HAVE_EVENTFD_SUPPORT 571 { 572 u64 tmp = 1; 573 /* 574 * It is possible for this signal handler to run after done is checked 575 * in the main loop, but before the perf counter fds are polled. If this 576 * happens, the poll() will continue to wait even though done is set, 577 * and will only break out if either another signal is received, or the 578 * counters are ready for read. To ensure the poll() doesn't sleep when 579 * done is set, use an eventfd (done_fd) to wake up the poll(). 
580 */ 581 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 582 pr_err("failed to signal wakeup fd, error: %m\n"); 583 } 584 #endif // HAVE_EVENTFD_SUPPORT 585 } 586 587 static void sigsegv_handler(int sig) 588 { 589 perf_hooks__recover(); 590 sighandler_dump_stack(sig); 591 } 592 593 static void record__sig_exit(void) 594 { 595 if (signr == -1) 596 return; 597 598 signal(signr, SIG_DFL); 599 raise(signr); 600 } 601 602 #ifdef HAVE_AUXTRACE_SUPPORT 603 604 static int record__process_auxtrace(struct perf_tool *tool, 605 struct mmap *map, 606 union perf_event *event, void *data1, 607 size_t len1, void *data2, size_t len2) 608 { 609 struct record *rec = container_of(tool, struct record, tool); 610 struct perf_data *data = &rec->data; 611 size_t padding; 612 u8 pad[8] = {0}; 613 614 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 615 off_t file_offset; 616 int fd = perf_data__fd(data); 617 int err; 618 619 file_offset = lseek(fd, 0, SEEK_CUR); 620 if (file_offset == -1) 621 return -1; 622 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 623 event, file_offset); 624 if (err) 625 return err; 626 } 627 628 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 629 padding = (len1 + len2) & 7; 630 if (padding) 631 padding = 8 - padding; 632 633 record__write(rec, map, event, event->header.size); 634 record__write(rec, map, data1, len1); 635 if (len2) 636 record__write(rec, map, data2, len2); 637 record__write(rec, map, &pad, padding); 638 639 return 0; 640 } 641 642 static int record__auxtrace_mmap_read(struct record *rec, 643 struct mmap *map) 644 { 645 int ret; 646 647 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 648 record__process_auxtrace); 649 if (ret < 0) 650 return ret; 651 652 if (ret) 653 rec->samples++; 654 655 return 0; 656 } 657 658 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 659 struct mmap *map) 660 { 661 int ret; 662 663 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 664 record__process_auxtrace, 665 rec->opts.auxtrace_snapshot_size); 666 if (ret < 0) 667 return ret; 668 669 if (ret) 670 rec->samples++; 671 672 return 0; 673 } 674 675 static int record__auxtrace_read_snapshot_all(struct record *rec) 676 { 677 int i; 678 int rc = 0; 679 680 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 681 struct mmap *map = &rec->evlist->mmap[i]; 682 683 if (!map->auxtrace_mmap.base) 684 continue; 685 686 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 687 rc = -1; 688 goto out; 689 } 690 } 691 out: 692 return rc; 693 } 694 695 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 696 { 697 pr_debug("Recording AUX area tracing snapshot\n"); 698 if (record__auxtrace_read_snapshot_all(rec) < 0) { 699 trigger_error(&auxtrace_snapshot_trigger); 700 } else { 701 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 702 trigger_error(&auxtrace_snapshot_trigger); 703 else 704 trigger_ready(&auxtrace_snapshot_trigger); 705 } 706 } 707 708 static int record__auxtrace_snapshot_exit(struct record *rec) 709 { 710 if (trigger_is_error(&auxtrace_snapshot_trigger)) 711 return 0; 712 713 if (!auxtrace_record__snapshot_started && 714 auxtrace_record__snapshot_start(rec->itr)) 715 return -1; 716 717 record__read_auxtrace_snapshot(rec, true); 718 if (trigger_is_error(&auxtrace_snapshot_trigger)) 719 return -1; 720 721 return 0; 722 } 723 724 static int record__auxtrace_init(struct record *rec) 725 { 726 int err; 727 728 if (!rec->itr) { 729 rec->itr = 
auxtrace_record__init(rec->evlist, &err); 730 if (err) 731 return err; 732 } 733 734 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 735 rec->opts.auxtrace_snapshot_opts); 736 if (err) 737 return err; 738 739 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 740 rec->opts.auxtrace_sample_opts); 741 if (err) 742 return err; 743 744 auxtrace_regroup_aux_output(rec->evlist); 745 746 return auxtrace_parse_filters(rec->evlist); 747 } 748 749 #else 750 751 static inline 752 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 753 struct mmap *map __maybe_unused) 754 { 755 return 0; 756 } 757 758 static inline 759 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 760 bool on_exit __maybe_unused) 761 { 762 } 763 764 static inline 765 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 766 { 767 return 0; 768 } 769 770 static inline 771 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 772 { 773 return 0; 774 } 775 776 static int record__auxtrace_init(struct record *rec __maybe_unused) 777 { 778 return 0; 779 } 780 781 #endif 782 783 static int record__config_text_poke(struct evlist *evlist) 784 { 785 struct evsel *evsel; 786 int err; 787 788 /* Nothing to do if text poke is already configured */ 789 evlist__for_each_entry(evlist, evsel) { 790 if (evsel->core.attr.text_poke) 791 return 0; 792 } 793 794 err = parse_events(evlist, "dummy:u", NULL); 795 if (err) 796 return err; 797 798 evsel = evlist__last(evlist); 799 800 evsel->core.attr.freq = 0; 801 evsel->core.attr.sample_period = 1; 802 evsel->core.attr.text_poke = 1; 803 evsel->core.attr.ksymbol = 1; 804 805 evsel->core.system_wide = true; 806 evsel->no_aux_samples = true; 807 evsel->immediate = true; 808 809 /* Text poke must be collected on all CPUs */ 810 perf_cpu_map__put(evsel->core.own_cpus); 811 evsel->core.own_cpus = perf_cpu_map__new(NULL); 812 perf_cpu_map__put(evsel->core.cpus); 813 evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); 814 815 evsel__set_sample_bit(evsel, TIME); 816 817 return 0; 818 } 819 820 static bool record__kcore_readable(struct machine *machine) 821 { 822 char kcore[PATH_MAX]; 823 int fd; 824 825 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 826 827 fd = open(kcore, O_RDONLY); 828 if (fd < 0) 829 return false; 830 831 close(fd); 832 833 return true; 834 } 835 836 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 837 { 838 char from_dir[PATH_MAX]; 839 char kcore_dir[PATH_MAX]; 840 int ret; 841 842 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 843 844 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 845 if (ret) 846 return ret; 847 848 return kcore_copy(from_dir, kcore_dir); 849 } 850 851 static int record__mmap_evlist(struct record *rec, 852 struct evlist *evlist) 853 { 854 struct record_opts *opts = &rec->opts; 855 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 856 opts->auxtrace_sample_mode; 857 char msg[512]; 858 859 if (opts->affinity != PERF_AFFINITY_SYS) 860 cpu__setup_cpunode_map(); 861 862 if (evlist__mmap_ex(evlist, opts->mmap_pages, 863 opts->auxtrace_mmap_pages, 864 auxtrace_overwrite, 865 opts->nr_cblocks, opts->affinity, 866 opts->mmap_flush, opts->comp_level) < 0) { 867 if (errno == EPERM) { 868 pr_err("Permission error mapping pages.\n" 869 "Consider increasing " 870 "/proc/sys/kernel/perf_event_mlock_kb,\n" 871 "or try again with a smaller value of -m/--mmap_pages.\n" 872 
"(current value: %u,%u)\n", 873 opts->mmap_pages, opts->auxtrace_mmap_pages); 874 return -errno; 875 } else { 876 pr_err("failed to mmap with %d (%s)\n", errno, 877 str_error_r(errno, msg, sizeof(msg))); 878 if (errno) 879 return -errno; 880 else 881 return -EINVAL; 882 } 883 } 884 return 0; 885 } 886 887 static int record__mmap(struct record *rec) 888 { 889 return record__mmap_evlist(rec, rec->evlist); 890 } 891 892 static int record__open(struct record *rec) 893 { 894 char msg[BUFSIZ]; 895 struct evsel *pos; 896 struct evlist *evlist = rec->evlist; 897 struct perf_session *session = rec->session; 898 struct record_opts *opts = &rec->opts; 899 int rc = 0; 900 901 /* 902 * For initial_delay, system wide or a hybrid system, we need to add a 903 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 904 * of waiting or event synthesis. 905 */ 906 if (opts->initial_delay || target__has_cpu(&opts->target) || 907 perf_pmu__has_hybrid()) { 908 pos = evlist__get_tracking_event(evlist); 909 if (!evsel__is_dummy_event(pos)) { 910 /* Set up dummy event. */ 911 if (evlist__add_dummy(evlist)) 912 return -ENOMEM; 913 pos = evlist__last(evlist); 914 evlist__set_tracking_event(evlist, pos); 915 } 916 917 /* 918 * Enable the dummy event when the process is forked for 919 * initial_delay, immediately for system wide. 920 */ 921 if (opts->initial_delay && !pos->immediate && 922 !target__has_cpu(&opts->target)) 923 pos->core.attr.enable_on_exec = 1; 924 else 925 pos->immediate = 1; 926 } 927 928 evlist__config(evlist, opts, &callchain_param); 929 930 evlist__for_each_entry(evlist, pos) { 931 try_again: 932 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 933 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 934 if (verbose > 0) 935 ui__warning("%s\n", msg); 936 goto try_again; 937 } 938 if ((errno == EINVAL || errno == EBADF) && 939 pos->core.leader != &pos->core && 940 pos->weak_group) { 941 pos = evlist__reset_weak_group(evlist, pos, true); 942 goto try_again; 943 } 944 rc = -errno; 945 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 946 ui__error("%s\n", msg); 947 goto out; 948 } 949 950 pos->supported = true; 951 } 952 953 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 954 pr_warning( 955 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 956 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 957 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 958 "file is not found in the buildid cache or in the vmlinux path.\n\n" 959 "Samples in kernel modules won't be resolved at all.\n\n" 960 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 961 "even with a suitable vmlinux or kallsyms file.\n\n"); 962 } 963 964 if (evlist__apply_filters(evlist, &pos)) { 965 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 966 pos->filter, evsel__name(pos), errno, 967 str_error_r(errno, msg, sizeof(msg))); 968 rc = -1; 969 goto out; 970 } 971 972 rc = record__mmap(rec); 973 if (rc) 974 goto out; 975 976 session->evlist = evlist; 977 perf_session__set_id_hdr_size(session); 978 out: 979 return rc; 980 } 981 982 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 983 { 984 if (rec->evlist->first_sample_time == 0) 985 rec->evlist->first_sample_time = sample_time; 986 987 if (sample_time) 988 rec->evlist->last_sample_time = sample_time; 989 } 990 991 static int process_sample_event(struct perf_tool *tool, 992 union perf_event *event, 993 struct perf_sample *sample, 994 struct evsel *evsel, 995 struct machine *machine) 996 { 997 struct record *rec = container_of(tool, struct record, tool); 998 999 set_timestamp_boundary(rec, sample->time); 1000 1001 if (rec->buildid_all) 1002 return 0; 1003 1004 rec->samples++; 1005 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1006 } 1007 1008 static int process_buildids(struct record *rec) 1009 { 1010 struct perf_session *session = rec->session; 1011 1012 if (perf_data__size(&rec->data) == 0) 1013 return 0; 1014 1015 /* 1016 * During this process, it'll load kernel map and replace the 1017 * dso->long_name to a real pathname it found. In this case 1018 * we prefer the vmlinux path like 1019 * /lib/modules/3.16.4/build/vmlinux 1020 * 1021 * rather than build-id path (in debug directory). 1022 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1023 */ 1024 symbol_conf.ignore_vmlinux_buildid = true; 1025 1026 /* 1027 * If --buildid-all is given, it marks all DSO regardless of hits, 1028 * so no need to process samples. But if timestamp_boundary is enabled, 1029 * it still needs to walk on all samples to get the timestamps of 1030 * first/last samples. 1031 */ 1032 if (rec->buildid_all && !rec->timestamp_boundary) 1033 rec->tool.sample = NULL; 1034 1035 return perf_session__process_events(session); 1036 } 1037 1038 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1039 { 1040 int err; 1041 struct perf_tool *tool = data; 1042 /* 1043 *As for guest kernel when processing subcommand record&report, 1044 *we arrange module mmap prior to guest kernel mmap and trigger 1045 *a preload dso because default guest module symbols are loaded 1046 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1047 *method is used to avoid symbol missing when the first addr is 1048 *in module instead of in guest kernel. 1049 */ 1050 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1051 machine); 1052 if (err < 0) 1053 pr_err("Couldn't record guest kernel [%d]'s reference" 1054 " relocation symbol.\n", machine->pid); 1055 1056 /* 1057 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1058 * have no _text sometimes. 
1059 */ 1060 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1061 machine); 1062 if (err < 0) 1063 pr_err("Couldn't record guest kernel [%d]'s reference" 1064 " relocation symbol.\n", machine->pid); 1065 } 1066 1067 static struct perf_event_header finished_round_event = { 1068 .size = sizeof(struct perf_event_header), 1069 .type = PERF_RECORD_FINISHED_ROUND, 1070 }; 1071 1072 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1073 { 1074 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1075 !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits, 1076 rec->affinity_mask.nbits)) { 1077 bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits); 1078 bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits, 1079 map->affinity_mask.bits, rec->affinity_mask.nbits); 1080 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask), 1081 (cpu_set_t *)rec->affinity_mask.bits); 1082 if (verbose == 2) 1083 mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread"); 1084 } 1085 } 1086 1087 static size_t process_comp_header(void *record, size_t increment) 1088 { 1089 struct perf_record_compressed *event = record; 1090 size_t size = sizeof(*event); 1091 1092 if (increment) { 1093 event->header.size += increment; 1094 return increment; 1095 } 1096 1097 event->header.type = PERF_RECORD_COMPRESSED; 1098 event->header.size = size; 1099 1100 return size; 1101 } 1102 1103 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 1104 void *src, size_t src_size) 1105 { 1106 size_t compressed; 1107 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1108 1109 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, 1110 max_record_size, process_comp_header); 1111 1112 session->bytes_transferred += src_size; 1113 session->bytes_compressed += compressed; 1114 1115 return compressed; 1116 } 1117 1118 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1119 bool overwrite, bool synch) 1120 { 1121 u64 bytes_written = rec->bytes_written; 1122 int i; 1123 int rc = 0; 1124 struct mmap *maps; 1125 int trace_fd = rec->data.file.fd; 1126 off_t off = 0; 1127 1128 if (!evlist) 1129 return 0; 1130 1131 maps = overwrite ? 
evlist->overwrite_mmap : evlist->mmap; 1132 if (!maps) 1133 return 0; 1134 1135 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1136 return 0; 1137 1138 if (record__aio_enabled(rec)) 1139 off = record__aio_get_pos(trace_fd); 1140 1141 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1142 u64 flush = 0; 1143 struct mmap *map = &maps[i]; 1144 1145 if (map->core.base) { 1146 record__adjust_affinity(rec, map); 1147 if (synch) { 1148 flush = map->core.flush; 1149 map->core.flush = 1; 1150 } 1151 if (!record__aio_enabled(rec)) { 1152 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1153 if (synch) 1154 map->core.flush = flush; 1155 rc = -1; 1156 goto out; 1157 } 1158 } else { 1159 if (record__aio_push(rec, map, &off) < 0) { 1160 record__aio_set_pos(trace_fd, off); 1161 if (synch) 1162 map->core.flush = flush; 1163 rc = -1; 1164 goto out; 1165 } 1166 } 1167 if (synch) 1168 map->core.flush = flush; 1169 } 1170 1171 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1172 !rec->opts.auxtrace_sample_mode && 1173 record__auxtrace_mmap_read(rec, map) != 0) { 1174 rc = -1; 1175 goto out; 1176 } 1177 } 1178 1179 if (record__aio_enabled(rec)) 1180 record__aio_set_pos(trace_fd, off); 1181 1182 /* 1183 * Mark the round finished in case we wrote 1184 * at least one event. 1185 */ 1186 if (bytes_written != rec->bytes_written) 1187 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1188 1189 if (overwrite) 1190 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1191 out: 1192 return rc; 1193 } 1194 1195 static int record__mmap_read_all(struct record *rec, bool synch) 1196 { 1197 int err; 1198 1199 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1200 if (err) 1201 return err; 1202 1203 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1204 } 1205 1206 static void record__init_features(struct record *rec) 1207 { 1208 struct perf_session *session = rec->session; 1209 int feat; 1210 1211 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1212 perf_header__set_feat(&session->header, feat); 1213 1214 if (rec->no_buildid) 1215 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1216 1217 if (!have_tracepoints(&rec->evlist->core.entries)) 1218 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1219 1220 if (!rec->opts.branch_stack) 1221 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1222 1223 if (!rec->opts.full_auxtrace) 1224 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1225 1226 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1227 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1228 1229 if (!rec->opts.use_clockid) 1230 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1231 1232 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1233 if (!record__comp_enabled(rec)) 1234 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1235 1236 perf_header__clear_feat(&session->header, HEADER_STAT); 1237 } 1238 1239 static void 1240 record__finish_output(struct record *rec) 1241 { 1242 struct perf_data *data = &rec->data; 1243 int fd = perf_data__fd(data); 1244 1245 if (data->is_pipe) 1246 return; 1247 1248 rec->session->header.data_size += rec->bytes_written; 1249 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1250 1251 if (!rec->no_buildid) { 1252 process_buildids(rec); 1253 1254 if (rec->buildid_all) 1255 dsos__hit_all(rec->session); 1256 } 1257 perf_session__write_header(rec->session, 
rec->evlist, fd, true); 1258 1259 return; 1260 } 1261 1262 static int record__synthesize_workload(struct record *rec, bool tail) 1263 { 1264 int err; 1265 struct perf_thread_map *thread_map; 1266 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1267 1268 if (rec->opts.tail_synthesize != tail) 1269 return 0; 1270 1271 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1272 if (thread_map == NULL) 1273 return -1; 1274 1275 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1276 process_synthesized_event, 1277 &rec->session->machines.host, 1278 needs_mmap, 1279 rec->opts.sample_address); 1280 perf_thread_map__put(thread_map); 1281 return err; 1282 } 1283 1284 static int record__synthesize(struct record *rec, bool tail); 1285 1286 static int 1287 record__switch_output(struct record *rec, bool at_exit) 1288 { 1289 struct perf_data *data = &rec->data; 1290 int fd, err; 1291 char *new_filename; 1292 1293 /* Same Size: "2015122520103046"*/ 1294 char timestamp[] = "InvalidTimestamp"; 1295 1296 record__aio_mmap_read_sync(rec); 1297 1298 record__synthesize(rec, true); 1299 if (target__none(&rec->opts.target)) 1300 record__synthesize_workload(rec, true); 1301 1302 rec->samples = 0; 1303 record__finish_output(rec); 1304 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1305 if (err) { 1306 pr_err("Failed to get current timestamp\n"); 1307 return -EINVAL; 1308 } 1309 1310 fd = perf_data__switch(data, timestamp, 1311 rec->session->header.data_offset, 1312 at_exit, &new_filename); 1313 if (fd >= 0 && !at_exit) { 1314 rec->bytes_written = 0; 1315 rec->session->header.data_size = 0; 1316 } 1317 1318 if (!quiet) 1319 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1320 data->path, timestamp); 1321 1322 if (rec->switch_output.num_files) { 1323 int n = rec->switch_output.cur_file + 1; 1324 1325 if (n >= rec->switch_output.num_files) 1326 n = 0; 1327 rec->switch_output.cur_file = n; 1328 if (rec->switch_output.filenames[n]) { 1329 remove(rec->switch_output.filenames[n]); 1330 zfree(&rec->switch_output.filenames[n]); 1331 } 1332 rec->switch_output.filenames[n] = new_filename; 1333 } else { 1334 free(new_filename); 1335 } 1336 1337 /* Output tracking events */ 1338 if (!at_exit) { 1339 record__synthesize(rec, false); 1340 1341 /* 1342 * In 'perf record --switch-output' without -a, 1343 * record__synthesize() in record__switch_output() won't 1344 * generate tracking events because there's no thread_map 1345 * in evlist. Which causes newly created perf.data doesn't 1346 * contain map and comm information. 1347 * Create a fake thread_map and directly call 1348 * perf_event__synthesize_thread_map() for those events. 1349 */ 1350 if (target__none(&rec->opts.target)) 1351 record__synthesize_workload(rec, false); 1352 } 1353 return fd; 1354 } 1355 1356 static volatile int workload_exec_errno; 1357 1358 /* 1359 * evlist__prepare_workload will send a SIGUSR1 1360 * if the fork fails, since we asked by setting its 1361 * want_signal to true. 
1362 */ 1363 static void workload_exec_failed_signal(int signo __maybe_unused, 1364 siginfo_t *info, 1365 void *ucontext __maybe_unused) 1366 { 1367 workload_exec_errno = info->si_value.sival_int; 1368 done = 1; 1369 child_finished = 1; 1370 } 1371 1372 static void snapshot_sig_handler(int sig); 1373 static void alarm_sig_handler(int sig); 1374 1375 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1376 { 1377 if (evlist) { 1378 if (evlist->mmap && evlist->mmap[0].core.base) 1379 return evlist->mmap[0].core.base; 1380 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1381 return evlist->overwrite_mmap[0].core.base; 1382 } 1383 return NULL; 1384 } 1385 1386 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1387 { 1388 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1389 if (pc) 1390 return pc; 1391 return NULL; 1392 } 1393 1394 static int record__synthesize(struct record *rec, bool tail) 1395 { 1396 struct perf_session *session = rec->session; 1397 struct machine *machine = &session->machines.host; 1398 struct perf_data *data = &rec->data; 1399 struct record_opts *opts = &rec->opts; 1400 struct perf_tool *tool = &rec->tool; 1401 int err = 0; 1402 event_op f = process_synthesized_event; 1403 1404 if (rec->opts.tail_synthesize != tail) 1405 return 0; 1406 1407 if (data->is_pipe) { 1408 err = perf_event__synthesize_for_pipe(tool, session, data, 1409 process_synthesized_event); 1410 if (err < 0) 1411 goto out; 1412 1413 rec->bytes_written += err; 1414 } 1415 1416 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1417 process_synthesized_event, machine); 1418 if (err) 1419 goto out; 1420 1421 /* Synthesize id_index before auxtrace_info */ 1422 if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { 1423 err = perf_event__synthesize_id_index(tool, 1424 process_synthesized_event, 1425 session->evlist, machine); 1426 if (err) 1427 goto out; 1428 } 1429 1430 if (rec->opts.full_auxtrace) { 1431 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1432 session, process_synthesized_event); 1433 if (err) 1434 goto out; 1435 } 1436 1437 if (!evlist__exclude_kernel(rec->evlist)) { 1438 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1439 machine); 1440 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1441 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1442 "Check /proc/kallsyms permission or run as root.\n"); 1443 1444 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1445 machine); 1446 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1447 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 1448 "Check /proc/modules permission or run as root.\n"); 1449 } 1450 1451 if (perf_guest) { 1452 machines__process_guests(&session->machines, 1453 perf_event__synthesize_guest_os, tool); 1454 } 1455 1456 err = perf_event__synthesize_extra_attr(&rec->tool, 1457 rec->evlist, 1458 process_synthesized_event, 1459 data->is_pipe); 1460 if (err) 1461 goto out; 1462 1463 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1464 process_synthesized_event, 1465 NULL); 1466 if (err < 0) { 1467 pr_err("Couldn't synthesize thread map.\n"); 1468 return err; 1469 } 1470 1471 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus, 1472 process_synthesized_event, NULL); 1473 if (err < 0) { 1474 pr_err("Couldn't synthesize cpu map.\n"); 1475 return err; 1476 } 1477 1478 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1479 machine, opts); 1480 if (err < 0) 1481 pr_warning("Couldn't synthesize bpf events.\n"); 1482 1483 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 1484 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 1485 machine); 1486 if (err < 0) 1487 pr_warning("Couldn't synthesize cgroup events.\n"); 1488 } 1489 1490 if (rec->opts.nr_threads_synthesize > 1) { 1491 perf_set_multithreaded(); 1492 f = process_locked_synthesized_event; 1493 } 1494 1495 if (rec->opts.synth & PERF_SYNTH_TASK) { 1496 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1497 1498 err = __machine__synthesize_threads(machine, tool, &opts->target, 1499 rec->evlist->core.threads, 1500 f, needs_mmap, opts->sample_address, 1501 rec->opts.nr_threads_synthesize); 1502 } 1503 1504 if (rec->opts.nr_threads_synthesize > 1) 1505 perf_set_singlethreaded(); 1506 1507 out: 1508 return err; 1509 } 1510 1511 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 1512 { 1513 struct record *rec = data; 1514 pthread_kill(rec->thread_id, SIGUSR2); 1515 return 0; 1516 } 1517 1518 static int record__setup_sb_evlist(struct record *rec) 1519 { 1520 struct record_opts *opts = &rec->opts; 1521 1522 if (rec->sb_evlist != NULL) { 1523 /* 1524 * We get here if --switch-output-event populated the 1525 * sb_evlist, so associate a callback that will send a SIGUSR2 1526 * to the main thread. 
1527 */ 1528 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 1529 rec->thread_id = pthread_self(); 1530 } 1531 #ifdef HAVE_LIBBPF_SUPPORT 1532 if (!opts->no_bpf_event) { 1533 if (rec->sb_evlist == NULL) { 1534 rec->sb_evlist = evlist__new(); 1535 1536 if (rec->sb_evlist == NULL) { 1537 pr_err("Couldn't create side band evlist.\n."); 1538 return -1; 1539 } 1540 } 1541 1542 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 1543 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 1544 return -1; 1545 } 1546 } 1547 #endif 1548 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 1549 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1550 opts->no_bpf_event = true; 1551 } 1552 1553 return 0; 1554 } 1555 1556 static int record__init_clock(struct record *rec) 1557 { 1558 struct perf_session *session = rec->session; 1559 struct timespec ref_clockid; 1560 struct timeval ref_tod; 1561 u64 ref; 1562 1563 if (!rec->opts.use_clockid) 1564 return 0; 1565 1566 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1567 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 1568 1569 session->header.env.clock.clockid = rec->opts.clockid; 1570 1571 if (gettimeofday(&ref_tod, NULL) != 0) { 1572 pr_err("gettimeofday failed, cannot set reference time.\n"); 1573 return -1; 1574 } 1575 1576 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 1577 pr_err("clock_gettime failed, cannot set reference time.\n"); 1578 return -1; 1579 } 1580 1581 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 1582 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 1583 1584 session->header.env.clock.tod_ns = ref; 1585 1586 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 1587 (u64) ref_clockid.tv_nsec; 1588 1589 session->header.env.clock.clockid_ns = ref; 1590 return 0; 1591 } 1592 1593 static void hit_auxtrace_snapshot_trigger(struct record *rec) 1594 { 1595 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 1596 trigger_hit(&auxtrace_snapshot_trigger); 1597 auxtrace_record__snapshot_started = 1; 1598 if (auxtrace_record__snapshot_start(rec->itr)) 1599 trigger_error(&auxtrace_snapshot_trigger); 1600 } 1601 } 1602 1603 static void record__uniquify_name(struct record *rec) 1604 { 1605 struct evsel *pos; 1606 struct evlist *evlist = rec->evlist; 1607 char *new_name; 1608 int ret; 1609 1610 if (!perf_pmu__has_hybrid()) 1611 return; 1612 1613 evlist__for_each_entry(evlist, pos) { 1614 if (!evsel__is_hybrid(pos)) 1615 continue; 1616 1617 if (strchr(pos->name, '/')) 1618 continue; 1619 1620 ret = asprintf(&new_name, "%s/%s/", 1621 pos->pmu_name, pos->name); 1622 if (ret) { 1623 free(pos->name); 1624 pos->name = new_name; 1625 } 1626 } 1627 } 1628 1629 static int __cmd_record(struct record *rec, int argc, const char **argv) 1630 { 1631 int err; 1632 int status = 0; 1633 unsigned long waking = 0; 1634 const bool forks = argc > 0; 1635 struct perf_tool *tool = &rec->tool; 1636 struct record_opts *opts = &rec->opts; 1637 struct perf_data *data = &rec->data; 1638 struct perf_session *session; 1639 bool disabled = false, draining = false; 1640 int fd; 1641 float ratio = 0; 1642 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 1643 1644 atexit(record__sig_exit); 1645 signal(SIGCHLD, sig_handler); 1646 signal(SIGINT, sig_handler); 1647 signal(SIGTERM, sig_handler); 1648 signal(SIGSEGV, sigsegv_handler); 1649 1650 if (rec->opts.record_namespaces) 1651 tool->namespace_events = true; 1652 1653 if 
(rec->opts.record_cgroup) { 1654 #ifdef HAVE_FILE_HANDLE 1655 tool->cgroup_events = true; 1656 #else 1657 pr_err("cgroup tracking is not supported\n"); 1658 return -1; 1659 #endif 1660 } 1661 1662 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1663 signal(SIGUSR2, snapshot_sig_handler); 1664 if (rec->opts.auxtrace_snapshot_mode) 1665 trigger_on(&auxtrace_snapshot_trigger); 1666 if (rec->switch_output.enabled) 1667 trigger_on(&switch_output_trigger); 1668 } else { 1669 signal(SIGUSR2, SIG_IGN); 1670 } 1671 1672 session = perf_session__new(data, tool); 1673 if (IS_ERR(session)) { 1674 pr_err("Perf session creation failed.\n"); 1675 return PTR_ERR(session); 1676 } 1677 1678 fd = perf_data__fd(data); 1679 rec->session = session; 1680 1681 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 1682 pr_err("Compression initialization failed.\n"); 1683 return -1; 1684 } 1685 #ifdef HAVE_EVENTFD_SUPPORT 1686 done_fd = eventfd(0, EFD_NONBLOCK); 1687 if (done_fd < 0) { 1688 pr_err("Failed to create wakeup eventfd, error: %m\n"); 1689 status = -1; 1690 goto out_delete_session; 1691 } 1692 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 1693 if (err < 0) { 1694 pr_err("Failed to add wakeup eventfd to poll list\n"); 1695 status = err; 1696 goto out_delete_session; 1697 } 1698 #endif // HAVE_EVENTFD_SUPPORT 1699 1700 session->header.env.comp_type = PERF_COMP_ZSTD; 1701 session->header.env.comp_level = rec->opts.comp_level; 1702 1703 if (rec->opts.kcore && 1704 !record__kcore_readable(&session->machines.host)) { 1705 pr_err("ERROR: kcore is not readable.\n"); 1706 return -1; 1707 } 1708 1709 if (record__init_clock(rec)) 1710 return -1; 1711 1712 record__init_features(rec); 1713 1714 if (forks) { 1715 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 1716 workload_exec_failed_signal); 1717 if (err < 0) { 1718 pr_err("Couldn't run the workload!\n"); 1719 status = err; 1720 goto out_delete_session; 1721 } 1722 } 1723 1724 /* 1725 * If we have just single event and are sending data 1726 * through pipe, we need to force the ids allocation, 1727 * because we synthesize event name through the pipe 1728 * and need the id for that. 1729 */ 1730 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 1731 rec->opts.sample_id = true; 1732 1733 record__uniquify_name(rec); 1734 1735 if (record__open(rec) != 0) { 1736 err = -1; 1737 goto out_child; 1738 } 1739 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 1740 1741 if (rec->opts.kcore) { 1742 err = record__kcore_copy(&session->machines.host, data); 1743 if (err) { 1744 pr_err("ERROR: Failed to copy kcore\n"); 1745 goto out_child; 1746 } 1747 } 1748 1749 err = bpf__apply_obj_config(); 1750 if (err) { 1751 char errbuf[BUFSIZ]; 1752 1753 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 1754 pr_err("ERROR: Apply config to BPF failed: %s\n", 1755 errbuf); 1756 goto out_child; 1757 } 1758 1759 /* 1760 * Normally perf_session__new would do this, but it doesn't have the 1761 * evlist. 
1762 */ 1763 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 1764 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 1765 rec->tool.ordered_events = false; 1766 } 1767 1768 if (!rec->evlist->core.nr_groups) 1769 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 1770 1771 if (data->is_pipe) { 1772 err = perf_header__write_pipe(fd); 1773 if (err < 0) 1774 goto out_child; 1775 } else { 1776 err = perf_session__write_header(session, rec->evlist, fd, false); 1777 if (err < 0) 1778 goto out_child; 1779 } 1780 1781 err = -1; 1782 if (!rec->no_buildid 1783 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 1784 pr_err("Couldn't generate buildids. " 1785 "Use --no-buildid to profile anyway.\n"); 1786 goto out_child; 1787 } 1788 1789 err = record__setup_sb_evlist(rec); 1790 if (err) 1791 goto out_child; 1792 1793 err = record__synthesize(rec, false); 1794 if (err < 0) 1795 goto out_child; 1796 1797 if (rec->realtime_prio) { 1798 struct sched_param param; 1799 1800 param.sched_priority = rec->realtime_prio; 1801 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1802 pr_err("Could not set realtime priority.\n"); 1803 err = -1; 1804 goto out_child; 1805 } 1806 } 1807 1808 /* 1809 * When perf is starting the traced process, all the events 1810 * (apart from group members) have enable_on_exec=1 set, 1811 * so don't spoil it by prematurely enabling them. 1812 */ 1813 if (!target__none(&opts->target) && !opts->initial_delay) 1814 evlist__enable(rec->evlist); 1815 1816 /* 1817 * Let the child rip 1818 */ 1819 if (forks) { 1820 struct machine *machine = &session->machines.host; 1821 union perf_event *event; 1822 pid_t tgid; 1823 1824 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 1825 if (event == NULL) { 1826 err = -ENOMEM; 1827 goto out_child; 1828 } 1829 1830 /* 1831 * Some H/W events are generated before COMM event 1832 * which is emitted during exec(), so perf script 1833 * cannot see a correct process name for those events. 1834 * Synthesize COMM event to prevent it. 1835 */ 1836 tgid = perf_event__synthesize_comm(tool, event, 1837 rec->evlist->workload.pid, 1838 process_synthesized_event, 1839 machine); 1840 free(event); 1841 1842 if (tgid == -1) 1843 goto out_child; 1844 1845 event = malloc(sizeof(event->namespaces) + 1846 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1847 machine->id_hdr_size); 1848 if (event == NULL) { 1849 err = -ENOMEM; 1850 goto out_child; 1851 } 1852 1853 /* 1854 * Synthesize NAMESPACES event for the command specified. 1855 */ 1856 perf_event__synthesize_namespaces(tool, event, 1857 rec->evlist->workload.pid, 1858 tgid, process_synthesized_event, 1859 machine); 1860 free(event); 1861 1862 evlist__start_workload(rec->evlist); 1863 } 1864 1865 if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1866 goto out_child; 1867 1868 if (opts->initial_delay) { 1869 pr_info(EVLIST_DISABLED_MSG); 1870 if (opts->initial_delay > 0) { 1871 usleep(opts->initial_delay * USEC_PER_MSEC); 1872 evlist__enable(rec->evlist); 1873 pr_info(EVLIST_ENABLED_MSG); 1874 } 1875 } 1876 1877 trigger_ready(&auxtrace_snapshot_trigger); 1878 trigger_ready(&switch_output_trigger); 1879 perf_hooks__invoke_record_start(); 1880 for (;;) { 1881 unsigned long long hits = rec->samples; 1882 1883 /* 1884 * rec->evlist->bkw_mmap_state is possible to be 1885 * BKW_MMAP_EMPTY here: when done == true and 1886 * hits != rec->samples in previous round. 
1887 * 1888 * evlist__toggle_bkw_mmap ensure we never 1889 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1890 */ 1891 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1892 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1893 1894 if (record__mmap_read_all(rec, false) < 0) { 1895 trigger_error(&auxtrace_snapshot_trigger); 1896 trigger_error(&switch_output_trigger); 1897 err = -1; 1898 goto out_child; 1899 } 1900 1901 if (auxtrace_record__snapshot_started) { 1902 auxtrace_record__snapshot_started = 0; 1903 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1904 record__read_auxtrace_snapshot(rec, false); 1905 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1906 pr_err("AUX area tracing snapshot failed\n"); 1907 err = -1; 1908 goto out_child; 1909 } 1910 } 1911 1912 if (trigger_is_hit(&switch_output_trigger)) { 1913 /* 1914 * If switch_output_trigger is hit, the data in 1915 * overwritable ring buffer should have been collected, 1916 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1917 * 1918 * If SIGUSR2 raise after or during record__mmap_read_all(), 1919 * record__mmap_read_all() didn't collect data from 1920 * overwritable ring buffer. Read again. 1921 */ 1922 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1923 continue; 1924 trigger_ready(&switch_output_trigger); 1925 1926 /* 1927 * Reenable events in overwrite ring buffer after 1928 * record__mmap_read_all(): we should have collected 1929 * data from it. 1930 */ 1931 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1932 1933 if (!quiet) 1934 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1935 waking); 1936 waking = 0; 1937 fd = record__switch_output(rec, false); 1938 if (fd < 0) { 1939 pr_err("Failed to switch to new file\n"); 1940 trigger_error(&switch_output_trigger); 1941 err = fd; 1942 goto out_child; 1943 } 1944 1945 /* re-arm the alarm */ 1946 if (rec->switch_output.time) 1947 alarm(rec->switch_output.time); 1948 } 1949 1950 if (hits == rec->samples) { 1951 if (done || draining) 1952 break; 1953 err = evlist__poll(rec->evlist, -1); 1954 /* 1955 * Propagate error, only if there's any. Ignore positive 1956 * number of returned events and interrupt error. 1957 */ 1958 if (err > 0 || (err < 0 && errno == EINTR)) 1959 err = 0; 1960 waking++; 1961 1962 if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1963 draining = true; 1964 } 1965 1966 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 1967 switch (cmd) { 1968 case EVLIST_CTL_CMD_SNAPSHOT: 1969 hit_auxtrace_snapshot_trigger(rec); 1970 evlist__ctlfd_ack(rec->evlist); 1971 break; 1972 case EVLIST_CTL_CMD_STOP: 1973 done = 1; 1974 break; 1975 case EVLIST_CTL_CMD_ACK: 1976 case EVLIST_CTL_CMD_UNSUPPORTED: 1977 case EVLIST_CTL_CMD_ENABLE: 1978 case EVLIST_CTL_CMD_DISABLE: 1979 case EVLIST_CTL_CMD_EVLIST: 1980 case EVLIST_CTL_CMD_PING: 1981 default: 1982 break; 1983 } 1984 } 1985 1986 /* 1987 * When perf is starting the traced process, at the end events 1988 * die with the process and we wait for that. Thus no need to 1989 * disable events in this case. 
1990 */ 1991 if (done && !disabled && !target__none(&opts->target)) { 1992 trigger_off(&auxtrace_snapshot_trigger); 1993 evlist__disable(rec->evlist); 1994 disabled = true; 1995 } 1996 } 1997 1998 trigger_off(&auxtrace_snapshot_trigger); 1999 trigger_off(&switch_output_trigger); 2000 2001 if (opts->auxtrace_snapshot_on_exit) 2002 record__auxtrace_snapshot_exit(rec); 2003 2004 if (forks && workload_exec_errno) { 2005 char msg[STRERR_BUFSIZE], strevsels[2048]; 2006 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2007 2008 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2009 2010 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2011 strevsels, argv[0], emsg); 2012 err = -1; 2013 goto out_child; 2014 } 2015 2016 if (!quiet) 2017 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 2018 2019 if (target__none(&rec->opts.target)) 2020 record__synthesize_workload(rec, true); 2021 2022 out_child: 2023 evlist__finalize_ctlfd(rec->evlist); 2024 record__mmap_read_all(rec, true); 2025 record__aio_mmap_read_sync(rec); 2026 2027 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2028 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2029 session->header.env.comp_ratio = ratio + 0.5; 2030 } 2031 2032 if (forks) { 2033 int exit_status; 2034 2035 if (!child_finished) 2036 kill(rec->evlist->workload.pid, SIGTERM); 2037 2038 wait(&exit_status); 2039 2040 if (err < 0) 2041 status = err; 2042 else if (WIFEXITED(exit_status)) 2043 status = WEXITSTATUS(exit_status); 2044 else if (WIFSIGNALED(exit_status)) 2045 signr = WTERMSIG(exit_status); 2046 } else 2047 status = err; 2048 2049 record__synthesize(rec, true); 2050 /* this will be recalculated during process_buildids() */ 2051 rec->samples = 0; 2052 2053 if (!err) { 2054 if (!rec->timestamp_filename) { 2055 record__finish_output(rec); 2056 } else { 2057 fd = record__switch_output(rec, true); 2058 if (fd < 0) { 2059 status = fd; 2060 goto out_delete_session; 2061 } 2062 } 2063 } 2064 2065 perf_hooks__invoke_record_end(); 2066 2067 if (!err && !quiet) { 2068 char samples[128]; 2069 const char *postfix = rec->timestamp_filename ? 
2070 ".<timestamp>" : ""; 2071 2072 if (rec->samples && !rec->opts.full_auxtrace) 2073 scnprintf(samples, sizeof(samples), 2074 " (%" PRIu64 " samples)", rec->samples); 2075 else 2076 samples[0] = '\0'; 2077 2078 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2079 perf_data__size(data) / 1024.0 / 1024.0, 2080 data->path, postfix, samples); 2081 if (ratio) { 2082 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2083 rec->session->bytes_transferred / 1024.0 / 1024.0, 2084 ratio); 2085 } 2086 fprintf(stderr, " ]\n"); 2087 } 2088 2089 out_delete_session: 2090 #ifdef HAVE_EVENTFD_SUPPORT 2091 if (done_fd >= 0) 2092 close(done_fd); 2093 #endif 2094 zstd_fini(&session->zstd_data); 2095 perf_session__delete(session); 2096 2097 if (!opts->no_bpf_event) 2098 evlist__stop_sb_thread(rec->sb_evlist); 2099 return status; 2100 } 2101 2102 static void callchain_debug(struct callchain_param *callchain) 2103 { 2104 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2105 2106 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2107 2108 if (callchain->record_mode == CALLCHAIN_DWARF) 2109 pr_debug("callchain: stack dump size %d\n", 2110 callchain->dump_size); 2111 } 2112 2113 int record_opts__parse_callchain(struct record_opts *record, 2114 struct callchain_param *callchain, 2115 const char *arg, bool unset) 2116 { 2117 int ret; 2118 callchain->enabled = !unset; 2119 2120 /* --no-call-graph */ 2121 if (unset) { 2122 callchain->record_mode = CALLCHAIN_NONE; 2123 pr_debug("callchain: disabled\n"); 2124 return 0; 2125 } 2126 2127 ret = parse_callchain_record_opt(arg, callchain); 2128 if (!ret) { 2129 /* Enable data address sampling for DWARF unwind. */ 2130 if (callchain->record_mode == CALLCHAIN_DWARF) 2131 record->sample_address = true; 2132 callchain_debug(callchain); 2133 } 2134 2135 return ret; 2136 } 2137 2138 int record_parse_callchain_opt(const struct option *opt, 2139 const char *arg, 2140 int unset) 2141 { 2142 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2143 } 2144 2145 int record_callchain_opt(const struct option *opt, 2146 const char *arg __maybe_unused, 2147 int unset __maybe_unused) 2148 { 2149 struct callchain_param *callchain = opt->value; 2150 2151 callchain->enabled = true; 2152 2153 if (callchain->record_mode == CALLCHAIN_NONE) 2154 callchain->record_mode = CALLCHAIN_FP; 2155 2156 callchain_debug(callchain); 2157 return 0; 2158 } 2159 2160 static int perf_record_config(const char *var, const char *value, void *cb) 2161 { 2162 struct record *rec = cb; 2163 2164 if (!strcmp(var, "record.build-id")) { 2165 if (!strcmp(value, "cache")) 2166 rec->no_buildid_cache = false; 2167 else if (!strcmp(value, "no-cache")) 2168 rec->no_buildid_cache = true; 2169 else if (!strcmp(value, "skip")) 2170 rec->no_buildid = true; 2171 else if (!strcmp(value, "mmap")) 2172 rec->buildid_mmap = true; 2173 else 2174 return -1; 2175 return 0; 2176 } 2177 if (!strcmp(var, "record.call-graph")) { 2178 var = "call-graph.record-mode"; 2179 return perf_default_config(var, value, cb); 2180 } 2181 #ifdef HAVE_AIO_SUPPORT 2182 if (!strcmp(var, "record.aio")) { 2183 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2184 if (!rec->opts.nr_cblocks) 2185 rec->opts.nr_cblocks = nr_cblocks_default; 2186 } 2187 #endif 2188 if (!strcmp(var, "record.debuginfod")) { 2189 rec->debuginfod.urls = strdup(value); 2190 if (!rec->debuginfod.urls) 2191 return -ENOMEM; 2192 rec->debuginfod.set = true; 2193 } 2194 2195 return 0; 2196 } 2197 
2198 2199 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2200 { 2201 struct record_opts *opts = (struct record_opts *)opt->value; 2202 2203 if (unset || !str) 2204 return 0; 2205 2206 if (!strcasecmp(str, "node")) 2207 opts->affinity = PERF_AFFINITY_NODE; 2208 else if (!strcasecmp(str, "cpu")) 2209 opts->affinity = PERF_AFFINITY_CPU; 2210 2211 return 0; 2212 } 2213 2214 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2215 { 2216 mask->nbits = nr_bits; 2217 mask->bits = bitmap_zalloc(mask->nbits); 2218 if (!mask->bits) 2219 return -ENOMEM; 2220 2221 return 0; 2222 } 2223 2224 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2225 { 2226 bitmap_free(mask->bits); 2227 mask->nbits = 0; 2228 } 2229 2230 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2231 { 2232 int ret; 2233 2234 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2235 if (ret) { 2236 mask->affinity.bits = NULL; 2237 return ret; 2238 } 2239 2240 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2241 if (ret) { 2242 record__mmap_cpu_mask_free(&mask->maps); 2243 mask->maps.bits = NULL; 2244 } 2245 2246 return ret; 2247 } 2248 2249 static void record__thread_mask_free(struct thread_mask *mask) 2250 { 2251 record__mmap_cpu_mask_free(&mask->maps); 2252 record__mmap_cpu_mask_free(&mask->affinity); 2253 } 2254 2255 static int parse_output_max_size(const struct option *opt, 2256 const char *str, int unset) 2257 { 2258 unsigned long *s = (unsigned long *)opt->value; 2259 static struct parse_tag tags_size[] = { 2260 { .tag = 'B', .mult = 1 }, 2261 { .tag = 'K', .mult = 1 << 10 }, 2262 { .tag = 'M', .mult = 1 << 20 }, 2263 { .tag = 'G', .mult = 1 << 30 }, 2264 { .tag = 0 }, 2265 }; 2266 unsigned long val; 2267 2268 if (unset) { 2269 *s = 0; 2270 return 0; 2271 } 2272 2273 val = parse_tag_value(str, tags_size); 2274 if (val != (unsigned long) -1) { 2275 *s = val; 2276 return 0; 2277 } 2278 2279 return -1; 2280 } 2281 2282 static int record__parse_mmap_pages(const struct option *opt, 2283 const char *str, 2284 int unset __maybe_unused) 2285 { 2286 struct record_opts *opts = opt->value; 2287 char *s, *p; 2288 unsigned int mmap_pages; 2289 int ret; 2290 2291 if (!str) 2292 return -EINVAL; 2293 2294 s = strdup(str); 2295 if (!s) 2296 return -ENOMEM; 2297 2298 p = strchr(s, ','); 2299 if (p) 2300 *p = '\0'; 2301 2302 if (*s) { 2303 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2304 if (ret) 2305 goto out_free; 2306 opts->mmap_pages = mmap_pages; 2307 } 2308 2309 if (!p) { 2310 ret = 0; 2311 goto out_free; 2312 } 2313 2314 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 2315 if (ret) 2316 goto out_free; 2317 2318 opts->auxtrace_mmap_pages = mmap_pages; 2319 2320 out_free: 2321 free(s); 2322 return ret; 2323 } 2324 2325 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 2326 { 2327 } 2328 2329 static int parse_control_option(const struct option *opt, 2330 const char *str, 2331 int unset __maybe_unused) 2332 { 2333 struct record_opts *opts = opt->value; 2334 2335 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 2336 } 2337 2338 static void switch_output_size_warn(struct record *rec) 2339 { 2340 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 2341 struct switch_output *s = &rec->switch_output; 2342 2343 wakeup_size /= 2; 2344 2345 if (s->size < wakeup_size) { 2346 char buf[100]; 2347 2348 unit_number__scnprintf(buf, 
sizeof(buf), wakeup_size); 2349 pr_warning("WARNING: switch-output data size lower than " 2350 "wakeup kernel buffer size (%s) " 2351 "expect bigger perf.data sizes\n", buf); 2352 } 2353 } 2354 2355 static int switch_output_setup(struct record *rec) 2356 { 2357 struct switch_output *s = &rec->switch_output; 2358 static struct parse_tag tags_size[] = { 2359 { .tag = 'B', .mult = 1 }, 2360 { .tag = 'K', .mult = 1 << 10 }, 2361 { .tag = 'M', .mult = 1 << 20 }, 2362 { .tag = 'G', .mult = 1 << 30 }, 2363 { .tag = 0 }, 2364 }; 2365 static struct parse_tag tags_time[] = { 2366 { .tag = 's', .mult = 1 }, 2367 { .tag = 'm', .mult = 60 }, 2368 { .tag = 'h', .mult = 60*60 }, 2369 { .tag = 'd', .mult = 60*60*24 }, 2370 { .tag = 0 }, 2371 }; 2372 unsigned long val; 2373 2374 /* 2375 * If we're using --switch-output-events, then we imply its 2376 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 2377 * thread to its parent. 2378 */ 2379 if (rec->switch_output_event_set) 2380 goto do_signal; 2381 2382 if (!s->set) 2383 return 0; 2384 2385 if (!strcmp(s->str, "signal")) { 2386 do_signal: 2387 s->signal = true; 2388 pr_debug("switch-output with SIGUSR2 signal\n"); 2389 goto enabled; 2390 } 2391 2392 val = parse_tag_value(s->str, tags_size); 2393 if (val != (unsigned long) -1) { 2394 s->size = val; 2395 pr_debug("switch-output with %s size threshold\n", s->str); 2396 goto enabled; 2397 } 2398 2399 val = parse_tag_value(s->str, tags_time); 2400 if (val != (unsigned long) -1) { 2401 s->time = val; 2402 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2403 s->str, s->time); 2404 goto enabled; 2405 } 2406 2407 return -1; 2408 2409 enabled: 2410 rec->timestamp_filename = true; 2411 s->enabled = true; 2412 2413 if (s->size && !rec->opts.no_buffering) 2414 switch_output_size_warn(rec); 2415 2416 return 0; 2417 } 2418 2419 static const char * const __record_usage[] = { 2420 "perf record [<options>] [<command>]", 2421 "perf record [<options>] -- <command> [<options>]", 2422 NULL 2423 }; 2424 const char * const *record_usage = __record_usage; 2425 2426 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 2427 struct perf_sample *sample, struct machine *machine) 2428 { 2429 /* 2430 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 2431 * no need to add them twice. 2432 */ 2433 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 2434 return 0; 2435 return perf_event__process_mmap(tool, event, sample, machine); 2436 } 2437 2438 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 2439 struct perf_sample *sample, struct machine *machine) 2440 { 2441 /* 2442 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 2443 * no need to add them twice. 
2444 */ 2445 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 2446 return 0; 2447 2448 return perf_event__process_mmap2(tool, event, sample, machine); 2449 } 2450 2451 static int process_timestamp_boundary(struct perf_tool *tool, 2452 union perf_event *event __maybe_unused, 2453 struct perf_sample *sample, 2454 struct machine *machine __maybe_unused) 2455 { 2456 struct record *rec = container_of(tool, struct record, tool); 2457 2458 set_timestamp_boundary(rec, sample->time); 2459 return 0; 2460 } 2461 2462 static int parse_record_synth_option(const struct option *opt, 2463 const char *str, 2464 int unset __maybe_unused) 2465 { 2466 struct record_opts *opts = opt->value; 2467 char *p = strdup(str); 2468 2469 if (p == NULL) 2470 return -1; 2471 2472 opts->synth = parse_synth_opt(p); 2473 free(p); 2474 2475 if (opts->synth < 0) { 2476 pr_err("Invalid synth option: %s\n", str); 2477 return -1; 2478 } 2479 return 0; 2480 } 2481 2482 /* 2483 * XXX Ideally would be local to cmd_record() and passed to a record__new 2484 * because we need to have access to it in record__exit, that is called 2485 * after cmd_record() exits, but since record_options need to be accessible to 2486 * builtin-script, leave it here. 2487 * 2488 * At least we don't ouch it in all the other functions here directly. 2489 * 2490 * Just say no to tons of global variables, sigh. 2491 */ 2492 static struct record record = { 2493 .opts = { 2494 .sample_time = true, 2495 .mmap_pages = UINT_MAX, 2496 .user_freq = UINT_MAX, 2497 .user_interval = ULLONG_MAX, 2498 .freq = 4000, 2499 .target = { 2500 .uses_mmap = true, 2501 .default_per_cpu = true, 2502 }, 2503 .mmap_flush = MMAP_FLUSH_DEFAULT, 2504 .nr_threads_synthesize = 1, 2505 .ctl_fd = -1, 2506 .ctl_fd_ack = -1, 2507 .synth = PERF_SYNTH_ALL, 2508 }, 2509 .tool = { 2510 .sample = process_sample_event, 2511 .fork = perf_event__process_fork, 2512 .exit = perf_event__process_exit, 2513 .comm = perf_event__process_comm, 2514 .namespaces = perf_event__process_namespaces, 2515 .mmap = build_id__process_mmap, 2516 .mmap2 = build_id__process_mmap2, 2517 .itrace_start = process_timestamp_boundary, 2518 .aux = process_timestamp_boundary, 2519 .ordered_events = true, 2520 }, 2521 }; 2522 2523 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 2524 "\n\t\t\t\tDefault: fp"; 2525 2526 static bool dry_run; 2527 2528 /* 2529 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 2530 * with it and switch to use the library functions in perf_evlist that came 2531 * from builtin-record.c, i.e. use record_opts, 2532 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 2533 * using pipes, etc. 2534 */ 2535 static struct option __record_options[] = { 2536 OPT_CALLBACK('e', "event", &record.evlist, "event", 2537 "event selector. 
use 'perf list' to list available events", 2538 parse_events_option), 2539 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 2540 "event filter", parse_filter), 2541 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 2542 NULL, "don't record events from perf itself", 2543 exclude_perf), 2544 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 2545 "record events on existing process id"), 2546 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 2547 "record events on existing thread id"), 2548 OPT_INTEGER('r', "realtime", &record.realtime_prio, 2549 "collect data with this RT SCHED_FIFO priority"), 2550 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 2551 "collect data without buffering"), 2552 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 2553 "collect raw sample records from all opened counters"), 2554 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 2555 "system-wide collection from all CPUs"), 2556 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 2557 "list of cpus to monitor"), 2558 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 2559 OPT_STRING('o', "output", &record.data.path, "file", 2560 "output file name"), 2561 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 2562 &record.opts.no_inherit_set, 2563 "child tasks do not inherit counters"), 2564 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 2565 "synthesize non-sample events at the end of output"), 2566 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 2567 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 2568 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 2569 "Fail if the specified frequency can't be used"), 2570 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 2571 "profile at this frequency", 2572 record__parse_freq), 2573 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 2574 "number of mmap data pages and AUX area tracing mmap pages", 2575 record__parse_mmap_pages), 2576 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 2577 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 2578 record__mmap_flush_parse), 2579 OPT_BOOLEAN(0, "group", &record.opts.group, 2580 "put the counters into a counter group"), 2581 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 2582 NULL, "enables call-graph recording" , 2583 &record_callchain_opt), 2584 OPT_CALLBACK(0, "call-graph", &record.opts, 2585 "record_mode[,record_size]", record_callchain_help, 2586 &record_parse_callchain_opt), 2587 OPT_INCR('v', "verbose", &verbose, 2588 "be more verbose (show counter open errors, etc)"), 2589 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 2590 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 2591 "per thread counts"), 2592 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 2593 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 2594 "Record the sample physical addresses"), 2595 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 2596 "Record the sampled data address data page size"), 2597 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 2598 "Record the sampled code address (ip) page size"), 2599 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 2600 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 2601 &record.opts.sample_time_set, 2602 "Record the sample timestamps"), 2603 
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 2604 "Record the sample period"), 2605 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 2606 "don't sample"), 2607 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 2608 &record.no_buildid_cache_set, 2609 "do not update the buildid cache"), 2610 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 2611 &record.no_buildid_set, 2612 "do not collect buildids in perf.data"), 2613 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 2614 "monitor event in cgroup name only", 2615 parse_cgroups), 2616 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 2617 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 2618 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 2619 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2620 "user to profile"), 2621 2622 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2623 "branch any", "sample any taken branches", 2624 parse_branch_stack), 2625 2626 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2627 "branch filter mask", "branch stack filter modes", 2628 parse_branch_stack), 2629 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2630 "sample by weight (on special events only)"), 2631 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2632 "sample transaction flags (special events only)"), 2633 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2634 "use per-thread mmaps"), 2635 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2636 "sample selected machine registers on interrupt," 2637 " use '-I?' to list register names", parse_intr_regs), 2638 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2639 "sample selected machine registers on interrupt," 2640 " use '--user-regs=?' 
to list register names", parse_user_regs), 2641 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2642 "Record running/enabled time of read (:S) events"), 2643 OPT_CALLBACK('k', "clockid", &record.opts, 2644 "clockid", "clockid to use for events, see clock_gettime()", 2645 parse_clockid), 2646 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2647 "opts", "AUX area tracing Snapshot Mode", ""), 2648 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 2649 "opts", "sample AUX area", ""), 2650 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2651 "per thread proc mmap processing timeout in ms"), 2652 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2653 "Record namespaces events"), 2654 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 2655 "Record cgroup events"), 2656 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 2657 &record.opts.record_switch_events_set, 2658 "Record context switch events"), 2659 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2660 "Configure all used events to run in kernel space.", 2661 PARSE_OPT_EXCLUSIVE), 2662 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2663 "Configure all used events to run in user space.", 2664 PARSE_OPT_EXCLUSIVE), 2665 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 2666 "collect kernel callchains"), 2667 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 2668 "collect user callchains"), 2669 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2670 "clang binary to use for compiling BPF scriptlets"), 2671 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2672 "options passed to clang when compiling BPF scriptlets"), 2673 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2674 "file", "vmlinux pathname"), 2675 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2676 "Record build-id of all DSOs regardless of hits"), 2677 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 2678 "Record build-id in map events"), 2679 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2680 "append timestamp to output filename"), 2681 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2682 "Record timestamp boundary (time of first/last samples)"), 2683 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2684 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2685 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 2686 "signal"), 2687 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 2688 "switch output event selector. 
use 'perf list' to list available events", 2689 parse_events_option_new_evlist), 2690 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2691 "Limit number of switch output generated files"), 2692 OPT_BOOLEAN(0, "dry-run", &dry_run, 2693 "Parse options then exit"), 2694 #ifdef HAVE_AIO_SUPPORT 2695 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2696 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2697 record__aio_parse), 2698 #endif 2699 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2700 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2701 record__parse_affinity), 2702 #ifdef HAVE_ZSTD_SUPPORT 2703 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, 2704 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 2705 record__parse_comp_level), 2706 #endif 2707 OPT_CALLBACK(0, "max-size", &record.output_max_size, 2708 "size", "Limit the maximum size of the output file", parse_output_max_size), 2709 OPT_UINTEGER(0, "num-thread-synthesize", 2710 &record.opts.nr_threads_synthesize, 2711 "number of threads to run for event synthesis"), 2712 #ifdef HAVE_LIBPFM 2713 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 2714 "libpfm4 event selector. use 'perf list' to list available events", 2715 parse_libpfm_events_option), 2716 #endif 2717 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 2718 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 2719 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 2720 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 2721 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 2722 parse_control_option), 2723 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 2724 "Fine-tune event synthesis: default=all", parse_record_synth_option), 2725 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 2726 &record.debuginfod.set, "debuginfod urls", 2727 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 2728 "system"), 2729 OPT_END() 2730 }; 2731 2732 struct option *record_options = __record_options; 2733 2734 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 2735 { 2736 int c; 2737 2738 for (c = 0; c < cpus->nr; c++) 2739 set_bit(cpus->map[c].cpu, mask->bits); 2740 } 2741 2742 static void record__free_thread_masks(struct record *rec, int nr_threads) 2743 { 2744 int t; 2745 2746 if (rec->thread_masks) 2747 for (t = 0; t < nr_threads; t++) 2748 record__thread_mask_free(&rec->thread_masks[t]); 2749 2750 zfree(&rec->thread_masks); 2751 } 2752 2753 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 2754 { 2755 int t, ret; 2756 2757 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 2758 if (!rec->thread_masks) { 2759 pr_err("Failed to allocate thread masks\n"); 2760 return -ENOMEM; 2761 } 2762 2763 for (t = 0; t < nr_threads; t++) { 2764 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 2765 if (ret) { 2766 pr_err("Failed to allocate thread masks[%d]\n", t); 2767 goto out_free; 2768 } 2769 } 2770 2771 return 0; 2772 2773 out_free: 2774 record__free_thread_masks(rec, nr_threads); 2775 2776 return ret; 2777 } 2778 2779 static 
int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 2780 { 2781 int ret; 2782 2783 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 2784 if (ret) 2785 return ret; 2786 2787 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus); 2788 2789 rec->nr_threads = 1; 2790 2791 return 0; 2792 } 2793 2794 static int record__init_thread_masks(struct record *rec) 2795 { 2796 struct perf_cpu_map *cpus = rec->evlist->core.cpus; 2797 2798 return record__init_thread_default_masks(rec, cpus); 2799 } 2800 2801 int cmd_record(int argc, const char **argv) 2802 { 2803 int err; 2804 struct record *rec = &record; 2805 char errbuf[BUFSIZ]; 2806 2807 setlocale(LC_ALL, ""); 2808 2809 #ifndef HAVE_LIBBPF_SUPPORT 2810 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2811 set_nobuild('\0', "clang-path", true); 2812 set_nobuild('\0', "clang-opt", true); 2813 # undef set_nobuild 2814 #endif 2815 2816 #ifndef HAVE_BPF_PROLOGUE 2817 # if !defined (HAVE_DWARF_SUPPORT) 2818 # define REASON "NO_DWARF=1" 2819 # elif !defined (HAVE_LIBBPF_SUPPORT) 2820 # define REASON "NO_LIBBPF=1" 2821 # else 2822 # define REASON "this architecture doesn't support BPF prologue" 2823 # endif 2824 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2825 set_nobuild('\0', "vmlinux", true); 2826 # undef set_nobuild 2827 # undef REASON 2828 #endif 2829 2830 rec->opts.affinity = PERF_AFFINITY_SYS; 2831 2832 rec->evlist = evlist__new(); 2833 if (rec->evlist == NULL) 2834 return -ENOMEM; 2835 2836 err = perf_config(perf_record_config, rec); 2837 if (err) 2838 return err; 2839 2840 argc = parse_options(argc, argv, record_options, record_usage, 2841 PARSE_OPT_STOP_AT_NON_OPTION); 2842 if (quiet) 2843 perf_quiet_option(); 2844 2845 err = symbol__validate_sym_arguments(); 2846 if (err) 2847 return err; 2848 2849 perf_debuginfod_setup(&record.debuginfod); 2850 2851 /* Make system wide (-a) the default target. */ 2852 if (!argc && target__none(&rec->opts.target)) 2853 rec->opts.target.system_wide = true; 2854 2855 if (nr_cgroups && !rec->opts.target.system_wide) { 2856 usage_with_options_msg(record_usage, record_options, 2857 "cgroup monitoring only available in system-wide mode"); 2858 2859 } 2860 2861 if (rec->buildid_mmap) { 2862 if (!perf_can_record_build_id()) { 2863 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 2864 err = -EINVAL; 2865 goto out_opts; 2866 } 2867 pr_debug("Enabling build id in mmap2 events.\n"); 2868 /* Enable mmap build id synthesizing. */ 2869 symbol_conf.buildid_mmap2 = true; 2870 /* Enable perf_event_attr::build_id bit. */ 2871 rec->opts.build_id = true; 2872 /* Disable build id cache. 
*/ 2873 rec->no_buildid = true; 2874 } 2875 2876 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 2877 pr_err("Kernel has no cgroup sampling support.\n"); 2878 err = -EINVAL; 2879 goto out_opts; 2880 } 2881 2882 if (rec->opts.kcore) 2883 rec->data.is_dir = true; 2884 2885 if (rec->opts.comp_level != 0) { 2886 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 2887 rec->no_buildid = true; 2888 } 2889 2890 if (rec->opts.record_switch_events && 2891 !perf_can_record_switch_events()) { 2892 ui__error("kernel does not support recording context switch events\n"); 2893 parse_options_usage(record_usage, record_options, "switch-events", 0); 2894 err = -EINVAL; 2895 goto out_opts; 2896 } 2897 2898 if (switch_output_setup(rec)) { 2899 parse_options_usage(record_usage, record_options, "switch-output", 0); 2900 err = -EINVAL; 2901 goto out_opts; 2902 } 2903 2904 if (rec->switch_output.time) { 2905 signal(SIGALRM, alarm_sig_handler); 2906 alarm(rec->switch_output.time); 2907 } 2908 2909 if (rec->switch_output.num_files) { 2910 rec->switch_output.filenames = calloc(sizeof(char *), 2911 rec->switch_output.num_files); 2912 if (!rec->switch_output.filenames) { 2913 err = -EINVAL; 2914 goto out_opts; 2915 } 2916 } 2917 2918 /* 2919 * Allow aliases to facilitate the lookup of symbols for address 2920 * filters. Refer to auxtrace_parse_filters(). 2921 */ 2922 symbol_conf.allow_aliases = true; 2923 2924 symbol__init(NULL); 2925 2926 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 2927 rec->affinity_mask.nbits = cpu__max_cpu().cpu; 2928 rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); 2929 if (!rec->affinity_mask.bits) { 2930 pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); 2931 err = -ENOMEM; 2932 goto out_opts; 2933 } 2934 pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits); 2935 } 2936 2937 err = record__auxtrace_init(rec); 2938 if (err) 2939 goto out; 2940 2941 if (dry_run) 2942 goto out; 2943 2944 err = bpf__setup_stdout(rec->evlist); 2945 if (err) { 2946 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2947 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2948 errbuf); 2949 goto out; 2950 } 2951 2952 err = -ENOMEM; 2953 2954 if (rec->no_buildid_cache || rec->no_buildid) { 2955 disable_buildid_cache(); 2956 } else if (rec->switch_output.enabled) { 2957 /* 2958 * In 'perf record --switch-output', disable buildid 2959 * generation by default to reduce data file switching 2960 * overhead. 
Still generate buildid if they are required 2961 * explicitly using 2962 * 2963 * perf record --switch-output --no-no-buildid \ 2964 * --no-no-buildid-cache 2965 * 2966 * Following code equals to: 2967 * 2968 * if ((rec->no_buildid || !rec->no_buildid_set) && 2969 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2970 * disable_buildid_cache(); 2971 */ 2972 bool disable = true; 2973 2974 if (rec->no_buildid_set && !rec->no_buildid) 2975 disable = false; 2976 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2977 disable = false; 2978 if (disable) { 2979 rec->no_buildid = true; 2980 rec->no_buildid_cache = true; 2981 disable_buildid_cache(); 2982 } 2983 } 2984 2985 if (record.opts.overwrite) 2986 record.opts.tail_synthesize = true; 2987 2988 if (rec->evlist->core.nr_entries == 0) { 2989 if (perf_pmu__has_hybrid()) { 2990 err = evlist__add_default_hybrid(rec->evlist, 2991 !record.opts.no_samples); 2992 } else { 2993 err = __evlist__add_default(rec->evlist, 2994 !record.opts.no_samples); 2995 } 2996 2997 if (err < 0) { 2998 pr_err("Not enough memory for event selector list\n"); 2999 goto out; 3000 } 3001 } 3002 3003 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 3004 rec->opts.no_inherit = true; 3005 3006 err = target__validate(&rec->opts.target); 3007 if (err) { 3008 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 3009 ui__warning("%s\n", errbuf); 3010 } 3011 3012 err = target__parse_uid(&rec->opts.target); 3013 if (err) { 3014 int saved_errno = errno; 3015 3016 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 3017 ui__error("%s", errbuf); 3018 3019 err = -saved_errno; 3020 goto out; 3021 } 3022 3023 /* Enable ignoring missing threads when -u/-p option is defined. */ 3024 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 3025 3026 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) { 3027 pr_err("failed to use cpu list %s\n", 3028 rec->opts.target.cpu_list); 3029 goto out; 3030 } 3031 3032 rec->opts.target.hybrid = perf_pmu__has_hybrid(); 3033 3034 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 3035 arch__add_leaf_frame_record_opts(&rec->opts); 3036 3037 err = -ENOMEM; 3038 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 3039 usage_with_options(record_usage, record_options); 3040 3041 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 3042 if (err) 3043 goto out; 3044 3045 /* 3046 * We take all buildids when the file contains 3047 * AUX area tracing data because we do not decode the 3048 * trace because it would take too long. 
3049 */ 3050 if (rec->opts.full_auxtrace) 3051 rec->buildid_all = true; 3052 3053 if (rec->opts.text_poke) { 3054 err = record__config_text_poke(rec->evlist); 3055 if (err) { 3056 pr_err("record__config_text_poke failed, error %d\n", err); 3057 goto out; 3058 } 3059 } 3060 3061 if (record_opts__config(&rec->opts)) { 3062 err = -EINVAL; 3063 goto out; 3064 } 3065 3066 err = record__init_thread_masks(rec); 3067 if (err) { 3068 pr_err("Failed to initialize parallel data streaming masks\n"); 3069 goto out; 3070 } 3071 3072 if (rec->opts.nr_cblocks > nr_cblocks_max) 3073 rec->opts.nr_cblocks = nr_cblocks_max; 3074 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 3075 3076 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 3077 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 3078 3079 if (rec->opts.comp_level > comp_level_max) 3080 rec->opts.comp_level = comp_level_max; 3081 pr_debug("comp level: %d\n", rec->opts.comp_level); 3082 3083 err = __cmd_record(&record, argc, argv); 3084 out: 3085 bitmap_free(rec->affinity_mask.bits); 3086 evlist__delete(rec->evlist); 3087 symbol__exit(); 3088 auxtrace_record__free(rec->itr); 3089 out_opts: 3090 record__free_thread_masks(rec, rec->nr_threads); 3091 rec->nr_threads = 0; 3092 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 3093 return err; 3094 } 3095 3096 static void snapshot_sig_handler(int sig __maybe_unused) 3097 { 3098 struct record *rec = &record; 3099 3100 hit_auxtrace_snapshot_trigger(rec); 3101 3102 if (switch_output_signal(rec)) 3103 trigger_hit(&switch_output_trigger); 3104 } 3105 3106 static void alarm_sig_handler(int sig __maybe_unused) 3107 { 3108 struct record *rec = &record; 3109 3110 if (switch_output_time(rec)) 3111 trigger_hit(&switch_output_trigger); 3112 } 3113
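/*
 * Usage sketch for the switch-output and control-fd paths implemented above
 * (example command lines only; workload and FIFO names are placeholders):
 *
 *	# rotate the output file once it crosses a 100M size threshold,
 *	# see switch_output_setup() and record__switch_output()
 *	perf record --switch-output=100M -- ./workload
 *
 *	# or rotate on demand: --switch-output defaults to "signal" mode,
 *	# see snapshot_sig_handler()/switch_output_signal() for SIGUSR2 handling
 *	perf record --switch-output -- ./workload &
 *	kill -USR2 $!
 *
 *	# drive a running session through a control FIFO; commands are
 *	# dispatched by evlist__ctlfd_process() in the main loop
 *	mkfifo ctl.fifo ack.fifo
 *	perf record --control=fifo:ctl.fifo,ack.fifo -a sleep 30 &
 *	echo disable > ctl.fifo
 *	echo enable > ctl.fifo
 */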