1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "perf.h" 12 13 #include "util/build-id.h" 14 #include "util/util.h" 15 #include <subcmd/parse-options.h> 16 #include "util/parse-events.h" 17 #include "util/config.h" 18 19 #include "util/callchain.h" 20 #include "util/cgroup.h" 21 #include "util/header.h" 22 #include "util/event.h" 23 #include "util/evlist.h" 24 #include "util/evsel.h" 25 #include "util/debug.h" 26 #include "util/session.h" 27 #include "util/tool.h" 28 #include "util/symbol.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/llvm-utils.h" 38 #include "util/bpf-loader.h" 39 #include "util/trigger.h" 40 #include "util/perf-hooks.h" 41 #include "util/cpu-set-sched.h" 42 #include "util/time-utils.h" 43 #include "util/units.h" 44 #include "util/bpf-event.h" 45 #include "asm/bug.h" 46 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <locale.h> 50 #include <poll.h> 51 #include <unistd.h> 52 #include <sched.h> 53 #include <signal.h> 54 #include <sys/mman.h> 55 #include <sys/wait.h> 56 #include <linux/time64.h> 57 58 struct switch_output { 59 bool enabled; 60 bool signal; 61 unsigned long size; 62 unsigned long time; 63 const char *str; 64 bool set; 65 char **filenames; 66 int num_files; 67 int cur_file; 68 }; 69 70 struct record { 71 struct perf_tool tool; 72 struct record_opts opts; 73 u64 bytes_written; 74 struct perf_data data; 75 struct auxtrace_record *itr; 76 struct perf_evlist *evlist; 77 struct perf_session *session; 78 int realtime_prio; 79 bool no_buildid; 80 bool no_buildid_set; 81 bool 
no_buildid_cache; 82 bool no_buildid_cache_set; 83 bool buildid_all; 84 bool timestamp_filename; 85 bool timestamp_boundary; 86 struct switch_output switch_output; 87 unsigned long long samples; 88 cpu_set_t affinity_mask; 89 }; 90 91 static volatile int auxtrace_record__snapshot_started; 92 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 93 static DEFINE_TRIGGER(switch_output_trigger); 94 95 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 96 "SYS", "NODE", "CPU" 97 }; 98 99 static bool switch_output_signal(struct record *rec) 100 { 101 return rec->switch_output.signal && 102 trigger_is_ready(&switch_output_trigger); 103 } 104 105 static bool switch_output_size(struct record *rec) 106 { 107 return rec->switch_output.size && 108 trigger_is_ready(&switch_output_trigger) && 109 (rec->bytes_written >= rec->switch_output.size); 110 } 111 112 static bool switch_output_time(struct record *rec) 113 { 114 return rec->switch_output.time && 115 trigger_is_ready(&switch_output_trigger); 116 } 117 118 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, 119 void *bf, size_t size) 120 { 121 struct perf_data_file *file = &rec->session->data->file; 122 123 if (perf_data_file__write(file, bf, size) < 0) { 124 pr_err("failed to write perf data, error: %m\n"); 125 return -1; 126 } 127 128 rec->bytes_written += size; 129 130 if (switch_output_size(rec)) 131 trigger_hit(&switch_output_trigger); 132 133 return 0; 134 } 135 136 static int record__aio_enabled(struct record *rec); 137 static int record__comp_enabled(struct record *rec); 138 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 139 void *src, size_t src_size); 140 141 #ifdef HAVE_AIO_SUPPORT 142 static int record__aio_write(struct aiocb *cblock, int trace_fd, 143 void *buf, size_t size, off_t off) 144 { 145 int rc; 146 147 cblock->aio_fildes = trace_fd; 148 cblock->aio_buf = buf; 149 cblock->aio_nbytes = size; 150 cblock->aio_offset = off; 151 
cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 152 153 do { 154 rc = aio_write(cblock); 155 if (rc == 0) { 156 break; 157 } else if (errno != EAGAIN) { 158 cblock->aio_fildes = -1; 159 pr_err("failed to queue perf data, error: %m\n"); 160 break; 161 } 162 } while (1); 163 164 return rc; 165 } 166 167 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) 168 { 169 void *rem_buf; 170 off_t rem_off; 171 size_t rem_size; 172 int rc, aio_errno; 173 ssize_t aio_ret, written; 174 175 aio_errno = aio_error(cblock); 176 if (aio_errno == EINPROGRESS) 177 return 0; 178 179 written = aio_ret = aio_return(cblock); 180 if (aio_ret < 0) { 181 if (aio_errno != EINTR) 182 pr_err("failed to write perf data, error: %m\n"); 183 written = 0; 184 } 185 186 rem_size = cblock->aio_nbytes - written; 187 188 if (rem_size == 0) { 189 cblock->aio_fildes = -1; 190 /* 191 * md->refcount is incremented in record__aio_pushfn() for 192 * every aio write request started in record__aio_push() so 193 * decrement it because the request is now complete. 194 */ 195 perf_mmap__put(md); 196 rc = 1; 197 } else { 198 /* 199 * aio write request may require restart with the 200 * reminder if the kernel didn't write whole 201 * chunk at once. 
202 */ 203 rem_off = cblock->aio_offset + written; 204 rem_buf = (void *)(cblock->aio_buf + written); 205 record__aio_write(cblock, cblock->aio_fildes, 206 rem_buf, rem_size, rem_off); 207 rc = 0; 208 } 209 210 return rc; 211 } 212 213 static int record__aio_sync(struct perf_mmap *md, bool sync_all) 214 { 215 struct aiocb **aiocb = md->aio.aiocb; 216 struct aiocb *cblocks = md->aio.cblocks; 217 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 218 int i, do_suspend; 219 220 do { 221 do_suspend = 0; 222 for (i = 0; i < md->aio.nr_cblocks; ++i) { 223 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 224 if (sync_all) 225 aiocb[i] = NULL; 226 else 227 return i; 228 } else { 229 /* 230 * Started aio write is not complete yet 231 * so it has to be waited before the 232 * next allocation. 233 */ 234 aiocb[i] = &cblocks[i]; 235 do_suspend = 1; 236 } 237 } 238 if (!do_suspend) 239 return -1; 240 241 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 242 if (!(errno == EAGAIN || errno == EINTR)) 243 pr_err("failed to sync perf data, error: %m\n"); 244 } 245 } while (1); 246 } 247 248 struct record_aio { 249 struct record *rec; 250 void *data; 251 size_t size; 252 }; 253 254 static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) 255 { 256 struct record_aio *aio = to; 257 258 /* 259 * map->base data pointed by buf is copied into free map->aio.data[] buffer 260 * to release space in the kernel buffer as fast as possible, calling 261 * perf_mmap__consume() from perf_mmap__push() function. 262 * 263 * That lets the kernel to proceed with storing more profiling data into 264 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 265 * 266 * Coping can be done in two steps in case the chunk of profiling data 267 * crosses the upper bound of the kernel buffer. 
In this case we first move 268 * part of data from map->start till the upper bound and then the reminder 269 * from the beginning of the kernel buffer till the end of the data chunk. 270 */ 271 272 if (record__comp_enabled(aio->rec)) { 273 size = zstd_compress(aio->rec->session, aio->data + aio->size, 274 perf_mmap__mmap_len(map) - aio->size, 275 buf, size); 276 } else { 277 memcpy(aio->data + aio->size, buf, size); 278 } 279 280 if (!aio->size) { 281 /* 282 * Increment map->refcount to guard map->aio.data[] buffer 283 * from premature deallocation because map object can be 284 * released earlier than aio write request started on 285 * map->aio.data[] buffer is complete. 286 * 287 * perf_mmap__put() is done at record__aio_complete() 288 * after started aio request completion or at record__aio_push() 289 * if the request failed to start. 290 */ 291 perf_mmap__get(map); 292 } 293 294 aio->size += size; 295 296 return size; 297 } 298 299 static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) 300 { 301 int ret, idx; 302 int trace_fd = rec->session->data->file.fd; 303 struct record_aio aio = { .rec = rec, .size = 0 }; 304 305 /* 306 * Call record__aio_sync() to wait till map->aio.data[] buffer 307 * becomes available after previous aio write operation. 
308 */ 309 310 idx = record__aio_sync(map, false); 311 aio.data = map->aio.data[idx]; 312 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 313 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 314 return ret; 315 316 rec->samples++; 317 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 318 if (!ret) { 319 *off += aio.size; 320 rec->bytes_written += aio.size; 321 if (switch_output_size(rec)) 322 trigger_hit(&switch_output_trigger); 323 } else { 324 /* 325 * Decrement map->refcount incremented in record__aio_pushfn() 326 * back if record__aio_write() operation failed to start, otherwise 327 * map->refcount is decremented in record__aio_complete() after 328 * aio write operation finishes successfully. 329 */ 330 perf_mmap__put(map); 331 } 332 333 return ret; 334 } 335 336 static off_t record__aio_get_pos(int trace_fd) 337 { 338 return lseek(trace_fd, 0, SEEK_CUR); 339 } 340 341 static void record__aio_set_pos(int trace_fd, off_t pos) 342 { 343 lseek(trace_fd, pos, SEEK_SET); 344 } 345 346 static void record__aio_mmap_read_sync(struct record *rec) 347 { 348 int i; 349 struct perf_evlist *evlist = rec->evlist; 350 struct perf_mmap *maps = evlist->mmap; 351 352 if (!record__aio_enabled(rec)) 353 return; 354 355 for (i = 0; i < evlist->nr_mmaps; i++) { 356 struct perf_mmap *map = &maps[i]; 357 358 if (map->base) 359 record__aio_sync(map, true); 360 } 361 } 362 363 static int nr_cblocks_default = 1; 364 static int nr_cblocks_max = 4; 365 366 static int record__aio_parse(const struct option *opt, 367 const char *str, 368 int unset) 369 { 370 struct record_opts *opts = (struct record_opts *)opt->value; 371 372 if (unset) { 373 opts->nr_cblocks = 0; 374 } else { 375 if (str) 376 opts->nr_cblocks = strtol(str, NULL, 0); 377 if (!opts->nr_cblocks) 378 opts->nr_cblocks = nr_cblocks_default; 379 } 380 381 return 0; 382 } 383 #else /* HAVE_AIO_SUPPORT */ 384 static int nr_cblocks_max = 0; 385 386 static int record__aio_push(struct 
record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, 387 off_t *off __maybe_unused) 388 { 389 return -1; 390 } 391 392 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 393 { 394 return -1; 395 } 396 397 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 398 { 399 } 400 401 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 402 { 403 } 404 #endif 405 406 static int record__aio_enabled(struct record *rec) 407 { 408 return rec->opts.nr_cblocks > 0; 409 } 410 411 #define MMAP_FLUSH_DEFAULT 1 412 static int record__mmap_flush_parse(const struct option *opt, 413 const char *str, 414 int unset) 415 { 416 int flush_max; 417 struct record_opts *opts = (struct record_opts *)opt->value; 418 static struct parse_tag tags[] = { 419 { .tag = 'B', .mult = 1 }, 420 { .tag = 'K', .mult = 1 << 10 }, 421 { .tag = 'M', .mult = 1 << 20 }, 422 { .tag = 'G', .mult = 1 << 30 }, 423 { .tag = 0 }, 424 }; 425 426 if (unset) 427 return 0; 428 429 if (str) { 430 opts->mmap_flush = parse_tag_value(str, tags); 431 if (opts->mmap_flush == (int)-1) 432 opts->mmap_flush = strtol(str, NULL, 0); 433 } 434 435 if (!opts->mmap_flush) 436 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 437 438 flush_max = perf_evlist__mmap_size(opts->mmap_pages); 439 flush_max /= 4; 440 if (opts->mmap_flush > flush_max) 441 opts->mmap_flush = flush_max; 442 443 return 0; 444 } 445 446 #ifdef HAVE_ZSTD_SUPPORT 447 static unsigned int comp_level_default = 1; 448 449 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 450 { 451 struct record_opts *opts = opt->value; 452 453 if (unset) { 454 opts->comp_level = 0; 455 } else { 456 if (str) 457 opts->comp_level = strtol(str, NULL, 0); 458 if (!opts->comp_level) 459 opts->comp_level = comp_level_default; 460 } 461 462 return 0; 463 } 464 #endif 465 static unsigned int comp_level_max = 22; 466 467 static int record__comp_enabled(struct record *rec) 468 { 469 return 
rec->opts.comp_level > 0; 470 } 471 472 static int process_synthesized_event(struct perf_tool *tool, 473 union perf_event *event, 474 struct perf_sample *sample __maybe_unused, 475 struct machine *machine __maybe_unused) 476 { 477 struct record *rec = container_of(tool, struct record, tool); 478 return record__write(rec, NULL, event, event->header.size); 479 } 480 481 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) 482 { 483 struct record *rec = to; 484 485 if (record__comp_enabled(rec)) { 486 size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); 487 bf = map->data; 488 } 489 490 rec->samples++; 491 return record__write(rec, map, bf, size); 492 } 493 494 static volatile int done; 495 static volatile int signr = -1; 496 static volatile int child_finished; 497 498 static void sig_handler(int sig) 499 { 500 if (sig == SIGCHLD) 501 child_finished = 1; 502 else 503 signr = sig; 504 505 done = 1; 506 } 507 508 static void sigsegv_handler(int sig) 509 { 510 perf_hooks__recover(); 511 sighandler_dump_stack(sig); 512 } 513 514 static void record__sig_exit(void) 515 { 516 if (signr == -1) 517 return; 518 519 signal(signr, SIG_DFL); 520 raise(signr); 521 } 522 523 #ifdef HAVE_AUXTRACE_SUPPORT 524 525 static int record__process_auxtrace(struct perf_tool *tool, 526 struct perf_mmap *map, 527 union perf_event *event, void *data1, 528 size_t len1, void *data2, size_t len2) 529 { 530 struct record *rec = container_of(tool, struct record, tool); 531 struct perf_data *data = &rec->data; 532 size_t padding; 533 u8 pad[8] = {0}; 534 535 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { 536 off_t file_offset; 537 int fd = perf_data__fd(data); 538 int err; 539 540 file_offset = lseek(fd, 0, SEEK_CUR); 541 if (file_offset == -1) 542 return -1; 543 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 544 event, file_offset); 545 if (err) 546 return err; 547 } 548 549 /* event.auxtrace.size includes 
padding, see __auxtrace_mmap__read() */ 550 padding = (len1 + len2) & 7; 551 if (padding) 552 padding = 8 - padding; 553 554 record__write(rec, map, event, event->header.size); 555 record__write(rec, map, data1, len1); 556 if (len2) 557 record__write(rec, map, data2, len2); 558 record__write(rec, map, &pad, padding); 559 560 return 0; 561 } 562 563 static int record__auxtrace_mmap_read(struct record *rec, 564 struct perf_mmap *map) 565 { 566 int ret; 567 568 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 569 record__process_auxtrace); 570 if (ret < 0) 571 return ret; 572 573 if (ret) 574 rec->samples++; 575 576 return 0; 577 } 578 579 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 580 struct perf_mmap *map) 581 { 582 int ret; 583 584 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 585 record__process_auxtrace, 586 rec->opts.auxtrace_snapshot_size); 587 if (ret < 0) 588 return ret; 589 590 if (ret) 591 rec->samples++; 592 593 return 0; 594 } 595 596 static int record__auxtrace_read_snapshot_all(struct record *rec) 597 { 598 int i; 599 int rc = 0; 600 601 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 602 struct perf_mmap *map = &rec->evlist->mmap[i]; 603 604 if (!map->auxtrace_mmap.base) 605 continue; 606 607 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 608 rc = -1; 609 goto out; 610 } 611 } 612 out: 613 return rc; 614 } 615 616 static void record__read_auxtrace_snapshot(struct record *rec) 617 { 618 pr_debug("Recording AUX area tracing snapshot\n"); 619 if (record__auxtrace_read_snapshot_all(rec) < 0) { 620 trigger_error(&auxtrace_snapshot_trigger); 621 } else { 622 if (auxtrace_record__snapshot_finish(rec->itr)) 623 trigger_error(&auxtrace_snapshot_trigger); 624 else 625 trigger_ready(&auxtrace_snapshot_trigger); 626 } 627 } 628 629 static int record__auxtrace_init(struct record *rec) 630 { 631 int err; 632 633 if (!rec->itr) { 634 rec->itr = auxtrace_record__init(rec->evlist, &err); 635 if (err) 636 return err; 
637 } 638 639 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 640 rec->opts.auxtrace_snapshot_opts); 641 if (err) 642 return err; 643 644 return auxtrace_parse_filters(rec->evlist); 645 } 646 647 #else 648 649 static inline 650 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 651 struct perf_mmap *map __maybe_unused) 652 { 653 return 0; 654 } 655 656 static inline 657 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) 658 { 659 } 660 661 static inline 662 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 663 { 664 return 0; 665 } 666 667 static int record__auxtrace_init(struct record *rec __maybe_unused) 668 { 669 return 0; 670 } 671 672 #endif 673 674 static int record__mmap_evlist(struct record *rec, 675 struct perf_evlist *evlist) 676 { 677 struct record_opts *opts = &rec->opts; 678 char msg[512]; 679 680 if (opts->affinity != PERF_AFFINITY_SYS) 681 cpu__setup_cpunode_map(); 682 683 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 684 opts->auxtrace_mmap_pages, 685 opts->auxtrace_snapshot_mode, 686 opts->nr_cblocks, opts->affinity, 687 opts->mmap_flush, opts->comp_level) < 0) { 688 if (errno == EPERM) { 689 pr_err("Permission error mapping pages.\n" 690 "Consider increasing " 691 "/proc/sys/kernel/perf_event_mlock_kb,\n" 692 "or try again with a smaller value of -m/--mmap_pages.\n" 693 "(current value: %u,%u)\n", 694 opts->mmap_pages, opts->auxtrace_mmap_pages); 695 return -errno; 696 } else { 697 pr_err("failed to mmap with %d (%s)\n", errno, 698 str_error_r(errno, msg, sizeof(msg))); 699 if (errno) 700 return -errno; 701 else 702 return -EINVAL; 703 } 704 } 705 return 0; 706 } 707 708 static int record__mmap(struct record *rec) 709 { 710 return record__mmap_evlist(rec, rec->evlist); 711 } 712 713 static int record__open(struct record *rec) 714 { 715 char msg[BUFSIZ]; 716 struct perf_evsel *pos; 717 struct perf_evlist *evlist = rec->evlist; 718 struct perf_session *session = 
rec->session; 719 struct record_opts *opts = &rec->opts; 720 int rc = 0; 721 722 /* 723 * For initial_delay we need to add a dummy event so that we can track 724 * PERF_RECORD_MMAP while we wait for the initial delay to enable the 725 * real events, the ones asked by the user. 726 */ 727 if (opts->initial_delay) { 728 if (perf_evlist__add_dummy(evlist)) 729 return -ENOMEM; 730 731 pos = perf_evlist__first(evlist); 732 pos->tracking = 0; 733 pos = perf_evlist__last(evlist); 734 pos->tracking = 1; 735 pos->attr.enable_on_exec = 1; 736 } 737 738 perf_evlist__config(evlist, opts, &callchain_param); 739 740 evlist__for_each_entry(evlist, pos) { 741 try_again: 742 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { 743 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 744 if (verbose > 0) 745 ui__warning("%s\n", msg); 746 goto try_again; 747 } 748 if ((errno == EINVAL || errno == EBADF) && 749 pos->leader != pos && 750 pos->weak_group) { 751 pos = perf_evlist__reset_weak_group(evlist, pos); 752 goto try_again; 753 } 754 rc = -errno; 755 perf_evsel__open_strerror(pos, &opts->target, 756 errno, msg, sizeof(msg)); 757 ui__error("%s\n", msg); 758 goto out; 759 } 760 761 pos->supported = true; 762 } 763 764 if (perf_evlist__apply_filters(evlist, &pos)) { 765 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 766 pos->filter, perf_evsel__name(pos), errno, 767 str_error_r(errno, msg, sizeof(msg))); 768 rc = -1; 769 goto out; 770 } 771 772 rc = record__mmap(rec); 773 if (rc) 774 goto out; 775 776 session->evlist = evlist; 777 perf_session__set_id_hdr_size(session); 778 out: 779 return rc; 780 } 781 782 static int process_sample_event(struct perf_tool *tool, 783 union perf_event *event, 784 struct perf_sample *sample, 785 struct perf_evsel *evsel, 786 struct machine *machine) 787 { 788 struct record *rec = container_of(tool, struct record, tool); 789 790 if (rec->evlist->first_sample_time == 0) 791 rec->evlist->first_sample_time = sample->time; 792 793 
rec->evlist->last_sample_time = sample->time; 794 795 if (rec->buildid_all) 796 return 0; 797 798 rec->samples++; 799 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 800 } 801 802 static int process_buildids(struct record *rec) 803 { 804 struct perf_session *session = rec->session; 805 806 if (perf_data__size(&rec->data) == 0) 807 return 0; 808 809 /* 810 * During this process, it'll load kernel map and replace the 811 * dso->long_name to a real pathname it found. In this case 812 * we prefer the vmlinux path like 813 * /lib/modules/3.16.4/build/vmlinux 814 * 815 * rather than build-id path (in debug directory). 816 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 817 */ 818 symbol_conf.ignore_vmlinux_buildid = true; 819 820 /* 821 * If --buildid-all is given, it marks all DSO regardless of hits, 822 * so no need to process samples. But if timestamp_boundary is enabled, 823 * it still needs to walk on all samples to get the timestamps of 824 * first/last samples. 825 */ 826 if (rec->buildid_all && !rec->timestamp_boundary) 827 rec->tool.sample = NULL; 828 829 return perf_session__process_events(session); 830 } 831 832 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 833 { 834 int err; 835 struct perf_tool *tool = data; 836 /* 837 *As for guest kernel when processing subcommand record&report, 838 *we arrange module mmap prior to guest kernel mmap and trigger 839 *a preload dso because default guest module symbols are loaded 840 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 841 *method is used to avoid symbol missing when the first addr is 842 *in module instead of in guest kernel. 
843 */ 844 err = perf_event__synthesize_modules(tool, process_synthesized_event, 845 machine); 846 if (err < 0) 847 pr_err("Couldn't record guest kernel [%d]'s reference" 848 " relocation symbol.\n", machine->pid); 849 850 /* 851 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 852 * have no _text sometimes. 853 */ 854 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 855 machine); 856 if (err < 0) 857 pr_err("Couldn't record guest kernel [%d]'s reference" 858 " relocation symbol.\n", machine->pid); 859 } 860 861 static struct perf_event_header finished_round_event = { 862 .size = sizeof(struct perf_event_header), 863 .type = PERF_RECORD_FINISHED_ROUND, 864 }; 865 866 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 867 { 868 if (rec->opts.affinity != PERF_AFFINITY_SYS && 869 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 870 CPU_ZERO(&rec->affinity_mask); 871 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 872 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 873 } 874 } 875 876 static size_t process_comp_header(void *record, size_t increment) 877 { 878 struct compressed_event *event = record; 879 size_t size = sizeof(*event); 880 881 if (increment) { 882 event->header.size += increment; 883 return increment; 884 } 885 886 event->header.type = PERF_RECORD_COMPRESSED; 887 event->header.size = size; 888 889 return size; 890 } 891 892 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 893 void *src, size_t src_size) 894 { 895 size_t compressed; 896 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1; 897 898 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, 899 max_record_size, process_comp_header); 900 901 session->bytes_transferred += src_size; 902 session->bytes_compressed += compressed; 903 904 return compressed; 905 } 906 
907 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 908 bool overwrite, bool synch) 909 { 910 u64 bytes_written = rec->bytes_written; 911 int i; 912 int rc = 0; 913 struct perf_mmap *maps; 914 int trace_fd = rec->data.file.fd; 915 off_t off = 0; 916 917 if (!evlist) 918 return 0; 919 920 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; 921 if (!maps) 922 return 0; 923 924 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 925 return 0; 926 927 if (record__aio_enabled(rec)) 928 off = record__aio_get_pos(trace_fd); 929 930 for (i = 0; i < evlist->nr_mmaps; i++) { 931 u64 flush = 0; 932 struct perf_mmap *map = &maps[i]; 933 934 if (map->base) { 935 record__adjust_affinity(rec, map); 936 if (synch) { 937 flush = map->flush; 938 map->flush = 1; 939 } 940 if (!record__aio_enabled(rec)) { 941 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 942 if (synch) 943 map->flush = flush; 944 rc = -1; 945 goto out; 946 } 947 } else { 948 if (record__aio_push(rec, map, &off) < 0) { 949 record__aio_set_pos(trace_fd, off); 950 if (synch) 951 map->flush = flush; 952 rc = -1; 953 goto out; 954 } 955 } 956 if (synch) 957 map->flush = flush; 958 } 959 960 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 961 record__auxtrace_mmap_read(rec, map) != 0) { 962 rc = -1; 963 goto out; 964 } 965 } 966 967 if (record__aio_enabled(rec)) 968 record__aio_set_pos(trace_fd, off); 969 970 /* 971 * Mark the round finished in case we wrote 972 * at least one event. 
973 */ 974 if (bytes_written != rec->bytes_written) 975 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 976 977 if (overwrite) 978 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 979 out: 980 return rc; 981 } 982 983 static int record__mmap_read_all(struct record *rec, bool synch) 984 { 985 int err; 986 987 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 988 if (err) 989 return err; 990 991 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 992 } 993 994 static void record__init_features(struct record *rec) 995 { 996 struct perf_session *session = rec->session; 997 int feat; 998 999 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1000 perf_header__set_feat(&session->header, feat); 1001 1002 if (rec->no_buildid) 1003 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1004 1005 if (!have_tracepoints(&rec->evlist->entries)) 1006 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1007 1008 if (!rec->opts.branch_stack) 1009 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1010 1011 if (!rec->opts.full_auxtrace) 1012 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1013 1014 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1015 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1016 1017 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1018 if (!record__comp_enabled(rec)) 1019 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1020 1021 perf_header__clear_feat(&session->header, HEADER_STAT); 1022 } 1023 1024 static void 1025 record__finish_output(struct record *rec) 1026 { 1027 struct perf_data *data = &rec->data; 1028 int fd = perf_data__fd(data); 1029 1030 if (data->is_pipe) 1031 return; 1032 1033 rec->session->header.data_size += rec->bytes_written; 1034 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1035 1036 if (!rec->no_buildid) { 1037 process_buildids(rec); 1038 1039 if 
(rec->buildid_all) 1040 dsos__hit_all(rec->session); 1041 } 1042 perf_session__write_header(rec->session, rec->evlist, fd, true); 1043 1044 return; 1045 } 1046 1047 static int record__synthesize_workload(struct record *rec, bool tail) 1048 { 1049 int err; 1050 struct thread_map *thread_map; 1051 1052 if (rec->opts.tail_synthesize != tail) 1053 return 0; 1054 1055 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1056 if (thread_map == NULL) 1057 return -1; 1058 1059 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1060 process_synthesized_event, 1061 &rec->session->machines.host, 1062 rec->opts.sample_address); 1063 thread_map__put(thread_map); 1064 return err; 1065 } 1066 1067 static int record__synthesize(struct record *rec, bool tail); 1068 1069 static int 1070 record__switch_output(struct record *rec, bool at_exit) 1071 { 1072 struct perf_data *data = &rec->data; 1073 int fd, err; 1074 char *new_filename; 1075 1076 /* Same Size: "2015122520103046"*/ 1077 char timestamp[] = "InvalidTimestamp"; 1078 1079 record__aio_mmap_read_sync(rec); 1080 1081 record__synthesize(rec, true); 1082 if (target__none(&rec->opts.target)) 1083 record__synthesize_workload(rec, true); 1084 1085 rec->samples = 0; 1086 record__finish_output(rec); 1087 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1088 if (err) { 1089 pr_err("Failed to get current timestamp\n"); 1090 return -EINVAL; 1091 } 1092 1093 fd = perf_data__switch(data, timestamp, 1094 rec->session->header.data_offset, 1095 at_exit, &new_filename); 1096 if (fd >= 0 && !at_exit) { 1097 rec->bytes_written = 0; 1098 rec->session->header.data_size = 0; 1099 } 1100 1101 if (!quiet) 1102 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1103 data->path, timestamp); 1104 1105 if (rec->switch_output.num_files) { 1106 int n = rec->switch_output.cur_file + 1; 1107 1108 if (n >= rec->switch_output.num_files) 1109 n = 0; 1110 rec->switch_output.cur_file = n; 1111 if (rec->switch_output.filenames[n]) 
{ 1112 remove(rec->switch_output.filenames[n]); 1113 free(rec->switch_output.filenames[n]); 1114 } 1115 rec->switch_output.filenames[n] = new_filename; 1116 } else { 1117 free(new_filename); 1118 } 1119 1120 /* Output tracking events */ 1121 if (!at_exit) { 1122 record__synthesize(rec, false); 1123 1124 /* 1125 * In 'perf record --switch-output' without -a, 1126 * record__synthesize() in record__switch_output() won't 1127 * generate tracking events because there's no thread_map 1128 * in evlist. Which causes newly created perf.data doesn't 1129 * contain map and comm information. 1130 * Create a fake thread_map and directly call 1131 * perf_event__synthesize_thread_map() for those events. 1132 */ 1133 if (target__none(&rec->opts.target)) 1134 record__synthesize_workload(rec, false); 1135 } 1136 return fd; 1137 } 1138 1139 static volatile int workload_exec_errno; 1140 1141 /* 1142 * perf_evlist__prepare_workload will send a SIGUSR1 1143 * if the fork fails, since we asked by setting its 1144 * want_signal to true. 
 */
/* Stores the errno value delivered with the signal and requests exit. */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

/* Weak default that synthesizes nothing and reports success. */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

/*
 * Return the base of the first regular mmap of @evlist, or of the first
 * overwritable mmap when no regular one is mapped; NULL if neither exists.
 */
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

/* Same as perf_evlist__pick_pc() applied to rec->evlist. */
static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

/*
 * Write the synthesized (non-sample) events describing the session to the
 * output.  Only acts when @tail matches opts.tail_synthesize; otherwise
 * returns 0 without doing anything.
 *
 * Returns the status of the last synthesis step executed; the steps that
 * bail out return their (non-zero or negative) error directly.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features works on top of them (on report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			/* a positive result is added to the byte accounting */
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	/* Failures of the two kernel steps below only warn; synthesis continues. */
	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	/* A bpf synthesis failure is only warned about; err is overwritten below. */
	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    1);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct perf_evlist *sb_evlist = NULL;
	int fd;
	float ratio = 0;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler); 1331 if (rec->opts.auxtrace_snapshot_mode) 1332 trigger_on(&auxtrace_snapshot_trigger); 1333 if (rec->switch_output.enabled) 1334 trigger_on(&switch_output_trigger); 1335 } else { 1336 signal(SIGUSR2, SIG_IGN); 1337 } 1338 1339 session = perf_session__new(data, false, tool); 1340 if (session == NULL) { 1341 pr_err("Perf session creation failed.\n"); 1342 return -1; 1343 } 1344 1345 fd = perf_data__fd(data); 1346 rec->session = session; 1347 1348 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 1349 pr_err("Compression initialization failed.\n"); 1350 return -1; 1351 } 1352 1353 session->header.env.comp_type = PERF_COMP_ZSTD; 1354 session->header.env.comp_level = rec->opts.comp_level; 1355 1356 record__init_features(rec); 1357 1358 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1359 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; 1360 1361 if (forks) { 1362 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 1363 argv, data->is_pipe, 1364 workload_exec_failed_signal); 1365 if (err < 0) { 1366 pr_err("Couldn't run the workload!\n"); 1367 status = err; 1368 goto out_delete_session; 1369 } 1370 } 1371 1372 /* 1373 * If we have just single event and are sending data 1374 * through pipe, we need to force the ids allocation, 1375 * because we synthesize event name through the pipe 1376 * and need the id for that. 
1377 */ 1378 if (data->is_pipe && rec->evlist->nr_entries == 1) 1379 rec->opts.sample_id = true; 1380 1381 if (record__open(rec) != 0) { 1382 err = -1; 1383 goto out_child; 1384 } 1385 session->header.env.comp_mmap_len = session->evlist->mmap_len; 1386 1387 err = bpf__apply_obj_config(); 1388 if (err) { 1389 char errbuf[BUFSIZ]; 1390 1391 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 1392 pr_err("ERROR: Apply config to BPF failed: %s\n", 1393 errbuf); 1394 goto out_child; 1395 } 1396 1397 /* 1398 * Normally perf_session__new would do this, but it doesn't have the 1399 * evlist. 1400 */ 1401 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 1402 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 1403 rec->tool.ordered_events = false; 1404 } 1405 1406 if (!rec->evlist->nr_groups) 1407 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 1408 1409 if (data->is_pipe) { 1410 err = perf_header__write_pipe(fd); 1411 if (err < 0) 1412 goto out_child; 1413 } else { 1414 err = perf_session__write_header(session, rec->evlist, fd, false); 1415 if (err < 0) 1416 goto out_child; 1417 } 1418 1419 if (!rec->no_buildid 1420 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 1421 pr_err("Couldn't generate buildids. 
" 1422 "Use --no-buildid to profile anyway.\n"); 1423 err = -1; 1424 goto out_child; 1425 } 1426 1427 if (!opts->no_bpf_event) 1428 bpf_event__add_sb_event(&sb_evlist, &session->header.env); 1429 1430 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { 1431 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1432 opts->no_bpf_event = true; 1433 } 1434 1435 err = record__synthesize(rec, false); 1436 if (err < 0) 1437 goto out_child; 1438 1439 if (rec->realtime_prio) { 1440 struct sched_param param; 1441 1442 param.sched_priority = rec->realtime_prio; 1443 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1444 pr_err("Could not set realtime priority.\n"); 1445 err = -1; 1446 goto out_child; 1447 } 1448 } 1449 1450 /* 1451 * When perf is starting the traced process, all the events 1452 * (apart from group members) have enable_on_exec=1 set, 1453 * so don't spoil it by prematurely enabling them. 1454 */ 1455 if (!target__none(&opts->target) && !opts->initial_delay) 1456 perf_evlist__enable(rec->evlist); 1457 1458 /* 1459 * Let the child rip 1460 */ 1461 if (forks) { 1462 struct machine *machine = &session->machines.host; 1463 union perf_event *event; 1464 pid_t tgid; 1465 1466 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 1467 if (event == NULL) { 1468 err = -ENOMEM; 1469 goto out_child; 1470 } 1471 1472 /* 1473 * Some H/W events are generated before COMM event 1474 * which is emitted during exec(), so perf script 1475 * cannot see a correct process name for those events. 1476 * Synthesize COMM event to prevent it. 
1477 */ 1478 tgid = perf_event__synthesize_comm(tool, event, 1479 rec->evlist->workload.pid, 1480 process_synthesized_event, 1481 machine); 1482 free(event); 1483 1484 if (tgid == -1) 1485 goto out_child; 1486 1487 event = malloc(sizeof(event->namespaces) + 1488 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1489 machine->id_hdr_size); 1490 if (event == NULL) { 1491 err = -ENOMEM; 1492 goto out_child; 1493 } 1494 1495 /* 1496 * Synthesize NAMESPACES event for the command specified. 1497 */ 1498 perf_event__synthesize_namespaces(tool, event, 1499 rec->evlist->workload.pid, 1500 tgid, process_synthesized_event, 1501 machine); 1502 free(event); 1503 1504 perf_evlist__start_workload(rec->evlist); 1505 } 1506 1507 if (opts->initial_delay) { 1508 usleep(opts->initial_delay * USEC_PER_MSEC); 1509 perf_evlist__enable(rec->evlist); 1510 } 1511 1512 trigger_ready(&auxtrace_snapshot_trigger); 1513 trigger_ready(&switch_output_trigger); 1514 perf_hooks__invoke_record_start(); 1515 for (;;) { 1516 unsigned long long hits = rec->samples; 1517 1518 /* 1519 * rec->evlist->bkw_mmap_state is possible to be 1520 * BKW_MMAP_EMPTY here: when done == true and 1521 * hits != rec->samples in previous round. 1522 * 1523 * perf_evlist__toggle_bkw_mmap ensure we never 1524 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 
1525 */ 1526 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1527 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1528 1529 if (record__mmap_read_all(rec, false) < 0) { 1530 trigger_error(&auxtrace_snapshot_trigger); 1531 trigger_error(&switch_output_trigger); 1532 err = -1; 1533 goto out_child; 1534 } 1535 1536 if (auxtrace_record__snapshot_started) { 1537 auxtrace_record__snapshot_started = 0; 1538 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1539 record__read_auxtrace_snapshot(rec); 1540 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1541 pr_err("AUX area tracing snapshot failed\n"); 1542 err = -1; 1543 goto out_child; 1544 } 1545 } 1546 1547 if (trigger_is_hit(&switch_output_trigger)) { 1548 /* 1549 * If switch_output_trigger is hit, the data in 1550 * overwritable ring buffer should have been collected, 1551 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1552 * 1553 * If SIGUSR2 raise after or during record__mmap_read_all(), 1554 * record__mmap_read_all() didn't collect data from 1555 * overwritable ring buffer. Read again. 1556 */ 1557 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1558 continue; 1559 trigger_ready(&switch_output_trigger); 1560 1561 /* 1562 * Reenable events in overwrite ring buffer after 1563 * record__mmap_read_all(): we should have collected 1564 * data from it. 
1565 */ 1566 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1567 1568 if (!quiet) 1569 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1570 waking); 1571 waking = 0; 1572 fd = record__switch_output(rec, false); 1573 if (fd < 0) { 1574 pr_err("Failed to switch to new file\n"); 1575 trigger_error(&switch_output_trigger); 1576 err = fd; 1577 goto out_child; 1578 } 1579 1580 /* re-arm the alarm */ 1581 if (rec->switch_output.time) 1582 alarm(rec->switch_output.time); 1583 } 1584 1585 if (hits == rec->samples) { 1586 if (done || draining) 1587 break; 1588 err = perf_evlist__poll(rec->evlist, -1); 1589 /* 1590 * Propagate error, only if there's any. Ignore positive 1591 * number of returned events and interrupt error. 1592 */ 1593 if (err > 0 || (err < 0 && errno == EINTR)) 1594 err = 0; 1595 waking++; 1596 1597 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1598 draining = true; 1599 } 1600 1601 /* 1602 * When perf is starting the traced process, at the end events 1603 * die with the process and we wait for that. Thus no need to 1604 * disable events in this case. 
1605 */ 1606 if (done && !disabled && !target__none(&opts->target)) { 1607 trigger_off(&auxtrace_snapshot_trigger); 1608 perf_evlist__disable(rec->evlist); 1609 disabled = true; 1610 } 1611 } 1612 trigger_off(&auxtrace_snapshot_trigger); 1613 trigger_off(&switch_output_trigger); 1614 1615 if (forks && workload_exec_errno) { 1616 char msg[STRERR_BUFSIZE]; 1617 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1618 pr_err("Workload failed: %s\n", emsg); 1619 err = -1; 1620 goto out_child; 1621 } 1622 1623 if (!quiet) 1624 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1625 1626 if (target__none(&rec->opts.target)) 1627 record__synthesize_workload(rec, true); 1628 1629 out_child: 1630 record__mmap_read_all(rec, true); 1631 record__aio_mmap_read_sync(rec); 1632 1633 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 1634 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 1635 session->header.env.comp_ratio = ratio + 0.5; 1636 } 1637 1638 if (forks) { 1639 int exit_status; 1640 1641 if (!child_finished) 1642 kill(rec->evlist->workload.pid, SIGTERM); 1643 1644 wait(&exit_status); 1645 1646 if (err < 0) 1647 status = err; 1648 else if (WIFEXITED(exit_status)) 1649 status = WEXITSTATUS(exit_status); 1650 else if (WIFSIGNALED(exit_status)) 1651 signr = WTERMSIG(exit_status); 1652 } else 1653 status = err; 1654 1655 record__synthesize(rec, true); 1656 /* this will be recalculated during process_buildids() */ 1657 rec->samples = 0; 1658 1659 if (!err) { 1660 if (!rec->timestamp_filename) { 1661 record__finish_output(rec); 1662 } else { 1663 fd = record__switch_output(rec, true); 1664 if (fd < 0) { 1665 status = fd; 1666 goto out_delete_session; 1667 } 1668 } 1669 } 1670 1671 perf_hooks__invoke_record_end(); 1672 1673 if (!err && !quiet) { 1674 char samples[128]; 1675 const char *postfix = rec->timestamp_filename ? 
1676 ".<timestamp>" : ""; 1677 1678 if (rec->samples && !rec->opts.full_auxtrace) 1679 scnprintf(samples, sizeof(samples), 1680 " (%" PRIu64 " samples)", rec->samples); 1681 else 1682 samples[0] = '\0'; 1683 1684 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 1685 perf_data__size(data) / 1024.0 / 1024.0, 1686 data->path, postfix, samples); 1687 if (ratio) { 1688 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 1689 rec->session->bytes_transferred / 1024.0 / 1024.0, 1690 ratio); 1691 } 1692 fprintf(stderr, " ]\n"); 1693 } 1694 1695 out_delete_session: 1696 zstd_fini(&session->zstd_data); 1697 perf_session__delete(session); 1698 1699 if (!opts->no_bpf_event) 1700 perf_evlist__stop_sb_thread(sb_evlist); 1701 return status; 1702 } 1703 1704 static void callchain_debug(struct callchain_param *callchain) 1705 { 1706 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1707 1708 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1709 1710 if (callchain->record_mode == CALLCHAIN_DWARF) 1711 pr_debug("callchain: stack dump size %d\n", 1712 callchain->dump_size); 1713 } 1714 1715 int record_opts__parse_callchain(struct record_opts *record, 1716 struct callchain_param *callchain, 1717 const char *arg, bool unset) 1718 { 1719 int ret; 1720 callchain->enabled = !unset; 1721 1722 /* --no-call-graph */ 1723 if (unset) { 1724 callchain->record_mode = CALLCHAIN_NONE; 1725 pr_debug("callchain: disabled\n"); 1726 return 0; 1727 } 1728 1729 ret = parse_callchain_record_opt(arg, callchain); 1730 if (!ret) { 1731 /* Enable data address sampling for DWARF unwind. 
*/ 1732 if (callchain->record_mode == CALLCHAIN_DWARF) 1733 record->sample_address = true; 1734 callchain_debug(callchain); 1735 } 1736 1737 return ret; 1738 } 1739 1740 int record_parse_callchain_opt(const struct option *opt, 1741 const char *arg, 1742 int unset) 1743 { 1744 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1745 } 1746 1747 int record_callchain_opt(const struct option *opt, 1748 const char *arg __maybe_unused, 1749 int unset __maybe_unused) 1750 { 1751 struct callchain_param *callchain = opt->value; 1752 1753 callchain->enabled = true; 1754 1755 if (callchain->record_mode == CALLCHAIN_NONE) 1756 callchain->record_mode = CALLCHAIN_FP; 1757 1758 callchain_debug(callchain); 1759 return 0; 1760 } 1761 1762 static int perf_record_config(const char *var, const char *value, void *cb) 1763 { 1764 struct record *rec = cb; 1765 1766 if (!strcmp(var, "record.build-id")) { 1767 if (!strcmp(value, "cache")) 1768 rec->no_buildid_cache = false; 1769 else if (!strcmp(value, "no-cache")) 1770 rec->no_buildid_cache = true; 1771 else if (!strcmp(value, "skip")) 1772 rec->no_buildid = true; 1773 else 1774 return -1; 1775 return 0; 1776 } 1777 if (!strcmp(var, "record.call-graph")) { 1778 var = "call-graph.record-mode"; 1779 return perf_default_config(var, value, cb); 1780 } 1781 #ifdef HAVE_AIO_SUPPORT 1782 if (!strcmp(var, "record.aio")) { 1783 rec->opts.nr_cblocks = strtol(value, NULL, 0); 1784 if (!rec->opts.nr_cblocks) 1785 rec->opts.nr_cblocks = nr_cblocks_default; 1786 } 1787 #endif 1788 1789 return 0; 1790 } 1791 1792 struct clockid_map { 1793 const char *name; 1794 int clockid; 1795 }; 1796 1797 #define CLOCKID_MAP(n, c) \ 1798 { .name = n, .clockid = (c), } 1799 1800 #define CLOCKID_END { .name = NULL, } 1801 1802 1803 /* 1804 * Add the missing ones, we need to build on many distros... 
1805 */ 1806 #ifndef CLOCK_MONOTONIC_RAW 1807 #define CLOCK_MONOTONIC_RAW 4 1808 #endif 1809 #ifndef CLOCK_BOOTTIME 1810 #define CLOCK_BOOTTIME 7 1811 #endif 1812 #ifndef CLOCK_TAI 1813 #define CLOCK_TAI 11 1814 #endif 1815 1816 static const struct clockid_map clockids[] = { 1817 /* available for all events, NMI safe */ 1818 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1819 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1820 1821 /* available for some events */ 1822 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1823 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1824 CLOCKID_MAP("tai", CLOCK_TAI), 1825 1826 /* available for the lazy */ 1827 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1828 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1829 CLOCKID_MAP("real", CLOCK_REALTIME), 1830 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1831 1832 CLOCKID_END, 1833 }; 1834 1835 static int get_clockid_res(clockid_t clk_id, u64 *res_ns) 1836 { 1837 struct timespec res; 1838 1839 *res_ns = 0; 1840 if (!clock_getres(clk_id, &res)) 1841 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; 1842 else 1843 pr_warning("WARNING: Failed to determine specified clock resolution.\n"); 1844 1845 return 0; 1846 } 1847 1848 static int parse_clockid(const struct option *opt, const char *str, int unset) 1849 { 1850 struct record_opts *opts = (struct record_opts *)opt->value; 1851 const struct clockid_map *cm; 1852 const char *ostr = str; 1853 1854 if (unset) { 1855 opts->use_clockid = 0; 1856 return 0; 1857 } 1858 1859 /* no arg passed */ 1860 if (!str) 1861 return 0; 1862 1863 /* no setting it twice */ 1864 if (opts->use_clockid) 1865 return -1; 1866 1867 opts->use_clockid = true; 1868 1869 /* if its a number, we're done */ 1870 if (sscanf(str, "%d", &opts->clockid) == 1) 1871 return get_clockid_res(opts->clockid, &opts->clockid_res_ns); 1872 1873 /* allow a "CLOCK_" prefix to the name */ 1874 if (!strncasecmp(str, "CLOCK_", 6)) 1875 str += 6; 1876 1877 for (cm = clockids; cm->name; cm++) { 1878 if (!strcasecmp(str, 
cm->name)) { 1879 opts->clockid = cm->clockid; 1880 return get_clockid_res(opts->clockid, 1881 &opts->clockid_res_ns); 1882 } 1883 } 1884 1885 opts->use_clockid = false; 1886 ui__warning("unknown clockid %s, check man page\n", ostr); 1887 return -1; 1888 } 1889 1890 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 1891 { 1892 struct record_opts *opts = (struct record_opts *)opt->value; 1893 1894 if (unset || !str) 1895 return 0; 1896 1897 if (!strcasecmp(str, "node")) 1898 opts->affinity = PERF_AFFINITY_NODE; 1899 else if (!strcasecmp(str, "cpu")) 1900 opts->affinity = PERF_AFFINITY_CPU; 1901 1902 return 0; 1903 } 1904 1905 static int record__parse_mmap_pages(const struct option *opt, 1906 const char *str, 1907 int unset __maybe_unused) 1908 { 1909 struct record_opts *opts = opt->value; 1910 char *s, *p; 1911 unsigned int mmap_pages; 1912 int ret; 1913 1914 if (!str) 1915 return -EINVAL; 1916 1917 s = strdup(str); 1918 if (!s) 1919 return -ENOMEM; 1920 1921 p = strchr(s, ','); 1922 if (p) 1923 *p = '\0'; 1924 1925 if (*s) { 1926 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1927 if (ret) 1928 goto out_free; 1929 opts->mmap_pages = mmap_pages; 1930 } 1931 1932 if (!p) { 1933 ret = 0; 1934 goto out_free; 1935 } 1936 1937 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1938 if (ret) 1939 goto out_free; 1940 1941 opts->auxtrace_mmap_pages = mmap_pages; 1942 1943 out_free: 1944 free(s); 1945 return ret; 1946 } 1947 1948 static void switch_output_size_warn(struct record *rec) 1949 { 1950 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1951 struct switch_output *s = &rec->switch_output; 1952 1953 wakeup_size /= 2; 1954 1955 if (s->size < wakeup_size) { 1956 char buf[100]; 1957 1958 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1959 pr_warning("WARNING: switch-output data size lower than " 1960 "wakeup kernel buffer size (%s) " 1961 "expect bigger perf.data sizes\n", buf); 1962 } 1963 } 
1964 1965 static int switch_output_setup(struct record *rec) 1966 { 1967 struct switch_output *s = &rec->switch_output; 1968 static struct parse_tag tags_size[] = { 1969 { .tag = 'B', .mult = 1 }, 1970 { .tag = 'K', .mult = 1 << 10 }, 1971 { .tag = 'M', .mult = 1 << 20 }, 1972 { .tag = 'G', .mult = 1 << 30 }, 1973 { .tag = 0 }, 1974 }; 1975 static struct parse_tag tags_time[] = { 1976 { .tag = 's', .mult = 1 }, 1977 { .tag = 'm', .mult = 60 }, 1978 { .tag = 'h', .mult = 60*60 }, 1979 { .tag = 'd', .mult = 60*60*24 }, 1980 { .tag = 0 }, 1981 }; 1982 unsigned long val; 1983 1984 if (!s->set) 1985 return 0; 1986 1987 if (!strcmp(s->str, "signal")) { 1988 s->signal = true; 1989 pr_debug("switch-output with SIGUSR2 signal\n"); 1990 goto enabled; 1991 } 1992 1993 val = parse_tag_value(s->str, tags_size); 1994 if (val != (unsigned long) -1) { 1995 s->size = val; 1996 pr_debug("switch-output with %s size threshold\n", s->str); 1997 goto enabled; 1998 } 1999 2000 val = parse_tag_value(s->str, tags_time); 2001 if (val != (unsigned long) -1) { 2002 s->time = val; 2003 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2004 s->str, s->time); 2005 goto enabled; 2006 } 2007 2008 return -1; 2009 2010 enabled: 2011 rec->timestamp_filename = true; 2012 s->enabled = true; 2013 2014 if (s->size && !rec->opts.no_buffering) 2015 switch_output_size_warn(rec); 2016 2017 return 0; 2018 } 2019 2020 static const char * const __record_usage[] = { 2021 "perf record [<options>] [<command>]", 2022 "perf record [<options>] -- <command> [<options>]", 2023 NULL 2024 }; 2025 const char * const *record_usage = __record_usage; 2026 2027 /* 2028 * XXX Ideally would be local to cmd_record() and passed to a record__new 2029 * because we need to have access to it in record__exit, that is called 2030 * after cmd_record() exits, but since record_options need to be accessible to 2031 * builtin-script, leave it here. 
2032 * 2033 * At least we don't ouch it in all the other functions here directly. 2034 * 2035 * Just say no to tons of global variables, sigh. 2036 */ 2037 static struct record record = { 2038 .opts = { 2039 .sample_time = true, 2040 .mmap_pages = UINT_MAX, 2041 .user_freq = UINT_MAX, 2042 .user_interval = ULLONG_MAX, 2043 .freq = 4000, 2044 .target = { 2045 .uses_mmap = true, 2046 .default_per_cpu = true, 2047 }, 2048 .mmap_flush = MMAP_FLUSH_DEFAULT, 2049 }, 2050 .tool = { 2051 .sample = process_sample_event, 2052 .fork = perf_event__process_fork, 2053 .exit = perf_event__process_exit, 2054 .comm = perf_event__process_comm, 2055 .namespaces = perf_event__process_namespaces, 2056 .mmap = perf_event__process_mmap, 2057 .mmap2 = perf_event__process_mmap2, 2058 .ordered_events = true, 2059 }, 2060 }; 2061 2062 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 2063 "\n\t\t\t\tDefault: fp"; 2064 2065 static bool dry_run; 2066 2067 /* 2068 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 2069 * with it and switch to use the library functions in perf_evlist that came 2070 * from builtin-record.c, i.e. use record_opts, 2071 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 2072 * using pipes, etc. 2073 */ 2074 static struct option __record_options[] = { 2075 OPT_CALLBACK('e', "event", &record.evlist, "event", 2076 "event selector. 
use 'perf list' to list available events", 2077 parse_events_option), 2078 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 2079 "event filter", parse_filter), 2080 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 2081 NULL, "don't record events from perf itself", 2082 exclude_perf), 2083 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 2084 "record events on existing process id"), 2085 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 2086 "record events on existing thread id"), 2087 OPT_INTEGER('r', "realtime", &record.realtime_prio, 2088 "collect data with this RT SCHED_FIFO priority"), 2089 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 2090 "collect data without buffering"), 2091 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 2092 "collect raw sample records from all opened counters"), 2093 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 2094 "system-wide collection from all CPUs"), 2095 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 2096 "list of cpus to monitor"), 2097 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 2098 OPT_STRING('o', "output", &record.data.path, "file", 2099 "output file name"), 2100 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 2101 &record.opts.no_inherit_set, 2102 "child tasks do not inherit counters"), 2103 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 2104 "synthesize non-sample events at the end of output"), 2105 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 2106 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), 2107 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 2108 "Fail if the specified frequency can't be used"), 2109 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 2110 "profile at this frequency", 2111 record__parse_freq), 2112 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 2113 "number of mmap data pages and 
AUX area tracing mmap pages", 2114 record__parse_mmap_pages), 2115 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 2116 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 2117 record__mmap_flush_parse), 2118 OPT_BOOLEAN(0, "group", &record.opts.group, 2119 "put the counters into a counter group"), 2120 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 2121 NULL, "enables call-graph recording" , 2122 &record_callchain_opt), 2123 OPT_CALLBACK(0, "call-graph", &record.opts, 2124 "record_mode[,record_size]", record_callchain_help, 2125 &record_parse_callchain_opt), 2126 OPT_INCR('v', "verbose", &verbose, 2127 "be more verbose (show counter open errors, etc)"), 2128 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 2129 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 2130 "per thread counts"), 2131 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 2132 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 2133 "Record the sample physical addresses"), 2134 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 2135 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 2136 &record.opts.sample_time_set, 2137 "Record the sample timestamps"), 2138 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 2139 "Record the sample period"), 2140 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 2141 "don't sample"), 2142 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 2143 &record.no_buildid_cache_set, 2144 "do not update the buildid cache"), 2145 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 2146 &record.no_buildid_set, 2147 "do not collect buildids in perf.data"), 2148 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 2149 "monitor event in cgroup name only", 2150 parse_cgroups), 2151 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 2152 "ms to wait before starting measurement after program 
start"), 2153 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2154 "user to profile"), 2155 2156 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2157 "branch any", "sample any taken branches", 2158 parse_branch_stack), 2159 2160 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2161 "branch filter mask", "branch stack filter modes", 2162 parse_branch_stack), 2163 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2164 "sample by weight (on special events only)"), 2165 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2166 "sample transaction flags (special events only)"), 2167 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2168 "use per-thread mmaps"), 2169 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2170 "sample selected machine registers on interrupt," 2171 " use '-I?' to list register names", parse_intr_regs), 2172 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2173 "sample selected machine registers on interrupt," 2174 " use '--user-regs=?' 
to list register names", parse_user_regs), 2175 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2176 "Record running/enabled time of read (:S) events"), 2177 OPT_CALLBACK('k', "clockid", &record.opts, 2178 "clockid", "clockid to use for events, see clock_gettime()", 2179 parse_clockid), 2180 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2181 "opts", "AUX area tracing Snapshot Mode", ""), 2182 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2183 "per thread proc mmap processing timeout in ms"), 2184 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2185 "Record namespaces events"), 2186 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2187 "Record context switch events"), 2188 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2189 "Configure all used events to run in kernel space.", 2190 PARSE_OPT_EXCLUSIVE), 2191 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2192 "Configure all used events to run in user space.", 2193 PARSE_OPT_EXCLUSIVE), 2194 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2195 "clang binary to use for compiling BPF scriptlets"), 2196 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2197 "options passed to clang when compiling BPF scriptlets"), 2198 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2199 "file", "vmlinux pathname"), 2200 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2201 "Record build-id of all DSOs regardless of hits"), 2202 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2203 "append timestamp to output filename"), 2204 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2205 "Record timestamp boundary (time of first/last samples)"), 2206 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2207 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2208 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 2209 
"signal"), 2210 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2211 "Limit number of switch output generated files"), 2212 OPT_BOOLEAN(0, "dry-run", &dry_run, 2213 "Parse options then exit"), 2214 #ifdef HAVE_AIO_SUPPORT 2215 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2216 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2217 record__aio_parse), 2218 #endif 2219 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2220 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2221 record__parse_affinity), 2222 #ifdef HAVE_ZSTD_SUPPORT 2223 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, 2224 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 2225 record__parse_comp_level), 2226 #endif 2227 OPT_END() 2228 }; 2229 2230 struct option *record_options = __record_options; 2231 2232 int cmd_record(int argc, const char **argv) 2233 { 2234 int err; 2235 struct record *rec = &record; 2236 char errbuf[BUFSIZ]; 2237 2238 setlocale(LC_ALL, ""); 2239 2240 #ifndef HAVE_LIBBPF_SUPPORT 2241 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2242 set_nobuild('\0', "clang-path", true); 2243 set_nobuild('\0', "clang-opt", true); 2244 # undef set_nobuild 2245 #endif 2246 2247 #ifndef HAVE_BPF_PROLOGUE 2248 # if !defined (HAVE_DWARF_SUPPORT) 2249 # define REASON "NO_DWARF=1" 2250 # elif !defined (HAVE_LIBBPF_SUPPORT) 2251 # define REASON "NO_LIBBPF=1" 2252 # else 2253 # define REASON "this architecture doesn't support BPF prologue" 2254 # endif 2255 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2256 set_nobuild('\0', "vmlinux", true); 2257 # undef set_nobuild 2258 # undef REASON 2259 #endif 2260 2261 CPU_ZERO(&rec->affinity_mask); 2262 rec->opts.affinity = PERF_AFFINITY_SYS; 2263 2264 rec->evlist = 
perf_evlist__new(); 2265 if (rec->evlist == NULL) 2266 return -ENOMEM; 2267 2268 err = perf_config(perf_record_config, rec); 2269 if (err) 2270 return err; 2271 2272 argc = parse_options(argc, argv, record_options, record_usage, 2273 PARSE_OPT_STOP_AT_NON_OPTION); 2274 if (quiet) 2275 perf_quiet_option(); 2276 2277 /* Make system wide (-a) the default target. */ 2278 if (!argc && target__none(&rec->opts.target)) 2279 rec->opts.target.system_wide = true; 2280 2281 if (nr_cgroups && !rec->opts.target.system_wide) { 2282 usage_with_options_msg(record_usage, record_options, 2283 "cgroup monitoring only available in system-wide mode"); 2284 2285 } 2286 2287 if (rec->opts.comp_level != 0) { 2288 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 2289 rec->no_buildid = true; 2290 } 2291 2292 if (rec->opts.record_switch_events && 2293 !perf_can_record_switch_events()) { 2294 ui__error("kernel does not support recording context switch events\n"); 2295 parse_options_usage(record_usage, record_options, "switch-events", 0); 2296 return -EINVAL; 2297 } 2298 2299 if (switch_output_setup(rec)) { 2300 parse_options_usage(record_usage, record_options, "switch-output", 0); 2301 return -EINVAL; 2302 } 2303 2304 if (rec->switch_output.time) { 2305 signal(SIGALRM, alarm_sig_handler); 2306 alarm(rec->switch_output.time); 2307 } 2308 2309 if (rec->switch_output.num_files) { 2310 rec->switch_output.filenames = calloc(sizeof(char *), 2311 rec->switch_output.num_files); 2312 if (!rec->switch_output.filenames) 2313 return -EINVAL; 2314 } 2315 2316 /* 2317 * Allow aliases to facilitate the lookup of symbols for address 2318 * filters. Refer to auxtrace_parse_filters(). 
2319 */ 2320 symbol_conf.allow_aliases = true; 2321 2322 symbol__init(NULL); 2323 2324 err = record__auxtrace_init(rec); 2325 if (err) 2326 goto out; 2327 2328 if (dry_run) 2329 goto out; 2330 2331 err = bpf__setup_stdout(rec->evlist); 2332 if (err) { 2333 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2334 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2335 errbuf); 2336 goto out; 2337 } 2338 2339 err = -ENOMEM; 2340 2341 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) 2342 pr_warning( 2343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 2344 "check /proc/sys/kernel/kptr_restrict.\n\n" 2345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 2346 "file is not found in the buildid cache or in the vmlinux path.\n\n" 2347 "Samples in kernel modules won't be resolved at all.\n\n" 2348 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 2349 "even with a suitable vmlinux or kallsyms file.\n\n"); 2350 2351 if (rec->no_buildid_cache || rec->no_buildid) { 2352 disable_buildid_cache(); 2353 } else if (rec->switch_output.enabled) { 2354 /* 2355 * In 'perf record --switch-output', disable buildid 2356 * generation by default to reduce data file switching 2357 * overhead. 
Still generate buildid if they are required 2358 * explicitly using 2359 * 2360 * perf record --switch-output --no-no-buildid \ 2361 * --no-no-buildid-cache 2362 * 2363 * Following code equals to: 2364 * 2365 * if ((rec->no_buildid || !rec->no_buildid_set) && 2366 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2367 * disable_buildid_cache(); 2368 */ 2369 bool disable = true; 2370 2371 if (rec->no_buildid_set && !rec->no_buildid) 2372 disable = false; 2373 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2374 disable = false; 2375 if (disable) { 2376 rec->no_buildid = true; 2377 rec->no_buildid_cache = true; 2378 disable_buildid_cache(); 2379 } 2380 } 2381 2382 if (record.opts.overwrite) 2383 record.opts.tail_synthesize = true; 2384 2385 if (rec->evlist->nr_entries == 0 && 2386 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 2387 pr_err("Not enough memory for event selector list\n"); 2388 goto out; 2389 } 2390 2391 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 2392 rec->opts.no_inherit = true; 2393 2394 err = target__validate(&rec->opts.target); 2395 if (err) { 2396 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2397 ui__warning("%s\n", errbuf); 2398 } 2399 2400 err = target__parse_uid(&rec->opts.target); 2401 if (err) { 2402 int saved_errno = errno; 2403 2404 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2405 ui__error("%s", errbuf); 2406 2407 err = -saved_errno; 2408 goto out; 2409 } 2410 2411 /* Enable ignoring missing threads when -u/-p option is defined. 
*/ 2412 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 2413 2414 err = -ENOMEM; 2415 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 2416 usage_with_options(record_usage, record_options); 2417 2418 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 2419 if (err) 2420 goto out; 2421 2422 /* 2423 * We take all buildids when the file contains 2424 * AUX area tracing data because we do not decode the 2425 * trace because it would take too long. 2426 */ 2427 if (rec->opts.full_auxtrace) 2428 rec->buildid_all = true; 2429 2430 if (record_opts__config(&rec->opts)) { 2431 err = -EINVAL; 2432 goto out; 2433 } 2434 2435 if (rec->opts.nr_cblocks > nr_cblocks_max) 2436 rec->opts.nr_cblocks = nr_cblocks_max; 2437 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2438 2439 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2440 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2441 2442 if (rec->opts.comp_level > comp_level_max) 2443 rec->opts.comp_level = comp_level_max; 2444 pr_debug("comp level: %d\n", rec->opts.comp_level); 2445 2446 err = __cmd_record(&record, argc, argv); 2447 out: 2448 perf_evlist__delete(rec->evlist); 2449 symbol__exit(); 2450 auxtrace_record__free(rec->itr); 2451 return err; 2452 } 2453 2454 static void snapshot_sig_handler(int sig __maybe_unused) 2455 { 2456 struct record *rec = &record; 2457 2458 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2459 trigger_hit(&auxtrace_snapshot_trigger); 2460 auxtrace_record__snapshot_started = 1; 2461 if (auxtrace_record__snapshot_start(record.itr)) 2462 trigger_error(&auxtrace_snapshot_trigger); 2463 } 2464 2465 if (switch_output_signal(rec)) 2466 trigger_hit(&switch_output_trigger); 2467 } 2468 2469 static void alarm_sig_handler(int sig __maybe_unused) 2470 { 2471 struct record *rec = &record; 2472 2473 if (switch_output_time(rec)) 2474 trigger_hit(&switch_output_trigger); 2475 } 2476