1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/target.h" 24 #include "util/session.h" 25 #include "util/tool.h" 26 #include "util/symbol.h" 27 #include "util/record.h" 28 #include "util/cpumap.h" 29 #include "util/thread_map.h" 30 #include "util/data.h" 31 #include "util/perf_regs.h" 32 #include "util/auxtrace.h" 33 #include "util/tsc.h" 34 #include "util/parse-branch-options.h" 35 #include "util/parse-regs-options.h" 36 #include "util/llvm-utils.h" 37 #include "util/bpf-loader.h" 38 #include "util/trigger.h" 39 #include "util/perf-hooks.h" 40 #include "util/cpu-set-sched.h" 41 #include "util/time-utils.h" 42 #include "util/units.h" 43 #include "util/bpf-event.h" 44 #include "asm/bug.h" 45 #include "perf.h" 46 47 #include <errno.h> 48 #include <inttypes.h> 49 #include <locale.h> 50 #include <poll.h> 51 #include <unistd.h> 52 #include <sched.h> 53 #include <signal.h> 54 #include <sys/mman.h> 55 #include <sys/wait.h> 56 #include <linux/string.h> 57 #include <linux/time64.h> 58 #include <linux/zalloc.h> 59 60 struct switch_output { 61 bool enabled; 62 bool signal; 63 unsigned long size; 64 unsigned long time; 65 const char *str; 66 bool set; 67 char **filenames; 68 int num_files; 69 int cur_file; 70 }; 71 72 struct record { 73 struct perf_tool tool; 74 struct record_opts opts; 75 u64 bytes_written; 76 struct perf_data data; 77 struct auxtrace_record *itr; 78 struct evlist *evlist; 79 struct perf_session *session; 80 int realtime_prio; 81 bool no_buildid; 82 bool no_buildid_set; 83 bool no_buildid_cache; 84 bool no_buildid_cache_set; 85 bool buildid_all; 86 bool timestamp_filename; 87 bool timestamp_boundary; 88 struct switch_output switch_output; 89 unsigned long long samples; 90 cpu_set_t affinity_mask; 91 }; 92 93 static volatile int auxtrace_record__snapshot_started; 94 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 95 static DEFINE_TRIGGER(switch_output_trigger); 96 97 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 98 "SYS", "NODE", "CPU" 99 }; 100 101 static bool switch_output_signal(struct record *rec) 102 { 103 return rec->switch_output.signal && 104 trigger_is_ready(&switch_output_trigger); 105 } 106 107 static bool switch_output_size(struct record *rec) 108 { 109 return rec->switch_output.size && 110 trigger_is_ready(&switch_output_trigger) && 111 (rec->bytes_written >= rec->switch_output.size); 112 } 113 114 static bool switch_output_time(struct record *rec) 115 { 116 return rec->switch_output.time && 117 trigger_is_ready(&switch_output_trigger); 118 } 119 120 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, 121 void *bf, size_t size) 122 { 123 struct perf_data_file *file = &rec->session->data->file; 124 125 if (perf_data_file__write(file, bf, size) < 0) { 126 pr_err("failed to write perf data, error: %m\n"); 127 return -1; 128 } 129 130 rec->bytes_written += size; 131 132 if (switch_output_size(rec)) 133 trigger_hit(&switch_output_trigger); 134 135 return 0; 136 } 137 138 static int 
record__aio_enabled(struct record *rec); 139 static int record__comp_enabled(struct record *rec); 140 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 141 void *src, size_t src_size); 142 143 #ifdef HAVE_AIO_SUPPORT 144 static int record__aio_write(struct aiocb *cblock, int trace_fd, 145 void *buf, size_t size, off_t off) 146 { 147 int rc; 148 149 cblock->aio_fildes = trace_fd; 150 cblock->aio_buf = buf; 151 cblock->aio_nbytes = size; 152 cblock->aio_offset = off; 153 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 154 155 do { 156 rc = aio_write(cblock); 157 if (rc == 0) { 158 break; 159 } else if (errno != EAGAIN) { 160 cblock->aio_fildes = -1; 161 pr_err("failed to queue perf data, error: %m\n"); 162 break; 163 } 164 } while (1); 165 166 return rc; 167 } 168 169 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock) 170 { 171 void *rem_buf; 172 off_t rem_off; 173 size_t rem_size; 174 int rc, aio_errno; 175 ssize_t aio_ret, written; 176 177 aio_errno = aio_error(cblock); 178 if (aio_errno == EINPROGRESS) 179 return 0; 180 181 written = aio_ret = aio_return(cblock); 182 if (aio_ret < 0) { 183 if (aio_errno != EINTR) 184 pr_err("failed to write perf data, error: %m\n"); 185 written = 0; 186 } 187 188 rem_size = cblock->aio_nbytes - written; 189 190 if (rem_size == 0) { 191 cblock->aio_fildes = -1; 192 /* 193 * md->refcount is incremented in record__aio_pushfn() for 194 * every aio write request started in record__aio_push() so 195 * decrement it because the request is now complete. 196 */ 197 perf_mmap__put(md); 198 rc = 1; 199 } else { 200 /* 201 * aio write request may require restart with the 202 * reminder if the kernel didn't write whole 203 * chunk at once. 204 */ 205 rem_off = cblock->aio_offset + written; 206 rem_buf = (void *)(cblock->aio_buf + written); 207 record__aio_write(cblock, cblock->aio_fildes, 208 rem_buf, rem_size, rem_off); 209 rc = 0; 210 } 211 212 return rc; 213 } 214 215 static int record__aio_sync(struct perf_mmap *md, bool sync_all) 216 { 217 struct aiocb **aiocb = md->aio.aiocb; 218 struct aiocb *cblocks = md->aio.cblocks; 219 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 220 int i, do_suspend; 221 222 do { 223 do_suspend = 0; 224 for (i = 0; i < md->aio.nr_cblocks; ++i) { 225 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 226 if (sync_all) 227 aiocb[i] = NULL; 228 else 229 return i; 230 } else { 231 /* 232 * Started aio write is not complete yet 233 * so it has to be waited before the 234 * next allocation. 235 */ 236 aiocb[i] = &cblocks[i]; 237 do_suspend = 1; 238 } 239 } 240 if (!do_suspend) 241 return -1; 242 243 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 244 if (!(errno == EAGAIN || errno == EINTR)) 245 pr_err("failed to sync perf data, error: %m\n"); 246 } 247 } while (1); 248 } 249 250 struct record_aio { 251 struct record *rec; 252 void *data; 253 size_t size; 254 }; 255 256 static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size) 257 { 258 struct record_aio *aio = to; 259 260 /* 261 * map->base data pointed by buf is copied into free map->aio.data[] buffer 262 * to release space in the kernel buffer as fast as possible, calling 263 * perf_mmap__consume() from perf_mmap__push() function. 264 * 265 * That lets the kernel to proceed with storing more profiling data into 266 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 
267 * 268 * Coping can be done in two steps in case the chunk of profiling data 269 * crosses the upper bound of the kernel buffer. In this case we first move 270 * part of data from map->start till the upper bound and then the reminder 271 * from the beginning of the kernel buffer till the end of the data chunk. 272 */ 273 274 if (record__comp_enabled(aio->rec)) { 275 size = zstd_compress(aio->rec->session, aio->data + aio->size, 276 perf_mmap__mmap_len(map) - aio->size, 277 buf, size); 278 } else { 279 memcpy(aio->data + aio->size, buf, size); 280 } 281 282 if (!aio->size) { 283 /* 284 * Increment map->refcount to guard map->aio.data[] buffer 285 * from premature deallocation because map object can be 286 * released earlier than aio write request started on 287 * map->aio.data[] buffer is complete. 288 * 289 * perf_mmap__put() is done at record__aio_complete() 290 * after started aio request completion or at record__aio_push() 291 * if the request failed to start. 292 */ 293 perf_mmap__get(map); 294 } 295 296 aio->size += size; 297 298 return size; 299 } 300 301 static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off) 302 { 303 int ret, idx; 304 int trace_fd = rec->session->data->file.fd; 305 struct record_aio aio = { .rec = rec, .size = 0 }; 306 307 /* 308 * Call record__aio_sync() to wait till map->aio.data[] buffer 309 * becomes available after previous aio write operation. 310 */ 311 312 idx = record__aio_sync(map, false); 313 aio.data = map->aio.data[idx]; 314 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 315 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 316 return ret; 317 318 rec->samples++; 319 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 320 if (!ret) { 321 *off += aio.size; 322 rec->bytes_written += aio.size; 323 if (switch_output_size(rec)) 324 trigger_hit(&switch_output_trigger); 325 } else { 326 /* 327 * Decrement map->refcount incremented in record__aio_pushfn() 328 * back if record__aio_write() operation failed to start, otherwise 329 * map->refcount is decremented in record__aio_complete() after 330 * aio write operation finishes successfully. 
331 */ 332 perf_mmap__put(map); 333 } 334 335 return ret; 336 } 337 338 static off_t record__aio_get_pos(int trace_fd) 339 { 340 return lseek(trace_fd, 0, SEEK_CUR); 341 } 342 343 static void record__aio_set_pos(int trace_fd, off_t pos) 344 { 345 lseek(trace_fd, pos, SEEK_SET); 346 } 347 348 static void record__aio_mmap_read_sync(struct record *rec) 349 { 350 int i; 351 struct evlist *evlist = rec->evlist; 352 struct perf_mmap *maps = evlist->mmap; 353 354 if (!record__aio_enabled(rec)) 355 return; 356 357 for (i = 0; i < evlist->nr_mmaps; i++) { 358 struct perf_mmap *map = &maps[i]; 359 360 if (map->base) 361 record__aio_sync(map, true); 362 } 363 } 364 365 static int nr_cblocks_default = 1; 366 static int nr_cblocks_max = 4; 367 368 static int record__aio_parse(const struct option *opt, 369 const char *str, 370 int unset) 371 { 372 struct record_opts *opts = (struct record_opts *)opt->value; 373 374 if (unset) { 375 opts->nr_cblocks = 0; 376 } else { 377 if (str) 378 opts->nr_cblocks = strtol(str, NULL, 0); 379 if (!opts->nr_cblocks) 380 opts->nr_cblocks = nr_cblocks_default; 381 } 382 383 return 0; 384 } 385 #else /* HAVE_AIO_SUPPORT */ 386 static int nr_cblocks_max = 0; 387 388 static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused, 389 off_t *off __maybe_unused) 390 { 391 return -1; 392 } 393 394 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 395 { 396 return -1; 397 } 398 399 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 400 { 401 } 402 403 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 404 { 405 } 406 #endif 407 408 static int record__aio_enabled(struct record *rec) 409 { 410 return rec->opts.nr_cblocks > 0; 411 } 412 413 #define MMAP_FLUSH_DEFAULT 1 414 static int record__mmap_flush_parse(const struct option *opt, 415 const char *str, 416 int unset) 417 { 418 int flush_max; 419 struct record_opts *opts = (struct record_opts *)opt->value; 420 static struct parse_tag tags[] = { 421 { .tag = 'B', .mult = 1 }, 422 { .tag = 'K', .mult = 1 << 10 }, 423 { .tag = 'M', .mult = 1 << 20 }, 424 { .tag = 'G', .mult = 1 << 30 }, 425 { .tag = 0 }, 426 }; 427 428 if (unset) 429 return 0; 430 431 if (str) { 432 opts->mmap_flush = parse_tag_value(str, tags); 433 if (opts->mmap_flush == (int)-1) 434 opts->mmap_flush = strtol(str, NULL, 0); 435 } 436 437 if (!opts->mmap_flush) 438 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 439 440 flush_max = perf_evlist__mmap_size(opts->mmap_pages); 441 flush_max /= 4; 442 if (opts->mmap_flush > flush_max) 443 opts->mmap_flush = flush_max; 444 445 return 0; 446 } 447 448 #ifdef HAVE_ZSTD_SUPPORT 449 static unsigned int comp_level_default = 1; 450 451 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 452 { 453 struct record_opts *opts = opt->value; 454 455 if (unset) { 456 opts->comp_level = 0; 457 } else { 458 if (str) 459 opts->comp_level = strtol(str, NULL, 0); 460 if (!opts->comp_level) 461 opts->comp_level = comp_level_default; 462 } 463 464 return 0; 465 } 466 #endif 467 static unsigned int comp_level_max = 22; 468 469 static int record__comp_enabled(struct record *rec) 470 { 471 return rec->opts.comp_level > 0; 472 } 473 474 static int process_synthesized_event(struct perf_tool *tool, 475 union perf_event *event, 476 struct perf_sample *sample __maybe_unused, 477 struct machine *machine __maybe_unused) 478 { 479 struct record *rec = container_of(tool, struct record, tool); 480 return 
record__write(rec, NULL, event, event->header.size); 481 } 482 483 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) 484 { 485 struct record *rec = to; 486 487 if (record__comp_enabled(rec)) { 488 size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); 489 bf = map->data; 490 } 491 492 rec->samples++; 493 return record__write(rec, map, bf, size); 494 } 495 496 static volatile int done; 497 static volatile int signr = -1; 498 static volatile int child_finished; 499 500 static void sig_handler(int sig) 501 { 502 if (sig == SIGCHLD) 503 child_finished = 1; 504 else 505 signr = sig; 506 507 done = 1; 508 } 509 510 static void sigsegv_handler(int sig) 511 { 512 perf_hooks__recover(); 513 sighandler_dump_stack(sig); 514 } 515 516 static void record__sig_exit(void) 517 { 518 if (signr == -1) 519 return; 520 521 signal(signr, SIG_DFL); 522 raise(signr); 523 } 524 525 #ifdef HAVE_AUXTRACE_SUPPORT 526 527 static int record__process_auxtrace(struct perf_tool *tool, 528 struct perf_mmap *map, 529 union perf_event *event, void *data1, 530 size_t len1, void *data2, size_t len2) 531 { 532 struct record *rec = container_of(tool, struct record, tool); 533 struct perf_data *data = &rec->data; 534 size_t padding; 535 u8 pad[8] = {0}; 536 537 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { 538 off_t file_offset; 539 int fd = perf_data__fd(data); 540 int err; 541 542 file_offset = lseek(fd, 0, SEEK_CUR); 543 if (file_offset == -1) 544 return -1; 545 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 546 event, file_offset); 547 if (err) 548 return err; 549 } 550 551 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 552 padding = (len1 + len2) & 7; 553 if (padding) 554 padding = 8 - padding; 555 556 record__write(rec, map, event, event->header.size); 557 record__write(rec, map, data1, len1); 558 if (len2) 559 record__write(rec, map, data2, len2); 560 record__write(rec, map, &pad, padding); 561 562 return 0; 563 } 564 565 static int record__auxtrace_mmap_read(struct record *rec, 566 struct perf_mmap *map) 567 { 568 int ret; 569 570 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 571 record__process_auxtrace); 572 if (ret < 0) 573 return ret; 574 575 if (ret) 576 rec->samples++; 577 578 return 0; 579 } 580 581 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 582 struct perf_mmap *map) 583 { 584 int ret; 585 586 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 587 record__process_auxtrace, 588 rec->opts.auxtrace_snapshot_size); 589 if (ret < 0) 590 return ret; 591 592 if (ret) 593 rec->samples++; 594 595 return 0; 596 } 597 598 static int record__auxtrace_read_snapshot_all(struct record *rec) 599 { 600 int i; 601 int rc = 0; 602 603 for (i = 0; i < rec->evlist->nr_mmaps; i++) { 604 struct perf_mmap *map = &rec->evlist->mmap[i]; 605 606 if (!map->auxtrace_mmap.base) 607 continue; 608 609 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 610 rc = -1; 611 goto out; 612 } 613 } 614 out: 615 return rc; 616 } 617 618 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 619 { 620 pr_debug("Recording AUX area tracing snapshot\n"); 621 if (record__auxtrace_read_snapshot_all(rec) < 0) { 622 trigger_error(&auxtrace_snapshot_trigger); 623 } else { 624 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 625 trigger_error(&auxtrace_snapshot_trigger); 626 else 627 trigger_ready(&auxtrace_snapshot_trigger); 628 } 629 } 630 631 static int 
record__auxtrace_snapshot_exit(struct record *rec) 632 { 633 if (trigger_is_error(&auxtrace_snapshot_trigger)) 634 return 0; 635 636 if (!auxtrace_record__snapshot_started && 637 auxtrace_record__snapshot_start(rec->itr)) 638 return -1; 639 640 record__read_auxtrace_snapshot(rec, true); 641 if (trigger_is_error(&auxtrace_snapshot_trigger)) 642 return -1; 643 644 return 0; 645 } 646 647 static int record__auxtrace_init(struct record *rec) 648 { 649 int err; 650 651 if (!rec->itr) { 652 rec->itr = auxtrace_record__init(rec->evlist, &err); 653 if (err) 654 return err; 655 } 656 657 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 658 rec->opts.auxtrace_snapshot_opts); 659 if (err) 660 return err; 661 662 return auxtrace_parse_filters(rec->evlist); 663 } 664 665 #else 666 667 static inline 668 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 669 struct perf_mmap *map __maybe_unused) 670 { 671 return 0; 672 } 673 674 static inline 675 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 676 bool on_exit __maybe_unused) 677 { 678 } 679 680 static inline 681 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 682 { 683 return 0; 684 } 685 686 static inline 687 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 688 { 689 return 0; 690 } 691 692 static int record__auxtrace_init(struct record *rec __maybe_unused) 693 { 694 return 0; 695 } 696 697 #endif 698 699 static int record__mmap_evlist(struct record *rec, 700 struct evlist *evlist) 701 { 702 struct record_opts *opts = &rec->opts; 703 char msg[512]; 704 705 if (opts->affinity != PERF_AFFINITY_SYS) 706 cpu__setup_cpunode_map(); 707 708 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 709 opts->auxtrace_mmap_pages, 710 opts->auxtrace_snapshot_mode, 711 opts->nr_cblocks, opts->affinity, 712 opts->mmap_flush, opts->comp_level) < 0) { 713 if (errno == EPERM) { 714 pr_err("Permission error mapping pages.\n" 715 "Consider increasing " 716 "/proc/sys/kernel/perf_event_mlock_kb,\n" 717 "or try again with a smaller value of -m/--mmap_pages.\n" 718 "(current value: %u,%u)\n", 719 opts->mmap_pages, opts->auxtrace_mmap_pages); 720 return -errno; 721 } else { 722 pr_err("failed to mmap with %d (%s)\n", errno, 723 str_error_r(errno, msg, sizeof(msg))); 724 if (errno) 725 return -errno; 726 else 727 return -EINVAL; 728 } 729 } 730 return 0; 731 } 732 733 static int record__mmap(struct record *rec) 734 { 735 return record__mmap_evlist(rec, rec->evlist); 736 } 737 738 static int record__open(struct record *rec) 739 { 740 char msg[BUFSIZ]; 741 struct evsel *pos; 742 struct evlist *evlist = rec->evlist; 743 struct perf_session *session = rec->session; 744 struct record_opts *opts = &rec->opts; 745 int rc = 0; 746 747 /* 748 * For initial_delay we need to add a dummy event so that we can track 749 * PERF_RECORD_MMAP while we wait for the initial delay to enable the 750 * real events, the ones asked by the user. 
751 */ 752 if (opts->initial_delay) { 753 if (perf_evlist__add_dummy(evlist)) 754 return -ENOMEM; 755 756 pos = perf_evlist__first(evlist); 757 pos->tracking = 0; 758 pos = perf_evlist__last(evlist); 759 pos->tracking = 1; 760 pos->core.attr.enable_on_exec = 1; 761 } 762 763 perf_evlist__config(evlist, opts, &callchain_param); 764 765 evlist__for_each_entry(evlist, pos) { 766 try_again: 767 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 768 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { 769 if (verbose > 0) 770 ui__warning("%s\n", msg); 771 goto try_again; 772 } 773 if ((errno == EINVAL || errno == EBADF) && 774 pos->leader != pos && 775 pos->weak_group) { 776 pos = perf_evlist__reset_weak_group(evlist, pos); 777 goto try_again; 778 } 779 rc = -errno; 780 perf_evsel__open_strerror(pos, &opts->target, 781 errno, msg, sizeof(msg)); 782 ui__error("%s\n", msg); 783 goto out; 784 } 785 786 pos->supported = true; 787 } 788 789 if (perf_evlist__apply_filters(evlist, &pos)) { 790 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 791 pos->filter, perf_evsel__name(pos), errno, 792 str_error_r(errno, msg, sizeof(msg))); 793 rc = -1; 794 goto out; 795 } 796 797 rc = record__mmap(rec); 798 if (rc) 799 goto out; 800 801 session->evlist = evlist; 802 perf_session__set_id_hdr_size(session); 803 out: 804 return rc; 805 } 806 807 static int process_sample_event(struct perf_tool *tool, 808 union perf_event *event, 809 struct perf_sample *sample, 810 struct evsel *evsel, 811 struct machine *machine) 812 { 813 struct record *rec = container_of(tool, struct record, tool); 814 815 if (rec->evlist->first_sample_time == 0) 816 rec->evlist->first_sample_time = sample->time; 817 818 rec->evlist->last_sample_time = sample->time; 819 820 if (rec->buildid_all) 821 return 0; 822 823 rec->samples++; 824 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 825 } 826 827 static int process_buildids(struct record *rec) 828 { 829 struct perf_session *session = rec->session; 830 831 if (perf_data__size(&rec->data) == 0) 832 return 0; 833 834 /* 835 * During this process, it'll load kernel map and replace the 836 * dso->long_name to a real pathname it found. In this case 837 * we prefer the vmlinux path like 838 * /lib/modules/3.16.4/build/vmlinux 839 * 840 * rather than build-id path (in debug directory). 841 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 842 */ 843 symbol_conf.ignore_vmlinux_buildid = true; 844 845 /* 846 * If --buildid-all is given, it marks all DSO regardless of hits, 847 * so no need to process samples. But if timestamp_boundary is enabled, 848 * it still needs to walk on all samples to get the timestamps of 849 * first/last samples. 850 */ 851 if (rec->buildid_all && !rec->timestamp_boundary) 852 rec->tool.sample = NULL; 853 854 return perf_session__process_events(session); 855 } 856 857 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 858 { 859 int err; 860 struct perf_tool *tool = data; 861 /* 862 *As for guest kernel when processing subcommand record&report, 863 *we arrange module mmap prior to guest kernel mmap and trigger 864 *a preload dso because default guest module symbols are loaded 865 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 866 *method is used to avoid symbol missing when the first addr is 867 *in module instead of in guest kernel. 
868 */ 869 err = perf_event__synthesize_modules(tool, process_synthesized_event, 870 machine); 871 if (err < 0) 872 pr_err("Couldn't record guest kernel [%d]'s reference" 873 " relocation symbol.\n", machine->pid); 874 875 /* 876 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 877 * have no _text sometimes. 878 */ 879 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 880 machine); 881 if (err < 0) 882 pr_err("Couldn't record guest kernel [%d]'s reference" 883 " relocation symbol.\n", machine->pid); 884 } 885 886 static struct perf_event_header finished_round_event = { 887 .size = sizeof(struct perf_event_header), 888 .type = PERF_RECORD_FINISHED_ROUND, 889 }; 890 891 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) 892 { 893 if (rec->opts.affinity != PERF_AFFINITY_SYS && 894 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { 895 CPU_ZERO(&rec->affinity_mask); 896 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); 897 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); 898 } 899 } 900 901 static size_t process_comp_header(void *record, size_t increment) 902 { 903 struct perf_record_compressed *event = record; 904 size_t size = sizeof(*event); 905 906 if (increment) { 907 event->header.size += increment; 908 return increment; 909 } 910 911 event->header.type = PERF_RECORD_COMPRESSED; 912 event->header.size = size; 913 914 return size; 915 } 916 917 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size, 918 void *src, size_t src_size) 919 { 920 size_t compressed; 921 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 922 923 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size, 924 max_record_size, process_comp_header); 925 926 session->bytes_transferred += src_size; 927 session->bytes_compressed += compressed; 928 929 return compressed; 930 } 931 932 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 933 bool overwrite, bool synch) 934 { 935 u64 bytes_written = rec->bytes_written; 936 int i; 937 int rc = 0; 938 struct perf_mmap *maps; 939 int trace_fd = rec->data.file.fd; 940 off_t off = 0; 941 942 if (!evlist) 943 return 0; 944 945 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; 946 if (!maps) 947 return 0; 948 949 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 950 return 0; 951 952 if (record__aio_enabled(rec)) 953 off = record__aio_get_pos(trace_fd); 954 955 for (i = 0; i < evlist->nr_mmaps; i++) { 956 u64 flush = 0; 957 struct perf_mmap *map = &maps[i]; 958 959 if (map->base) { 960 record__adjust_affinity(rec, map); 961 if (synch) { 962 flush = map->flush; 963 map->flush = 1; 964 } 965 if (!record__aio_enabled(rec)) { 966 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 967 if (synch) 968 map->flush = flush; 969 rc = -1; 970 goto out; 971 } 972 } else { 973 if (record__aio_push(rec, map, &off) < 0) { 974 record__aio_set_pos(trace_fd, off); 975 if (synch) 976 map->flush = flush; 977 rc = -1; 978 goto out; 979 } 980 } 981 if (synch) 982 map->flush = flush; 983 } 984 985 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 986 record__auxtrace_mmap_read(rec, map) != 0) { 987 rc = -1; 988 goto out; 989 } 990 } 991 992 if (record__aio_enabled(rec)) 993 record__aio_set_pos(trace_fd, off); 994 995 /* 996 * Mark the round finished in case we wrote 997 * at least one event. 
998 */ 999 if (bytes_written != rec->bytes_written) 1000 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1001 1002 if (overwrite) 1003 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1004 out: 1005 return rc; 1006 } 1007 1008 static int record__mmap_read_all(struct record *rec, bool synch) 1009 { 1010 int err; 1011 1012 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1013 if (err) 1014 return err; 1015 1016 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1017 } 1018 1019 static void record__init_features(struct record *rec) 1020 { 1021 struct perf_session *session = rec->session; 1022 int feat; 1023 1024 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1025 perf_header__set_feat(&session->header, feat); 1026 1027 if (rec->no_buildid) 1028 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1029 1030 if (!have_tracepoints(&rec->evlist->core.entries)) 1031 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1032 1033 if (!rec->opts.branch_stack) 1034 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1035 1036 if (!rec->opts.full_auxtrace) 1037 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1038 1039 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1040 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1041 1042 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1043 if (!record__comp_enabled(rec)) 1044 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1045 1046 perf_header__clear_feat(&session->header, HEADER_STAT); 1047 } 1048 1049 static void 1050 record__finish_output(struct record *rec) 1051 { 1052 struct perf_data *data = &rec->data; 1053 int fd = perf_data__fd(data); 1054 1055 if (data->is_pipe) 1056 return; 1057 1058 rec->session->header.data_size += rec->bytes_written; 1059 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1060 1061 if (!rec->no_buildid) { 1062 process_buildids(rec); 1063 1064 if (rec->buildid_all) 1065 dsos__hit_all(rec->session); 1066 } 1067 perf_session__write_header(rec->session, rec->evlist, fd, true); 1068 1069 return; 1070 } 1071 1072 static int record__synthesize_workload(struct record *rec, bool tail) 1073 { 1074 int err; 1075 struct perf_thread_map *thread_map; 1076 1077 if (rec->opts.tail_synthesize != tail) 1078 return 0; 1079 1080 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1081 if (thread_map == NULL) 1082 return -1; 1083 1084 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1085 process_synthesized_event, 1086 &rec->session->machines.host, 1087 rec->opts.sample_address); 1088 perf_thread_map__put(thread_map); 1089 return err; 1090 } 1091 1092 static int record__synthesize(struct record *rec, bool tail); 1093 1094 static int 1095 record__switch_output(struct record *rec, bool at_exit) 1096 { 1097 struct perf_data *data = &rec->data; 1098 int fd, err; 1099 char *new_filename; 1100 1101 /* Same Size: "2015122520103046"*/ 1102 char timestamp[] = "InvalidTimestamp"; 1103 1104 record__aio_mmap_read_sync(rec); 1105 1106 record__synthesize(rec, true); 1107 if (target__none(&rec->opts.target)) 1108 record__synthesize_workload(rec, true); 1109 1110 rec->samples = 0; 1111 record__finish_output(rec); 1112 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1113 if (err) { 1114 pr_err("Failed to get current timestamp\n"); 1115 return -EINVAL; 1116 } 1117 1118 fd = perf_data__switch(data, timestamp, 1119 
rec->session->header.data_offset, 1120 at_exit, &new_filename); 1121 if (fd >= 0 && !at_exit) { 1122 rec->bytes_written = 0; 1123 rec->session->header.data_size = 0; 1124 } 1125 1126 if (!quiet) 1127 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1128 data->path, timestamp); 1129 1130 if (rec->switch_output.num_files) { 1131 int n = rec->switch_output.cur_file + 1; 1132 1133 if (n >= rec->switch_output.num_files) 1134 n = 0; 1135 rec->switch_output.cur_file = n; 1136 if (rec->switch_output.filenames[n]) { 1137 remove(rec->switch_output.filenames[n]); 1138 zfree(&rec->switch_output.filenames[n]); 1139 } 1140 rec->switch_output.filenames[n] = new_filename; 1141 } else { 1142 free(new_filename); 1143 } 1144 1145 /* Output tracking events */ 1146 if (!at_exit) { 1147 record__synthesize(rec, false); 1148 1149 /* 1150 * In 'perf record --switch-output' without -a, 1151 * record__synthesize() in record__switch_output() won't 1152 * generate tracking events because there's no thread_map 1153 * in evlist. Which causes newly created perf.data doesn't 1154 * contain map and comm information. 1155 * Create a fake thread_map and directly call 1156 * perf_event__synthesize_thread_map() for those events. 1157 */ 1158 if (target__none(&rec->opts.target)) 1159 record__synthesize_workload(rec, false); 1160 } 1161 return fd; 1162 } 1163 1164 static volatile int workload_exec_errno; 1165 1166 /* 1167 * perf_evlist__prepare_workload will send a SIGUSR1 1168 * if the fork fails, since we asked by setting its 1169 * want_signal to true. 1170 */ 1171 static void workload_exec_failed_signal(int signo __maybe_unused, 1172 siginfo_t *info, 1173 void *ucontext __maybe_unused) 1174 { 1175 workload_exec_errno = info->si_value.sival_int; 1176 done = 1; 1177 child_finished = 1; 1178 } 1179 1180 static void snapshot_sig_handler(int sig); 1181 static void alarm_sig_handler(int sig); 1182 1183 int __weak 1184 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, 1185 struct perf_tool *tool __maybe_unused, 1186 perf_event__handler_t process __maybe_unused, 1187 struct machine *machine __maybe_unused) 1188 { 1189 return 0; 1190 } 1191 1192 static const struct perf_event_mmap_page * 1193 perf_evlist__pick_pc(struct evlist *evlist) 1194 { 1195 if (evlist) { 1196 if (evlist->mmap && evlist->mmap[0].base) 1197 return evlist->mmap[0].base; 1198 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) 1199 return evlist->overwrite_mmap[0].base; 1200 } 1201 return NULL; 1202 } 1203 1204 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1205 { 1206 const struct perf_event_mmap_page *pc; 1207 1208 pc = perf_evlist__pick_pc(rec->evlist); 1209 if (pc) 1210 return pc; 1211 return NULL; 1212 } 1213 1214 static int record__synthesize(struct record *rec, bool tail) 1215 { 1216 struct perf_session *session = rec->session; 1217 struct machine *machine = &session->machines.host; 1218 struct perf_data *data = &rec->data; 1219 struct record_opts *opts = &rec->opts; 1220 struct perf_tool *tool = &rec->tool; 1221 int fd = perf_data__fd(data); 1222 int err = 0; 1223 1224 if (rec->opts.tail_synthesize != tail) 1225 return 0; 1226 1227 if (data->is_pipe) { 1228 /* 1229 * We need to synthesize events first, because some 1230 * features works on top of them (on report side). 
1231 */ 1232 err = perf_event__synthesize_attrs(tool, rec->evlist, 1233 process_synthesized_event); 1234 if (err < 0) { 1235 pr_err("Couldn't synthesize attrs.\n"); 1236 goto out; 1237 } 1238 1239 err = perf_event__synthesize_features(tool, session, rec->evlist, 1240 process_synthesized_event); 1241 if (err < 0) { 1242 pr_err("Couldn't synthesize features.\n"); 1243 return err; 1244 } 1245 1246 if (have_tracepoints(&rec->evlist->core.entries)) { 1247 /* 1248 * FIXME err <= 0 here actually means that 1249 * there were no tracepoints so its not really 1250 * an error, just that we don't need to 1251 * synthesize anything. We really have to 1252 * return this more properly and also 1253 * propagate errors that now are calling die() 1254 */ 1255 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, 1256 process_synthesized_event); 1257 if (err <= 0) { 1258 pr_err("Couldn't record tracing data.\n"); 1259 goto out; 1260 } 1261 rec->bytes_written += err; 1262 } 1263 } 1264 1265 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1266 process_synthesized_event, machine); 1267 if (err) 1268 goto out; 1269 1270 if (rec->opts.full_auxtrace) { 1271 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1272 session, process_synthesized_event); 1273 if (err) 1274 goto out; 1275 } 1276 1277 if (!perf_evlist__exclude_kernel(rec->evlist)) { 1278 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1279 machine); 1280 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1281 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1282 "Check /proc/kallsyms permission or run as root.\n"); 1283 1284 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1285 machine); 1286 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1287 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 1288 "Check /proc/modules permission or run as root.\n"); 1289 } 1290 1291 if (perf_guest) { 1292 machines__process_guests(&session->machines, 1293 perf_event__synthesize_guest_os, tool); 1294 } 1295 1296 err = perf_event__synthesize_extra_attr(&rec->tool, 1297 rec->evlist, 1298 process_synthesized_event, 1299 data->is_pipe); 1300 if (err) 1301 goto out; 1302 1303 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1304 process_synthesized_event, 1305 NULL); 1306 if (err < 0) { 1307 pr_err("Couldn't synthesize thread map.\n"); 1308 return err; 1309 } 1310 1311 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus, 1312 process_synthesized_event, NULL); 1313 if (err < 0) { 1314 pr_err("Couldn't synthesize cpu map.\n"); 1315 return err; 1316 } 1317 1318 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1319 machine, opts); 1320 if (err < 0) 1321 pr_warning("Couldn't synthesize bpf events.\n"); 1322 1323 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, 1324 process_synthesized_event, opts->sample_address, 1325 1); 1326 out: 1327 return err; 1328 } 1329 1330 static int __cmd_record(struct record *rec, int argc, const char **argv) 1331 { 1332 int err; 1333 int status = 0; 1334 unsigned long waking = 0; 1335 const bool forks = argc > 0; 1336 struct perf_tool *tool = &rec->tool; 1337 struct record_opts *opts = &rec->opts; 1338 struct perf_data *data = &rec->data; 1339 struct perf_session *session; 1340 bool disabled = false, draining = false; 1341 struct evlist *sb_evlist = NULL; 1342 int fd; 1343 float ratio = 0; 1344 1345 atexit(record__sig_exit); 1346 signal(SIGCHLD, sig_handler); 1347 signal(SIGINT, sig_handler); 1348 signal(SIGTERM, sig_handler); 1349 signal(SIGSEGV, sigsegv_handler); 1350 1351 if (rec->opts.record_namespaces) 1352 tool->namespace_events = true; 1353 1354 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 1355 signal(SIGUSR2, snapshot_sig_handler); 1356 if (rec->opts.auxtrace_snapshot_mode) 1357 trigger_on(&auxtrace_snapshot_trigger); 1358 if (rec->switch_output.enabled) 1359 trigger_on(&switch_output_trigger); 1360 } else { 1361 signal(SIGUSR2, SIG_IGN); 1362 } 1363 1364 session = perf_session__new(data, false, tool); 1365 if (session == NULL) { 1366 pr_err("Perf session creation failed.\n"); 1367 return -1; 1368 } 1369 1370 fd = perf_data__fd(data); 1371 rec->session = session; 1372 1373 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 1374 pr_err("Compression initialization failed.\n"); 1375 return -1; 1376 } 1377 1378 session->header.env.comp_type = PERF_COMP_ZSTD; 1379 session->header.env.comp_level = rec->opts.comp_level; 1380 1381 record__init_features(rec); 1382 1383 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1384 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; 1385 1386 if (forks) { 1387 err = perf_evlist__prepare_workload(rec->evlist, &opts->target, 1388 argv, data->is_pipe, 1389 workload_exec_failed_signal); 1390 if (err < 0) { 1391 pr_err("Couldn't run the workload!\n"); 1392 status = err; 1393 goto out_delete_session; 1394 } 1395 } 1396 1397 /* 1398 * If we have just single event and are sending data 1399 * through pipe, we need to force the ids allocation, 1400 * because we synthesize event name through the pipe 1401 * and need the id for that. 
1402 */ 1403 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 1404 rec->opts.sample_id = true; 1405 1406 if (record__open(rec) != 0) { 1407 err = -1; 1408 goto out_child; 1409 } 1410 session->header.env.comp_mmap_len = session->evlist->mmap_len; 1411 1412 err = bpf__apply_obj_config(); 1413 if (err) { 1414 char errbuf[BUFSIZ]; 1415 1416 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 1417 pr_err("ERROR: Apply config to BPF failed: %s\n", 1418 errbuf); 1419 goto out_child; 1420 } 1421 1422 /* 1423 * Normally perf_session__new would do this, but it doesn't have the 1424 * evlist. 1425 */ 1426 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { 1427 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 1428 rec->tool.ordered_events = false; 1429 } 1430 1431 if (!rec->evlist->nr_groups) 1432 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 1433 1434 if (data->is_pipe) { 1435 err = perf_header__write_pipe(fd); 1436 if (err < 0) 1437 goto out_child; 1438 } else { 1439 err = perf_session__write_header(session, rec->evlist, fd, false); 1440 if (err < 0) 1441 goto out_child; 1442 } 1443 1444 if (!rec->no_buildid 1445 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 1446 pr_err("Couldn't generate buildids. " 1447 "Use --no-buildid to profile anyway.\n"); 1448 err = -1; 1449 goto out_child; 1450 } 1451 1452 if (!opts->no_bpf_event) 1453 bpf_event__add_sb_event(&sb_evlist, &session->header.env); 1454 1455 if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { 1456 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1457 opts->no_bpf_event = true; 1458 } 1459 1460 err = record__synthesize(rec, false); 1461 if (err < 0) 1462 goto out_child; 1463 1464 if (rec->realtime_prio) { 1465 struct sched_param param; 1466 1467 param.sched_priority = rec->realtime_prio; 1468 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1469 pr_err("Could not set realtime priority.\n"); 1470 err = -1; 1471 goto out_child; 1472 } 1473 } 1474 1475 /* 1476 * When perf is starting the traced process, all the events 1477 * (apart from group members) have enable_on_exec=1 set, 1478 * so don't spoil it by prematurely enabling them. 1479 */ 1480 if (!target__none(&opts->target) && !opts->initial_delay) 1481 evlist__enable(rec->evlist); 1482 1483 /* 1484 * Let the child rip 1485 */ 1486 if (forks) { 1487 struct machine *machine = &session->machines.host; 1488 union perf_event *event; 1489 pid_t tgid; 1490 1491 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 1492 if (event == NULL) { 1493 err = -ENOMEM; 1494 goto out_child; 1495 } 1496 1497 /* 1498 * Some H/W events are generated before COMM event 1499 * which is emitted during exec(), so perf script 1500 * cannot see a correct process name for those events. 1501 * Synthesize COMM event to prevent it. 1502 */ 1503 tgid = perf_event__synthesize_comm(tool, event, 1504 rec->evlist->workload.pid, 1505 process_synthesized_event, 1506 machine); 1507 free(event); 1508 1509 if (tgid == -1) 1510 goto out_child; 1511 1512 event = malloc(sizeof(event->namespaces) + 1513 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 1514 machine->id_hdr_size); 1515 if (event == NULL) { 1516 err = -ENOMEM; 1517 goto out_child; 1518 } 1519 1520 /* 1521 * Synthesize NAMESPACES event for the command specified. 
1522 */ 1523 perf_event__synthesize_namespaces(tool, event, 1524 rec->evlist->workload.pid, 1525 tgid, process_synthesized_event, 1526 machine); 1527 free(event); 1528 1529 perf_evlist__start_workload(rec->evlist); 1530 } 1531 1532 if (opts->initial_delay) { 1533 usleep(opts->initial_delay * USEC_PER_MSEC); 1534 evlist__enable(rec->evlist); 1535 } 1536 1537 trigger_ready(&auxtrace_snapshot_trigger); 1538 trigger_ready(&switch_output_trigger); 1539 perf_hooks__invoke_record_start(); 1540 for (;;) { 1541 unsigned long long hits = rec->samples; 1542 1543 /* 1544 * rec->evlist->bkw_mmap_state is possible to be 1545 * BKW_MMAP_EMPTY here: when done == true and 1546 * hits != rec->samples in previous round. 1547 * 1548 * perf_evlist__toggle_bkw_mmap ensure we never 1549 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 1550 */ 1551 if (trigger_is_hit(&switch_output_trigger) || done || draining) 1552 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 1553 1554 if (record__mmap_read_all(rec, false) < 0) { 1555 trigger_error(&auxtrace_snapshot_trigger); 1556 trigger_error(&switch_output_trigger); 1557 err = -1; 1558 goto out_child; 1559 } 1560 1561 if (auxtrace_record__snapshot_started) { 1562 auxtrace_record__snapshot_started = 0; 1563 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 1564 record__read_auxtrace_snapshot(rec, false); 1565 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 1566 pr_err("AUX area tracing snapshot failed\n"); 1567 err = -1; 1568 goto out_child; 1569 } 1570 } 1571 1572 if (trigger_is_hit(&switch_output_trigger)) { 1573 /* 1574 * If switch_output_trigger is hit, the data in 1575 * overwritable ring buffer should have been collected, 1576 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 1577 * 1578 * If SIGUSR2 raise after or during record__mmap_read_all(), 1579 * record__mmap_read_all() didn't collect data from 1580 * overwritable ring buffer. Read again. 1581 */ 1582 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 1583 continue; 1584 trigger_ready(&switch_output_trigger); 1585 1586 /* 1587 * Reenable events in overwrite ring buffer after 1588 * record__mmap_read_all(): we should have collected 1589 * data from it. 1590 */ 1591 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 1592 1593 if (!quiet) 1594 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 1595 waking); 1596 waking = 0; 1597 fd = record__switch_output(rec, false); 1598 if (fd < 0) { 1599 pr_err("Failed to switch to new file\n"); 1600 trigger_error(&switch_output_trigger); 1601 err = fd; 1602 goto out_child; 1603 } 1604 1605 /* re-arm the alarm */ 1606 if (rec->switch_output.time) 1607 alarm(rec->switch_output.time); 1608 } 1609 1610 if (hits == rec->samples) { 1611 if (done || draining) 1612 break; 1613 err = perf_evlist__poll(rec->evlist, -1); 1614 /* 1615 * Propagate error, only if there's any. Ignore positive 1616 * number of returned events and interrupt error. 1617 */ 1618 if (err > 0 || (err < 0 && errno == EINTR)) 1619 err = 0; 1620 waking++; 1621 1622 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0) 1623 draining = true; 1624 } 1625 1626 /* 1627 * When perf is starting the traced process, at the end events 1628 * die with the process and we wait for that. Thus no need to 1629 * disable events in this case. 
1630 */ 1631 if (done && !disabled && !target__none(&opts->target)) { 1632 trigger_off(&auxtrace_snapshot_trigger); 1633 evlist__disable(rec->evlist); 1634 disabled = true; 1635 } 1636 } 1637 1638 trigger_off(&auxtrace_snapshot_trigger); 1639 trigger_off(&switch_output_trigger); 1640 1641 if (opts->auxtrace_snapshot_on_exit) 1642 record__auxtrace_snapshot_exit(rec); 1643 1644 if (forks && workload_exec_errno) { 1645 char msg[STRERR_BUFSIZE]; 1646 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 1647 pr_err("Workload failed: %s\n", emsg); 1648 err = -1; 1649 goto out_child; 1650 } 1651 1652 if (!quiet) 1653 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 1654 1655 if (target__none(&rec->opts.target)) 1656 record__synthesize_workload(rec, true); 1657 1658 out_child: 1659 record__mmap_read_all(rec, true); 1660 record__aio_mmap_read_sync(rec); 1661 1662 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 1663 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 1664 session->header.env.comp_ratio = ratio + 0.5; 1665 } 1666 1667 if (forks) { 1668 int exit_status; 1669 1670 if (!child_finished) 1671 kill(rec->evlist->workload.pid, SIGTERM); 1672 1673 wait(&exit_status); 1674 1675 if (err < 0) 1676 status = err; 1677 else if (WIFEXITED(exit_status)) 1678 status = WEXITSTATUS(exit_status); 1679 else if (WIFSIGNALED(exit_status)) 1680 signr = WTERMSIG(exit_status); 1681 } else 1682 status = err; 1683 1684 record__synthesize(rec, true); 1685 /* this will be recalculated during process_buildids() */ 1686 rec->samples = 0; 1687 1688 if (!err) { 1689 if (!rec->timestamp_filename) { 1690 record__finish_output(rec); 1691 } else { 1692 fd = record__switch_output(rec, true); 1693 if (fd < 0) { 1694 status = fd; 1695 goto out_delete_session; 1696 } 1697 } 1698 } 1699 1700 perf_hooks__invoke_record_end(); 1701 1702 if (!err && !quiet) { 1703 char samples[128]; 1704 const char *postfix = rec->timestamp_filename ? 
1705 ".<timestamp>" : ""; 1706 1707 if (rec->samples && !rec->opts.full_auxtrace) 1708 scnprintf(samples, sizeof(samples), 1709 " (%" PRIu64 " samples)", rec->samples); 1710 else 1711 samples[0] = '\0'; 1712 1713 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 1714 perf_data__size(data) / 1024.0 / 1024.0, 1715 data->path, postfix, samples); 1716 if (ratio) { 1717 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 1718 rec->session->bytes_transferred / 1024.0 / 1024.0, 1719 ratio); 1720 } 1721 fprintf(stderr, " ]\n"); 1722 } 1723 1724 out_delete_session: 1725 zstd_fini(&session->zstd_data); 1726 perf_session__delete(session); 1727 1728 if (!opts->no_bpf_event) 1729 perf_evlist__stop_sb_thread(sb_evlist); 1730 return status; 1731 } 1732 1733 static void callchain_debug(struct callchain_param *callchain) 1734 { 1735 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 1736 1737 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 1738 1739 if (callchain->record_mode == CALLCHAIN_DWARF) 1740 pr_debug("callchain: stack dump size %d\n", 1741 callchain->dump_size); 1742 } 1743 1744 int record_opts__parse_callchain(struct record_opts *record, 1745 struct callchain_param *callchain, 1746 const char *arg, bool unset) 1747 { 1748 int ret; 1749 callchain->enabled = !unset; 1750 1751 /* --no-call-graph */ 1752 if (unset) { 1753 callchain->record_mode = CALLCHAIN_NONE; 1754 pr_debug("callchain: disabled\n"); 1755 return 0; 1756 } 1757 1758 ret = parse_callchain_record_opt(arg, callchain); 1759 if (!ret) { 1760 /* Enable data address sampling for DWARF unwind. */ 1761 if (callchain->record_mode == CALLCHAIN_DWARF) 1762 record->sample_address = true; 1763 callchain_debug(callchain); 1764 } 1765 1766 return ret; 1767 } 1768 1769 int record_parse_callchain_opt(const struct option *opt, 1770 const char *arg, 1771 int unset) 1772 { 1773 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 1774 } 1775 1776 int record_callchain_opt(const struct option *opt, 1777 const char *arg __maybe_unused, 1778 int unset __maybe_unused) 1779 { 1780 struct callchain_param *callchain = opt->value; 1781 1782 callchain->enabled = true; 1783 1784 if (callchain->record_mode == CALLCHAIN_NONE) 1785 callchain->record_mode = CALLCHAIN_FP; 1786 1787 callchain_debug(callchain); 1788 return 0; 1789 } 1790 1791 static int perf_record_config(const char *var, const char *value, void *cb) 1792 { 1793 struct record *rec = cb; 1794 1795 if (!strcmp(var, "record.build-id")) { 1796 if (!strcmp(value, "cache")) 1797 rec->no_buildid_cache = false; 1798 else if (!strcmp(value, "no-cache")) 1799 rec->no_buildid_cache = true; 1800 else if (!strcmp(value, "skip")) 1801 rec->no_buildid = true; 1802 else 1803 return -1; 1804 return 0; 1805 } 1806 if (!strcmp(var, "record.call-graph")) { 1807 var = "call-graph.record-mode"; 1808 return perf_default_config(var, value, cb); 1809 } 1810 #ifdef HAVE_AIO_SUPPORT 1811 if (!strcmp(var, "record.aio")) { 1812 rec->opts.nr_cblocks = strtol(value, NULL, 0); 1813 if (!rec->opts.nr_cblocks) 1814 rec->opts.nr_cblocks = nr_cblocks_default; 1815 } 1816 #endif 1817 1818 return 0; 1819 } 1820 1821 struct clockid_map { 1822 const char *name; 1823 int clockid; 1824 }; 1825 1826 #define CLOCKID_MAP(n, c) \ 1827 { .name = n, .clockid = (c), } 1828 1829 #define CLOCKID_END { .name = NULL, } 1830 1831 1832 /* 1833 * Add the missing ones, we need to build on many distros... 
1834 */ 1835 #ifndef CLOCK_MONOTONIC_RAW 1836 #define CLOCK_MONOTONIC_RAW 4 1837 #endif 1838 #ifndef CLOCK_BOOTTIME 1839 #define CLOCK_BOOTTIME 7 1840 #endif 1841 #ifndef CLOCK_TAI 1842 #define CLOCK_TAI 11 1843 #endif 1844 1845 static const struct clockid_map clockids[] = { 1846 /* available for all events, NMI safe */ 1847 CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), 1848 CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), 1849 1850 /* available for some events */ 1851 CLOCKID_MAP("realtime", CLOCK_REALTIME), 1852 CLOCKID_MAP("boottime", CLOCK_BOOTTIME), 1853 CLOCKID_MAP("tai", CLOCK_TAI), 1854 1855 /* available for the lazy */ 1856 CLOCKID_MAP("mono", CLOCK_MONOTONIC), 1857 CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), 1858 CLOCKID_MAP("real", CLOCK_REALTIME), 1859 CLOCKID_MAP("boot", CLOCK_BOOTTIME), 1860 1861 CLOCKID_END, 1862 }; 1863 1864 static int get_clockid_res(clockid_t clk_id, u64 *res_ns) 1865 { 1866 struct timespec res; 1867 1868 *res_ns = 0; 1869 if (!clock_getres(clk_id, &res)) 1870 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; 1871 else 1872 pr_warning("WARNING: Failed to determine specified clock resolution.\n"); 1873 1874 return 0; 1875 } 1876 1877 static int parse_clockid(const struct option *opt, const char *str, int unset) 1878 { 1879 struct record_opts *opts = (struct record_opts *)opt->value; 1880 const struct clockid_map *cm; 1881 const char *ostr = str; 1882 1883 if (unset) { 1884 opts->use_clockid = 0; 1885 return 0; 1886 } 1887 1888 /* no arg passed */ 1889 if (!str) 1890 return 0; 1891 1892 /* no setting it twice */ 1893 if (opts->use_clockid) 1894 return -1; 1895 1896 opts->use_clockid = true; 1897 1898 /* if its a number, we're done */ 1899 if (sscanf(str, "%d", &opts->clockid) == 1) 1900 return get_clockid_res(opts->clockid, &opts->clockid_res_ns); 1901 1902 /* allow a "CLOCK_" prefix to the name */ 1903 if (!strncasecmp(str, "CLOCK_", 6)) 1904 str += 6; 1905 1906 for (cm = clockids; cm->name; cm++) { 1907 if (!strcasecmp(str, cm->name)) { 1908 opts->clockid = cm->clockid; 1909 return get_clockid_res(opts->clockid, 1910 &opts->clockid_res_ns); 1911 } 1912 } 1913 1914 opts->use_clockid = false; 1915 ui__warning("unknown clockid %s, check man page\n", ostr); 1916 return -1; 1917 } 1918 1919 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 1920 { 1921 struct record_opts *opts = (struct record_opts *)opt->value; 1922 1923 if (unset || !str) 1924 return 0; 1925 1926 if (!strcasecmp(str, "node")) 1927 opts->affinity = PERF_AFFINITY_NODE; 1928 else if (!strcasecmp(str, "cpu")) 1929 opts->affinity = PERF_AFFINITY_CPU; 1930 1931 return 0; 1932 } 1933 1934 static int record__parse_mmap_pages(const struct option *opt, 1935 const char *str, 1936 int unset __maybe_unused) 1937 { 1938 struct record_opts *opts = opt->value; 1939 char *s, *p; 1940 unsigned int mmap_pages; 1941 int ret; 1942 1943 if (!str) 1944 return -EINVAL; 1945 1946 s = strdup(str); 1947 if (!s) 1948 return -ENOMEM; 1949 1950 p = strchr(s, ','); 1951 if (p) 1952 *p = '\0'; 1953 1954 if (*s) { 1955 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); 1956 if (ret) 1957 goto out_free; 1958 opts->mmap_pages = mmap_pages; 1959 } 1960 1961 if (!p) { 1962 ret = 0; 1963 goto out_free; 1964 } 1965 1966 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); 1967 if (ret) 1968 goto out_free; 1969 1970 opts->auxtrace_mmap_pages = mmap_pages; 1971 1972 out_free: 1973 free(s); 1974 return ret; 1975 } 1976 1977 static void switch_output_size_warn(struct record *rec) 1978 { 
1979 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages); 1980 struct switch_output *s = &rec->switch_output; 1981 1982 wakeup_size /= 2; 1983 1984 if (s->size < wakeup_size) { 1985 char buf[100]; 1986 1987 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 1988 pr_warning("WARNING: switch-output data size lower than " 1989 "wakeup kernel buffer size (%s) " 1990 "expect bigger perf.data sizes\n", buf); 1991 } 1992 } 1993 1994 static int switch_output_setup(struct record *rec) 1995 { 1996 struct switch_output *s = &rec->switch_output; 1997 static struct parse_tag tags_size[] = { 1998 { .tag = 'B', .mult = 1 }, 1999 { .tag = 'K', .mult = 1 << 10 }, 2000 { .tag = 'M', .mult = 1 << 20 }, 2001 { .tag = 'G', .mult = 1 << 30 }, 2002 { .tag = 0 }, 2003 }; 2004 static struct parse_tag tags_time[] = { 2005 { .tag = 's', .mult = 1 }, 2006 { .tag = 'm', .mult = 60 }, 2007 { .tag = 'h', .mult = 60*60 }, 2008 { .tag = 'd', .mult = 60*60*24 }, 2009 { .tag = 0 }, 2010 }; 2011 unsigned long val; 2012 2013 if (!s->set) 2014 return 0; 2015 2016 if (!strcmp(s->str, "signal")) { 2017 s->signal = true; 2018 pr_debug("switch-output with SIGUSR2 signal\n"); 2019 goto enabled; 2020 } 2021 2022 val = parse_tag_value(s->str, tags_size); 2023 if (val != (unsigned long) -1) { 2024 s->size = val; 2025 pr_debug("switch-output with %s size threshold\n", s->str); 2026 goto enabled; 2027 } 2028 2029 val = parse_tag_value(s->str, tags_time); 2030 if (val != (unsigned long) -1) { 2031 s->time = val; 2032 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2033 s->str, s->time); 2034 goto enabled; 2035 } 2036 2037 return -1; 2038 2039 enabled: 2040 rec->timestamp_filename = true; 2041 s->enabled = true; 2042 2043 if (s->size && !rec->opts.no_buffering) 2044 switch_output_size_warn(rec); 2045 2046 return 0; 2047 } 2048 2049 static const char * const __record_usage[] = { 2050 "perf record [<options>] [<command>]", 2051 "perf record [<options>] -- <command> [<options>]", 2052 NULL 2053 }; 2054 const char * const *record_usage = __record_usage; 2055 2056 /* 2057 * XXX Ideally would be local to cmd_record() and passed to a record__new 2058 * because we need to have access to it in record__exit, that is called 2059 * after cmd_record() exits, but since record_options need to be accessible to 2060 * builtin-script, leave it here. 2061 * 2062 * At least we don't ouch it in all the other functions here directly. 2063 * 2064 * Just say no to tons of global variables, sigh. 2065 */ 2066 static struct record record = { 2067 .opts = { 2068 .sample_time = true, 2069 .mmap_pages = UINT_MAX, 2070 .user_freq = UINT_MAX, 2071 .user_interval = ULLONG_MAX, 2072 .freq = 4000, 2073 .target = { 2074 .uses_mmap = true, 2075 .default_per_cpu = true, 2076 }, 2077 .mmap_flush = MMAP_FLUSH_DEFAULT, 2078 }, 2079 .tool = { 2080 .sample = process_sample_event, 2081 .fork = perf_event__process_fork, 2082 .exit = perf_event__process_exit, 2083 .comm = perf_event__process_comm, 2084 .namespaces = perf_event__process_namespaces, 2085 .mmap = perf_event__process_mmap, 2086 .mmap2 = perf_event__process_mmap2, 2087 .ordered_events = true, 2088 }, 2089 }; 2090 2091 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 2092 "\n\t\t\t\tDefault: fp"; 2093 2094 static bool dry_run; 2095 2096 /* 2097 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 2098 * with it and switch to use the library functions in perf_evlist that came 2099 * from builtin-record.c, i.e. 
use record_opts, 2100 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 2101 * using pipes, etc. 2102 */ 2103 static struct option __record_options[] = { 2104 OPT_CALLBACK('e', "event", &record.evlist, "event", 2105 "event selector. use 'perf list' to list available events", 2106 parse_events_option), 2107 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 2108 "event filter", parse_filter), 2109 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 2110 NULL, "don't record events from perf itself", 2111 exclude_perf), 2112 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 2113 "record events on existing process id"), 2114 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 2115 "record events on existing thread id"), 2116 OPT_INTEGER('r', "realtime", &record.realtime_prio, 2117 "collect data with this RT SCHED_FIFO priority"), 2118 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 2119 "collect data without buffering"), 2120 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 2121 "collect raw sample records from all opened counters"), 2122 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 2123 "system-wide collection from all CPUs"), 2124 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 2125 "list of cpus to monitor"), 2126 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 2127 OPT_STRING('o', "output", &record.data.path, "file", 2128 "output file name"), 2129 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 2130 &record.opts.no_inherit_set, 2131 "child tasks do not inherit counters"), 2132 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 2133 "synthesize non-sample events at the end of output"), 2134 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 2135 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 2136 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 2137 "Fail if the specified frequency can't be used"), 2138 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 2139 "profile at this frequency", 2140 record__parse_freq), 2141 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 2142 "number of mmap data pages and AUX area tracing mmap pages", 2143 record__parse_mmap_pages), 2144 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 2145 "Minimal number of bytes that are extracted from mmap data pages (default: 1)", 2146 record__mmap_flush_parse), 2147 OPT_BOOLEAN(0, "group", &record.opts.group, 2148 "put the counters into a counter group"), 2149 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 2150 NULL, "enables call-graph recording", 2151 &record_callchain_opt), 2152 OPT_CALLBACK(0, "call-graph", &record.opts, 2153 "record_mode[,record_size]", record_callchain_help, 2154 &record_parse_callchain_opt), 2155 OPT_INCR('v', "verbose", &verbose, 2156 "be more verbose (show counter open errors, etc)"), 2157 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 2158 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 2159 "per thread counts"), 2160 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 2161 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 2162 "Record the sample physical addresses"), 2163 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 2164 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 2165 &record.opts.sample_time_set, 2166 "Record the sample timestamps"), 2167
OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 2168 "Record the sample period"), 2169 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 2170 "don't sample"), 2171 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 2172 &record.no_buildid_cache_set, 2173 "do not update the buildid cache"), 2174 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 2175 &record.no_buildid_set, 2176 "do not collect buildids in perf.data"), 2177 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 2178 "monitor event in cgroup name only", 2179 parse_cgroups), 2180 OPT_UINTEGER('D', "delay", &record.opts.initial_delay, 2181 "ms to wait before starting measurement after program start"), 2182 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 2183 "user to profile"), 2184 2185 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 2186 "branch any", "sample any taken branches", 2187 parse_branch_stack), 2188 2189 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 2190 "branch filter mask", "branch stack filter modes", 2191 parse_branch_stack), 2192 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 2193 "sample by weight (on special events only)"), 2194 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 2195 "sample transaction flags (special events only)"), 2196 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 2197 "use per-thread mmaps"), 2198 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2199 "sample selected machine registers on interrupt," 2200 " use '-I?' to list register names", parse_intr_regs), 2201 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2202 "sample selected machine registers on interrupt," 2203 " use '--user-regs=?' 
to list register names", parse_user_regs), 2204 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2205 "Record running/enabled time of read (:S) events"), 2206 OPT_CALLBACK('k', "clockid", &record.opts, 2207 "clockid", "clockid to use for events, see clock_gettime()", 2208 parse_clockid), 2209 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 2210 "opts", "AUX area tracing Snapshot Mode", ""), 2211 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 2212 "per thread proc mmap processing timeout in ms"), 2213 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 2214 "Record namespaces events"), 2215 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, 2216 "Record context switch events"), 2217 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 2218 "Configure all used events to run in kernel space.", 2219 PARSE_OPT_EXCLUSIVE), 2220 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2221 "Configure all used events to run in user space.", 2222 PARSE_OPT_EXCLUSIVE), 2223 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 2224 "collect kernel callchains"), 2225 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 2226 "collect user callchains"), 2227 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2228 "clang binary to use for compiling BPF scriptlets"), 2229 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2230 "options passed to clang when compiling BPF scriptlets"), 2231 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 2232 "file", "vmlinux pathname"), 2233 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 2234 "Record build-id of all DSOs regardless of hits"), 2235 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 2236 "append timestamp to output filename"), 2237 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 2238 "Record timestamp boundary (time of first/last samples)"), 2239 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 2240 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 2241 "Switch output when receiving SIGUSR2 (signal) or when crossing a size or time threshold", 2242 "signal"), 2243 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 2244 "Limit number of switch output generated files"), 2245 OPT_BOOLEAN(0, "dry-run", &dry_run, 2246 "Parse options then exit"), 2247 #ifdef HAVE_AIO_SUPPORT 2248 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 2249 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 2250 record__aio_parse), 2251 #endif 2252 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2253 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2254 record__parse_affinity), 2255 #ifdef HAVE_ZSTD_SUPPORT 2256 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, 2257 "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)", 2258 record__parse_comp_level), 2259 #endif 2260 OPT_END() 2261 }; 2262 2263 struct option *record_options = __record_options; 2264 2265 int cmd_record(int argc, const char **argv) 2266 { 2267 int err; 2268 struct record *rec = &record; 2269 char errbuf[BUFSIZ]; 2270 2271 setlocale(LC_ALL, ""); 2272 2273 #ifndef HAVE_LIBBPF_SUPPORT 2274 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 2275 set_nobuild('\0', "clang-path", true); 2276
set_nobuild('\0', "clang-opt", true); 2277 # undef set_nobuild 2278 #endif 2279 2280 #ifndef HAVE_BPF_PROLOGUE 2281 # if !defined (HAVE_DWARF_SUPPORT) 2282 # define REASON "NO_DWARF=1" 2283 # elif !defined (HAVE_LIBBPF_SUPPORT) 2284 # define REASON "NO_LIBBPF=1" 2285 # else 2286 # define REASON "this architecture doesn't support BPF prologue" 2287 # endif 2288 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 2289 set_nobuild('\0', "vmlinux", true); 2290 # undef set_nobuild 2291 # undef REASON 2292 #endif 2293 2294 CPU_ZERO(&rec->affinity_mask); 2295 rec->opts.affinity = PERF_AFFINITY_SYS; 2296 2297 rec->evlist = evlist__new(); 2298 if (rec->evlist == NULL) 2299 return -ENOMEM; 2300 2301 err = perf_config(perf_record_config, rec); 2302 if (err) 2303 return err; 2304 2305 argc = parse_options(argc, argv, record_options, record_usage, 2306 PARSE_OPT_STOP_AT_NON_OPTION); 2307 if (quiet) 2308 perf_quiet_option(); 2309 2310 /* Make system wide (-a) the default target. */ 2311 if (!argc && target__none(&rec->opts.target)) 2312 rec->opts.target.system_wide = true; 2313 2314 if (nr_cgroups && !rec->opts.target.system_wide) { 2315 usage_with_options_msg(record_usage, record_options, 2316 "cgroup monitoring only available in system-wide mode"); 2317 2318 } 2319 2320 if (rec->opts.comp_level != 0) { 2321 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 2322 rec->no_buildid = true; 2323 } 2324 2325 if (rec->opts.record_switch_events && 2326 !perf_can_record_switch_events()) { 2327 ui__error("kernel does not support recording context switch events\n"); 2328 parse_options_usage(record_usage, record_options, "switch-events", 0); 2329 return -EINVAL; 2330 } 2331 2332 if (switch_output_setup(rec)) { 2333 parse_options_usage(record_usage, record_options, "switch-output", 0); 2334 return -EINVAL; 2335 } 2336 2337 if (rec->switch_output.time) { 2338 signal(SIGALRM, alarm_sig_handler); 2339 alarm(rec->switch_output.time); 2340 } 2341 2342 if (rec->switch_output.num_files) { 2343 rec->switch_output.filenames = calloc(sizeof(char *), 2344 rec->switch_output.num_files); 2345 if (!rec->switch_output.filenames) 2346 return -EINVAL; 2347 } 2348 2349 /* 2350 * Allow aliases to facilitate the lookup of symbols for address 2351 * filters. Refer to auxtrace_parse_filters(). 2352 */ 2353 symbol_conf.allow_aliases = true; 2354 2355 symbol__init(NULL); 2356 2357 err = record__auxtrace_init(rec); 2358 if (err) 2359 goto out; 2360 2361 if (dry_run) 2362 goto out; 2363 2364 err = bpf__setup_stdout(rec->evlist); 2365 if (err) { 2366 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 2367 pr_err("ERROR: Setup BPF stdout failed: %s\n", 2368 errbuf); 2369 goto out; 2370 } 2371 2372 err = -ENOMEM; 2373 2374 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) 2375 pr_warning( 2376 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 2377 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 2378 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 2379 "file is not found in the buildid cache or in the vmlinux path.\n\n" 2380 "Samples in kernel modules won't be resolved at all.\n\n" 2381 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n" 2382 "even with a suitable vmlinux or kallsyms file.\n\n"); 2383 2384 if (rec->no_buildid_cache || rec->no_buildid) { 2385 disable_buildid_cache(); 2386 } else if (rec->switch_output.enabled) { 2387 /* 2388 * In 'perf record --switch-output', disable buildid 2389 * generation by default to reduce data file switching 2390 * overhead. Still generate buildids if they are explicitly 2391 * requested, using 2392 * 2393 * perf record --switch-output --no-no-buildid \ 2394 * --no-no-buildid-cache 2395 * 2396 * The following code is equivalent to: 2397 * 2398 * if ((rec->no_buildid || !rec->no_buildid_set) && 2399 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 2400 * disable_buildid_cache(); 2401 */ 2402 bool disable = true; 2403 2404 if (rec->no_buildid_set && !rec->no_buildid) 2405 disable = false; 2406 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 2407 disable = false; 2408 if (disable) { 2409 rec->no_buildid = true; 2410 rec->no_buildid_cache = true; 2411 disable_buildid_cache(); 2412 } 2413 } 2414 2415 if (record.opts.overwrite) 2416 record.opts.tail_synthesize = true; 2417 2418 if (rec->evlist->core.nr_entries == 0 && 2419 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { 2420 pr_err("Not enough memory for event selector list\n"); 2421 goto out; 2422 } 2423 2424 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 2425 rec->opts.no_inherit = true; 2426 2427 err = target__validate(&rec->opts.target); 2428 if (err) { 2429 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2430 ui__warning("%s\n", errbuf); 2431 } 2432 2433 err = target__parse_uid(&rec->opts.target); 2434 if (err) { 2435 int saved_errno = errno; 2436 2437 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 2438 ui__error("%s", errbuf); 2439 2440 err = -saved_errno; 2441 goto out; 2442 } 2443 2444 /* Enable ignoring missing threads when -u/-p option is defined. */ 2445 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 2446 2447 err = -ENOMEM; 2448 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 2449 usage_with_options(record_usage, record_options); 2450 2451 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 2452 if (err) 2453 goto out; 2454 2455 /* 2456 * We take all buildids when the file contains 2457 * AUX area tracing data because we do not decode the 2458 * trace, which would take too long.
2459 */ 2460 if (rec->opts.full_auxtrace) 2461 rec->buildid_all = true; 2462 2463 if (record_opts__config(&rec->opts)) { 2464 err = -EINVAL; 2465 goto out; 2466 } 2467 2468 if (rec->opts.nr_cblocks > nr_cblocks_max) 2469 rec->opts.nr_cblocks = nr_cblocks_max; 2470 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2471 2472 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2473 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2474 2475 if (rec->opts.comp_level > comp_level_max) 2476 rec->opts.comp_level = comp_level_max; 2477 pr_debug("comp level: %d\n", rec->opts.comp_level); 2478 2479 err = __cmd_record(&record, argc, argv); 2480 out: 2481 evlist__delete(rec->evlist); 2482 symbol__exit(); 2483 auxtrace_record__free(rec->itr); 2484 return err; 2485 } 2486 2487 static void snapshot_sig_handler(int sig __maybe_unused) 2488 { 2489 struct record *rec = &record; 2490 2491 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2492 trigger_hit(&auxtrace_snapshot_trigger); 2493 auxtrace_record__snapshot_started = 1; 2494 if (auxtrace_record__snapshot_start(record.itr)) 2495 trigger_error(&auxtrace_snapshot_trigger); 2496 } 2497 2498 if (switch_output_signal(rec)) 2499 trigger_hit(&switch_output_trigger); 2500 } 2501 2502 static void alarm_sig_handler(int sig __maybe_unused) 2503 { 2504 struct record *rec = &record; 2505 2506 if (switch_output_time(rec)) 2507 trigger_hit(&switch_output_trigger); 2508 } 2509