// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool		enabled;
	bool		signal;
	unsigned long	size;
	unsigned long	time;
	const char	*str;
	bool		set;
	char		**filenames;
	int		num_files;
	int		cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];
		int		ack[2];
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

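/*
 * Maps an entry in the evlist's pollfd array to the corresponding entry
 * duplicated into a worker thread's pollfd array, so that revents observed
 * by the thread can be propagated back to the evlist (see
 * record__update_evlist_pollfd_from_thread()).
 */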
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	u64			thread_bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct evlist		*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	bool			off_cpu;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;
	struct thread_mask	*thread_masks;
	struct record_thread	*thread_data;
	struct pollfd_index_map	*index_map;
	size_t			index_map_sz;
	size_t			index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
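/*
 * Queue an asynchronous write of 'size' bytes from 'buf' at file offset
 * 'off' on 'trace_fd', retrying while aio_write() fails with EAGAIN.
 * Completion is checked later via record__aio_complete()/record__aio_sync().
 */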
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited on before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into the free
	 * map->aio.data[] buffer to release space in the kernel buffer as fast
	 * as possible, calling perf_mmap__consume() from the perf_mmap__push()
	 * function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

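/*
 * Default callback for synthesized events: write them straight into the
 * perf.data output via record__write().
 */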
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}

static struct mutex synth_lock;

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	int ret;

	mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf   = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static volatile sig_atomic_t done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
	if (done_fd >= 0) {
		u64 tmp = 1;
		int orig_errno = errno;

		/*
		 * It is possible for this signal handler to run after done is
		 * checked in the main loop, but before the perf counter fds are
		 * polled. If this happens, the poll() will continue to wait
		 * even though done is set, and will only break out if either
		 * another signal is received, or the counters are ready for
		 * read. To ensure the poll() doesn't sleep when done is set,
		 * use an eventfd (done_fd) to wake up the poll().
		 */
		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
			pr_err("failed to signal wakeup fd, error: %m\n");

		errno = orig_errno;
	}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

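/*
 * Add a dummy event opened on all CPUs with attr.text_poke and attr.ksymbol
 * set, so that kernel text modifications are captured, unless an event with
 * attr.text_poke is already configured.
 */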
static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	evsel = evlist__add_dummy_on_all_cpus(evlist);
	if (!evsel)
		return -ENOMEM;

	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;
	evsel->immediate = true;
	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static int record__config_off_cpu(struct record *rec)
{
	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
}

static int record__config_tracking_events(struct record *rec)
{
	struct record_opts *opts = &rec->opts;
	struct evlist *evlist = rec->evlist;
	struct evsel *evsel;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add
	 * a tracking event so that we can track PERF_RECORD_MMAP to cover the
	 * delay of waiting or event synthesis.
	 */
	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmus__num_core_pmus() > 1) {
		evsel = evlist__findnew_tracking_event(evlist, false);
		if (!evsel)
			return -ENOMEM;

		/*
		 * Enable the tracking event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->target.initial_delay && !evsel->immediate &&
		    !target__has_cpu(&opts->target))
			evsel->core.attr.enable_on_exec = 1;
		else
			evsel->immediate = 1;
	}

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static bool evlist__per_thread(struct evlist *evlist)
{
	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
}

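/*
 * Distribute the evlist's mmaps to this worker thread: in per-thread mode the
 * thread gets all of them, otherwise only the mmaps whose CPU falls into the
 * thread's maps mask.
 */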
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.all_cpus;
	bool per_thread = evlist__per_thread(evlist);

	if (per_thread)
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (per_thread ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
						    int evlist_pollfd_index,
						    int thread_pollfd_index)
{
	size_t x = rec->index_map_cnt;

	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
		return -ENOMEM;
	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
	rec->index_map_cnt += 1;
	return 0;
}

static int record__update_evlist_pollfd_from_thread(struct record *rec,
						    struct evlist *evlist,
						    struct record_thread *thread_data)
{
	struct pollfd *e_entries = evlist->core.pollfd.entries;
	struct pollfd *t_entries = thread_data->pollfd.entries;
	int err = 0;
	size_t i;

	for (i = 0; i < rec->index_map_cnt; i++) {
		int e_pos = rec->index_map[i].evlist_pollfd_index;
		int t_pos = rec->index_map[i].thread_pollfd_index;

		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
		    e_entries[e_pos].events != t_entries[t_pos].events) {
			pr_err("Thread and evlist pollfd index mismatch\n");
			err = -EINVAL;
			continue;
		}
		e_entries[e_pos].revents = t_entries[t_pos].revents;
	}
	return err;
}

static int record__dup_non_perf_events(struct record *rec,
				       struct evlist *evlist,
				       struct record_thread *thread_data)
{
	struct fdarray *fda = &evlist->core.pollfd;
	int i, ret;

	for (i = 0; i < fda->nr; i++) {
		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
			continue;
		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
		if (ret < 0) {
			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
			return ret;
		}
		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
			  thread_data, ret, fda->entries[i].fd);
		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
		if (ret < 0) {
			pr_err("Failed to map thread and evlist pollfd indexes\n");
			return ret;
		}
	}
	return 0;
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				 thread_data, thread_data[t].ctlfd_pos,
				 thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();

			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
			if (ret < 0)
				goto out_free;

			thread_data[t].ctlfd_pos = -1; /* Not used */
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

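/*
 * Open all events in the evlist, falling back to weaker configurations or
 * breaking up weak groups when the kernel rejects an event, then apply
 * filters and mmap the resulting file descriptors.
 */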
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the timestamps
	 * of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static struct perf_event_header finished_init_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_INIT,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

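/*
 * Compress src into dst as one or more PERF_RECORD_COMPRESSED records. In
 * threaded (directory) mode the per-mmap zstd stream and byte counters are
 * used, otherwise the session-wide ones.
 */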
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed  += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed  += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

#ifdef HAVE_LIBTRACEEVENT
	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
#endif

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 needs_mmap,
						 rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

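/*
 * Emit PERF_RECORD_FINISHED_INIT at the point matching the requested
 * synthesis order (head or tail, see rec->opts.tail_synthesize).
 */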
static int write_finished_init(struct record *rec, bool tail)
{
	if (rec->opts.tail_synthesize != tail)
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same Size: "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, which leaves the newly created perf.data
		 * without map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
		write_finished_init(rec, false);
	}
	return fd;
}

static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
					struct perf_record_lost_samples *lost,
					int cpu_idx, int thread_idx, u64 lost_count,
					u16 misc_flag)
{
	struct perf_sample_id *sid;
	struct perf_sample sample = {};
	int id_hdr_size;

	lost->lost = lost_count;
	if (evsel->core.ids) {
		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
		sample.id = sid->id;
	}

	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
						       evsel->core.attr.sample_type, &sample);
	lost->header.size = sizeof(*lost) + id_hdr_size;
	lost->header.misc = misc_flag;
	record__write(rec, NULL, lost, lost->header.size);
}

static void record__read_lost_samples(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct perf_record_lost_samples *lost;
	struct evsel *evsel;

	/* there was an error during record__open */
	if (session->evlist == NULL)
		return;

	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
	if (lost == NULL) {
		pr_debug("Memory allocation failed\n");
		return;
	}

	lost->header.type = PERF_RECORD_LOST_SAMPLES;

	evlist__for_each_entry(session->evlist, evsel) {
		struct xyarray *xy = evsel->core.sample_id;
		u64 lost_count;

		if (xy == NULL || evsel->core.fd == NULL)
			continue;
		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
			continue;
		}

		for (int x = 0; x < xyarray__max_x(xy); x++) {
			for (int y = 0; y < xyarray__max_y(xy); y++) {
				struct perf_counts_values count;

				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
					pr_debug("read LOST count failed\n");
					goto out;
				}

				if (count.lost) {
					__record__save_lost_samples(rec, evsel, lost,
								    x, y, count.lost, 0);
				}
			}
		}

		lost_count = perf_bpf_filter__lost_count(evsel);
		if (lost_count)
			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
	}
out:
	free(lost);
}

static volatile sig_atomic_t workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	err = perf_event__synthesize_id_index(tool,
					      process_synthesized_event,
					      session->evlist, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
kexec).\n" 2027 "Check /proc/modules permission or run as root.\n"); 2028 } 2029 2030 if (perf_guest) { 2031 machines__process_guests(&session->machines, 2032 perf_event__synthesize_guest_os, tool); 2033 } 2034 2035 err = perf_event__synthesize_extra_attr(&rec->tool, 2036 rec->evlist, 2037 process_synthesized_event, 2038 data->is_pipe); 2039 if (err) 2040 goto out; 2041 2042 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2043 process_synthesized_event, 2044 NULL); 2045 if (err < 0) { 2046 pr_err("Couldn't synthesize thread map.\n"); 2047 return err; 2048 } 2049 2050 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2051 process_synthesized_event, NULL); 2052 if (err < 0) { 2053 pr_err("Couldn't synthesize cpu map.\n"); 2054 return err; 2055 } 2056 2057 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2058 machine, opts); 2059 if (err < 0) { 2060 pr_warning("Couldn't synthesize bpf events.\n"); 2061 err = 0; 2062 } 2063 2064 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2065 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2066 machine); 2067 if (err < 0) { 2068 pr_warning("Couldn't synthesize cgroup events.\n"); 2069 err = 0; 2070 } 2071 } 2072 2073 if (rec->opts.nr_threads_synthesize > 1) { 2074 mutex_init(&synth_lock); 2075 perf_set_multithreaded(); 2076 f = process_locked_synthesized_event; 2077 } 2078 2079 if (rec->opts.synth & PERF_SYNTH_TASK) { 2080 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2081 2082 err = __machine__synthesize_threads(machine, tool, &opts->target, 2083 rec->evlist->core.threads, 2084 f, needs_mmap, opts->sample_address, 2085 rec->opts.nr_threads_synthesize); 2086 } 2087 2088 if (rec->opts.nr_threads_synthesize > 1) { 2089 perf_set_singlethreaded(); 2090 mutex_destroy(&synth_lock); 2091 } 2092 2093 out: 2094 return err; 2095 } 2096 2097 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2098 { 2099 struct record *rec = data; 2100 pthread_kill(rec->thread_id, SIGUSR2); 2101 return 0; 2102 } 2103 2104 static int record__setup_sb_evlist(struct record *rec) 2105 { 2106 struct record_opts *opts = &rec->opts; 2107 2108 if (rec->sb_evlist != NULL) { 2109 /* 2110 * We get here if --switch-output-event populated the 2111 * sb_evlist, so associate a callback that will send a SIGUSR2 2112 * to the main thread. 
2113 */ 2114 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2115 rec->thread_id = pthread_self(); 2116 } 2117 #ifdef HAVE_LIBBPF_SUPPORT 2118 if (!opts->no_bpf_event) { 2119 if (rec->sb_evlist == NULL) { 2120 rec->sb_evlist = evlist__new(); 2121 2122 if (rec->sb_evlist == NULL) { 2123 pr_err("Couldn't create side band evlist.\n."); 2124 return -1; 2125 } 2126 } 2127 2128 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2129 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2130 return -1; 2131 } 2132 } 2133 #endif 2134 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2135 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2136 opts->no_bpf_event = true; 2137 } 2138 2139 return 0; 2140 } 2141 2142 static int record__init_clock(struct record *rec) 2143 { 2144 struct perf_session *session = rec->session; 2145 struct timespec ref_clockid; 2146 struct timeval ref_tod; 2147 u64 ref; 2148 2149 if (!rec->opts.use_clockid) 2150 return 0; 2151 2152 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2153 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2154 2155 session->header.env.clock.clockid = rec->opts.clockid; 2156 2157 if (gettimeofday(&ref_tod, NULL) != 0) { 2158 pr_err("gettimeofday failed, cannot set reference time.\n"); 2159 return -1; 2160 } 2161 2162 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2163 pr_err("clock_gettime failed, cannot set reference time.\n"); 2164 return -1; 2165 } 2166 2167 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2168 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2169 2170 session->header.env.clock.tod_ns = ref; 2171 2172 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2173 (u64) ref_clockid.tv_nsec; 2174 2175 session->header.env.clock.clockid_ns = ref; 2176 return 0; 2177 } 2178 2179 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2180 { 2181 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2182 trigger_hit(&auxtrace_snapshot_trigger); 2183 auxtrace_record__snapshot_started = 1; 2184 if (auxtrace_record__snapshot_start(rec->itr)) 2185 trigger_error(&auxtrace_snapshot_trigger); 2186 } 2187 } 2188 2189 static int record__terminate_thread(struct record_thread *thread_data) 2190 { 2191 int err; 2192 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2193 pid_t tid = thread_data->tid; 2194 2195 close(thread_data->pipes.msg[1]); 2196 thread_data->pipes.msg[1] = -1; 2197 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2198 if (err > 0) 2199 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2200 else 2201 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2202 thread->tid, tid); 2203 2204 return 0; 2205 } 2206 2207 static int record__start_threads(struct record *rec) 2208 { 2209 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2210 struct record_thread *thread_data = rec->thread_data; 2211 sigset_t full, mask; 2212 pthread_t handle; 2213 pthread_attr_t attrs; 2214 2215 thread = &thread_data[0]; 2216 2217 if (!record__threads_enabled(rec)) 2218 return 0; 2219 2220 sigfillset(&full); 2221 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2222 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2223 return -1; 2224 } 2225 2226 pthread_attr_init(&attrs); 2227 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2228 2229 for (t = 1; t < nr_threads; t++) { 2230 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2231 
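		/*
		 * Pin each new thread to its affinity mask (when
		 * pthread_attr_setaffinity_np() is available) and wait for it
		 * to ack over its pipe before starting the next one.
		 */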
2232 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2233 pthread_attr_setaffinity_np(&attrs, 2234 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2235 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2236 #endif 2237 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2238 for (tt = 1; tt < t; tt++) 2239 record__terminate_thread(&thread_data[t]); 2240 pr_err("Failed to start threads: %s\n", strerror(errno)); 2241 ret = -1; 2242 goto out_err; 2243 } 2244 2245 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2246 if (err > 0) 2247 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2248 thread_msg_tags[msg]); 2249 else 2250 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2251 thread->tid, rec->thread_data[t].tid); 2252 } 2253 2254 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2255 (cpu_set_t *)thread->mask->affinity.bits); 2256 2257 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2258 2259 out_err: 2260 pthread_attr_destroy(&attrs); 2261 2262 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2263 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2264 ret = -1; 2265 } 2266 2267 return ret; 2268 } 2269 2270 static int record__stop_threads(struct record *rec) 2271 { 2272 int t; 2273 struct record_thread *thread_data = rec->thread_data; 2274 2275 for (t = 1; t < rec->nr_threads; t++) 2276 record__terminate_thread(&thread_data[t]); 2277 2278 for (t = 0; t < rec->nr_threads; t++) { 2279 rec->samples += thread_data[t].samples; 2280 if (!record__threads_enabled(rec)) 2281 continue; 2282 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2283 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2284 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2285 thread_data[t].samples, thread_data[t].waking); 2286 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2287 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2288 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2289 else 2290 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2291 } 2292 2293 return 0; 2294 } 2295 2296 static unsigned long record__waking(struct record *rec) 2297 { 2298 int t; 2299 unsigned long waking = 0; 2300 struct record_thread *thread_data = rec->thread_data; 2301 2302 for (t = 0; t < rec->nr_threads; t++) 2303 waking += thread_data[t].waking; 2304 2305 return waking; 2306 } 2307 2308 static int __cmd_record(struct record *rec, int argc, const char **argv) 2309 { 2310 int err; 2311 int status = 0; 2312 const bool forks = argc > 0; 2313 struct perf_tool *tool = &rec->tool; 2314 struct record_opts *opts = &rec->opts; 2315 struct perf_data *data = &rec->data; 2316 struct perf_session *session; 2317 bool disabled = false, draining = false; 2318 int fd; 2319 float ratio = 0; 2320 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2321 2322 atexit(record__sig_exit); 2323 signal(SIGCHLD, sig_handler); 2324 signal(SIGINT, sig_handler); 2325 signal(SIGTERM, sig_handler); 2326 signal(SIGSEGV, sigsegv_handler); 2327 2328 if (rec->opts.record_namespaces) 2329 tool->namespace_events = true; 2330 2331 if (rec->opts.record_cgroup) { 2332 #ifdef HAVE_FILE_HANDLE 2333 tool->cgroup_events = true; 2334 #else 2335 pr_err("cgroup tracking is not supported\n"); 2336 return -1; 2337 #endif 2338 } 2339 2340 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2341 
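		/*
		 * SIGUSR2 is shared: depending on which triggers get armed
		 * below, it takes an AUX area snapshot and/or switches to a
		 * new output file.
		 */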
signal(SIGUSR2, snapshot_sig_handler); 2342 if (rec->opts.auxtrace_snapshot_mode) 2343 trigger_on(&auxtrace_snapshot_trigger); 2344 if (rec->switch_output.enabled) 2345 trigger_on(&switch_output_trigger); 2346 } else { 2347 signal(SIGUSR2, SIG_IGN); 2348 } 2349 2350 session = perf_session__new(data, tool); 2351 if (IS_ERR(session)) { 2352 pr_err("Perf session creation failed.\n"); 2353 return PTR_ERR(session); 2354 } 2355 2356 if (record__threads_enabled(rec)) { 2357 if (perf_data__is_pipe(&rec->data)) { 2358 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2359 return -1; 2360 } 2361 if (rec->opts.full_auxtrace) { 2362 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2363 return -1; 2364 } 2365 } 2366 2367 fd = perf_data__fd(data); 2368 rec->session = session; 2369 2370 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2371 pr_err("Compression initialization failed.\n"); 2372 return -1; 2373 } 2374 #ifdef HAVE_EVENTFD_SUPPORT 2375 done_fd = eventfd(0, EFD_NONBLOCK); 2376 if (done_fd < 0) { 2377 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2378 status = -1; 2379 goto out_delete_session; 2380 } 2381 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2382 if (err < 0) { 2383 pr_err("Failed to add wakeup eventfd to poll list\n"); 2384 status = err; 2385 goto out_delete_session; 2386 } 2387 #endif // HAVE_EVENTFD_SUPPORT 2388 2389 session->header.env.comp_type = PERF_COMP_ZSTD; 2390 session->header.env.comp_level = rec->opts.comp_level; 2391 2392 if (rec->opts.kcore && 2393 !record__kcore_readable(&session->machines.host)) { 2394 pr_err("ERROR: kcore is not readable.\n"); 2395 return -1; 2396 } 2397 2398 if (record__init_clock(rec)) 2399 return -1; 2400 2401 record__init_features(rec); 2402 2403 if (forks) { 2404 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2405 workload_exec_failed_signal); 2406 if (err < 0) { 2407 pr_err("Couldn't run the workload!\n"); 2408 status = err; 2409 goto out_delete_session; 2410 } 2411 } 2412 2413 /* 2414 * If we have just single event and are sending data 2415 * through pipe, we need to force the ids allocation, 2416 * because we synthesize event name through the pipe 2417 * and need the id for that. 2418 */ 2419 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2420 rec->opts.sample_id = true; 2421 2422 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2423 rec->timestamp_filename = false; 2424 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2425 } 2426 2427 evlist__uniquify_name(rec->evlist); 2428 2429 evlist__config(rec->evlist, opts, &callchain_param); 2430 2431 /* Debug message used by test scripts */ 2432 pr_debug3("perf record opening and mmapping events\n"); 2433 if (record__open(rec) != 0) { 2434 err = -1; 2435 goto out_free_threads; 2436 } 2437 /* Debug message used by test scripts */ 2438 pr_debug3("perf record done opening and mmapping events\n"); 2439 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2440 2441 if (rec->opts.kcore) { 2442 err = record__kcore_copy(&session->machines.host, data); 2443 if (err) { 2444 pr_err("ERROR: Failed to copy kcore\n"); 2445 goto out_free_threads; 2446 } 2447 } 2448 2449 /* 2450 * Normally perf_session__new would do this, but it doesn't have the 2451 * evlist. 
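 * Ordered (timestamp sorted) processing needs the id/time info that
 * sample_id_all attaches to every record, so fall back to unordered
 * processing when it is missing.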
2452 */ 2453 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2454 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2455 rec->tool.ordered_events = false; 2456 } 2457 2458 if (evlist__nr_groups(rec->evlist) == 0) 2459 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2460 2461 if (data->is_pipe) { 2462 err = perf_header__write_pipe(fd); 2463 if (err < 0) 2464 goto out_free_threads; 2465 } else { 2466 err = perf_session__write_header(session, rec->evlist, fd, false); 2467 if (err < 0) 2468 goto out_free_threads; 2469 } 2470 2471 err = -1; 2472 if (!rec->no_buildid 2473 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2474 pr_err("Couldn't generate buildids. " 2475 "Use --no-buildid to profile anyway.\n"); 2476 goto out_free_threads; 2477 } 2478 2479 err = record__setup_sb_evlist(rec); 2480 if (err) 2481 goto out_free_threads; 2482 2483 err = record__synthesize(rec, false); 2484 if (err < 0) 2485 goto out_free_threads; 2486 2487 if (rec->realtime_prio) { 2488 struct sched_param param; 2489 2490 param.sched_priority = rec->realtime_prio; 2491 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2492 pr_err("Could not set realtime priority.\n"); 2493 err = -1; 2494 goto out_free_threads; 2495 } 2496 } 2497 2498 if (record__start_threads(rec)) 2499 goto out_free_threads; 2500 2501 /* 2502 * When perf is starting the traced process, all the events 2503 * (apart from group members) have enable_on_exec=1 set, 2504 * so don't spoil it by prematurely enabling them. 2505 */ 2506 if (!target__none(&opts->target) && !opts->target.initial_delay) 2507 evlist__enable(rec->evlist); 2508 2509 /* 2510 * Let the child rip 2511 */ 2512 if (forks) { 2513 struct machine *machine = &session->machines.host; 2514 union perf_event *event; 2515 pid_t tgid; 2516 2517 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2518 if (event == NULL) { 2519 err = -ENOMEM; 2520 goto out_child; 2521 } 2522 2523 /* 2524 * Some H/W events are generated before COMM event 2525 * which is emitted during exec(), so perf script 2526 * cannot see a correct process name for those events. 2527 * Synthesize COMM event to prevent it. 2528 */ 2529 tgid = perf_event__synthesize_comm(tool, event, 2530 rec->evlist->workload.pid, 2531 process_synthesized_event, 2532 machine); 2533 free(event); 2534 2535 if (tgid == -1) 2536 goto out_child; 2537 2538 event = malloc(sizeof(event->namespaces) + 2539 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2540 machine->id_hdr_size); 2541 if (event == NULL) { 2542 err = -ENOMEM; 2543 goto out_child; 2544 } 2545 2546 /* 2547 * Synthesize NAMESPACES event for the command specified.
2548 */ 2549 perf_event__synthesize_namespaces(tool, event, 2550 rec->evlist->workload.pid, 2551 tgid, process_synthesized_event, 2552 machine); 2553 free(event); 2554 2555 evlist__start_workload(rec->evlist); 2556 } 2557 2558 if (opts->target.initial_delay) { 2559 pr_info(EVLIST_DISABLED_MSG); 2560 if (opts->target.initial_delay > 0) { 2561 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2562 evlist__enable(rec->evlist); 2563 pr_info(EVLIST_ENABLED_MSG); 2564 } 2565 } 2566 2567 err = event_enable_timer__start(rec->evlist->eet); 2568 if (err) 2569 goto out_child; 2570 2571 /* Debug message used by test scripts */ 2572 pr_debug3("perf record has started\n"); 2573 fflush(stderr); 2574 2575 trigger_ready(&auxtrace_snapshot_trigger); 2576 trigger_ready(&switch_output_trigger); 2577 perf_hooks__invoke_record_start(); 2578 2579 /* 2580 * Must write FINISHED_INIT so it will be seen after all other 2581 * synthesized user events, but before any regular events. 2582 */ 2583 err = write_finished_init(rec, false); 2584 if (err < 0) 2585 goto out_child; 2586 2587 for (;;) { 2588 unsigned long long hits = thread->samples; 2589 2590 /* 2591 * rec->evlist->bkw_mmap_state is possible to be 2592 * BKW_MMAP_EMPTY here: when done == true and 2593 * hits != rec->samples in previous round. 2594 * 2595 * evlist__toggle_bkw_mmap ensure we never 2596 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2597 */ 2598 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2599 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2600 2601 if (record__mmap_read_all(rec, false) < 0) { 2602 trigger_error(&auxtrace_snapshot_trigger); 2603 trigger_error(&switch_output_trigger); 2604 err = -1; 2605 goto out_child; 2606 } 2607 2608 if (auxtrace_record__snapshot_started) { 2609 auxtrace_record__snapshot_started = 0; 2610 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2611 record__read_auxtrace_snapshot(rec, false); 2612 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2613 pr_err("AUX area tracing snapshot failed\n"); 2614 err = -1; 2615 goto out_child; 2616 } 2617 } 2618 2619 if (trigger_is_hit(&switch_output_trigger)) { 2620 /* 2621 * If switch_output_trigger is hit, the data in 2622 * overwritable ring buffer should have been collected, 2623 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2624 * 2625 * If SIGUSR2 raise after or during record__mmap_read_all(), 2626 * record__mmap_read_all() didn't collect data from 2627 * overwritable ring buffer. Read again. 2628 */ 2629 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2630 continue; 2631 trigger_ready(&switch_output_trigger); 2632 2633 /* 2634 * Reenable events in overwrite ring buffer after 2635 * record__mmap_read_all(): we should have collected 2636 * data from it. 2637 */ 2638 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2639 2640 if (!quiet) 2641 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2642 record__waking(rec)); 2643 thread->waking = 0; 2644 fd = record__switch_output(rec, false); 2645 if (fd < 0) { 2646 pr_err("Failed to switch to new file\n"); 2647 trigger_error(&switch_output_trigger); 2648 err = fd; 2649 goto out_child; 2650 } 2651 2652 /* re-arm the alarm */ 2653 if (rec->switch_output.time) 2654 alarm(rec->switch_output.time); 2655 } 2656 2657 if (hits == thread->samples) { 2658 if (done || draining) 2659 break; 2660 err = fdarray__poll(&thread->pollfd, -1); 2661 /* 2662 * Propagate error, only if there's any. Ignore positive 2663 * number of returned events and interrupt error. 
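 * A positive return just means some fds became ready and the next
 * loop iteration will re-read the mmaps; EINTR typically means a
 * signal (SIGINT, SIGUSR2, SIGALRM, ...) arrived and is handled via
 * 'done' and the triggers.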
2664 */ 2665 if (err > 0 || (err < 0 && errno == EINTR)) 2666 err = 0; 2667 thread->waking++; 2668 2669 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2670 record__thread_munmap_filtered, NULL) == 0) 2671 draining = true; 2672 2673 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2674 if (err) 2675 goto out_child; 2676 } 2677 2678 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2679 switch (cmd) { 2680 case EVLIST_CTL_CMD_SNAPSHOT: 2681 hit_auxtrace_snapshot_trigger(rec); 2682 evlist__ctlfd_ack(rec->evlist); 2683 break; 2684 case EVLIST_CTL_CMD_STOP: 2685 done = 1; 2686 break; 2687 case EVLIST_CTL_CMD_ACK: 2688 case EVLIST_CTL_CMD_UNSUPPORTED: 2689 case EVLIST_CTL_CMD_ENABLE: 2690 case EVLIST_CTL_CMD_DISABLE: 2691 case EVLIST_CTL_CMD_EVLIST: 2692 case EVLIST_CTL_CMD_PING: 2693 default: 2694 break; 2695 } 2696 } 2697 2698 err = event_enable_timer__process(rec->evlist->eet); 2699 if (err < 0) 2700 goto out_child; 2701 if (err) { 2702 err = 0; 2703 done = 1; 2704 } 2705 2706 /* 2707 * When perf is starting the traced process, at the end events 2708 * die with the process and we wait for that. Thus no need to 2709 * disable events in this case. 2710 */ 2711 if (done && !disabled && !target__none(&opts->target)) { 2712 trigger_off(&auxtrace_snapshot_trigger); 2713 evlist__disable(rec->evlist); 2714 disabled = true; 2715 } 2716 } 2717 2718 trigger_off(&auxtrace_snapshot_trigger); 2719 trigger_off(&switch_output_trigger); 2720 2721 if (opts->auxtrace_snapshot_on_exit) 2722 record__auxtrace_snapshot_exit(rec); 2723 2724 if (forks && workload_exec_errno) { 2725 char msg[STRERR_BUFSIZE], strevsels[2048]; 2726 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2727 2728 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2729 2730 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2731 strevsels, argv[0], emsg); 2732 err = -1; 2733 goto out_child; 2734 } 2735 2736 if (!quiet) 2737 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2738 record__waking(rec)); 2739 2740 write_finished_init(rec, true); 2741 2742 if (target__none(&rec->opts.target)) 2743 record__synthesize_workload(rec, true); 2744 2745 out_child: 2746 record__stop_threads(rec); 2747 record__mmap_read_all(rec, true); 2748 out_free_threads: 2749 record__free_thread_data(rec); 2750 evlist__finalize_ctlfd(rec->evlist); 2751 record__aio_mmap_read_sync(rec); 2752 2753 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2754 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2755 session->header.env.comp_ratio = ratio + 0.5; 2756 } 2757 2758 if (forks) { 2759 int exit_status; 2760 2761 if (!child_finished) 2762 kill(rec->evlist->workload.pid, SIGTERM); 2763 2764 wait(&exit_status); 2765 2766 if (err < 0) 2767 status = err; 2768 else if (WIFEXITED(exit_status)) 2769 status = WEXITSTATUS(exit_status); 2770 else if (WIFSIGNALED(exit_status)) 2771 signr = WTERMSIG(exit_status); 2772 } else 2773 status = err; 2774 2775 if (rec->off_cpu) 2776 rec->bytes_written += off_cpu_write(rec->session); 2777 2778 record__read_lost_samples(rec); 2779 record__synthesize(rec, true); 2780 /* this will be recalculated during process_buildids() */ 2781 rec->samples = 0; 2782 2783 if (!err) { 2784 if (!rec->timestamp_filename) { 2785 record__finish_output(rec); 2786 } else { 2787 fd = record__switch_output(rec, true); 2788 if (fd < 0) { 2789 status = fd; 2790 goto out_delete_session; 2791 } 2792 } 2793 } 2794 
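	/*
	 * The output file has been finalized above; what follows is the
	 * record-end hook, the user-facing summary and teardown.
	 */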
2795 perf_hooks__invoke_record_end(); 2796 2797 if (!err && !quiet) { 2798 char samples[128]; 2799 const char *postfix = rec->timestamp_filename ? 2800 ".<timestamp>" : ""; 2801 2802 if (rec->samples && !rec->opts.full_auxtrace) 2803 scnprintf(samples, sizeof(samples), 2804 " (%" PRIu64 " samples)", rec->samples); 2805 else 2806 samples[0] = '\0'; 2807 2808 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2809 perf_data__size(data) / 1024.0 / 1024.0, 2810 data->path, postfix, samples); 2811 if (ratio) { 2812 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2813 rec->session->bytes_transferred / 1024.0 / 1024.0, 2814 ratio); 2815 } 2816 fprintf(stderr, " ]\n"); 2817 } 2818 2819 out_delete_session: 2820 #ifdef HAVE_EVENTFD_SUPPORT 2821 if (done_fd >= 0) { 2822 fd = done_fd; 2823 done_fd = -1; 2824 2825 close(fd); 2826 } 2827 #endif 2828 zstd_fini(&session->zstd_data); 2829 if (!opts->no_bpf_event) 2830 evlist__stop_sb_thread(rec->sb_evlist); 2831 2832 perf_session__delete(session); 2833 return status; 2834 } 2835 2836 static void callchain_debug(struct callchain_param *callchain) 2837 { 2838 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2839 2840 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2841 2842 if (callchain->record_mode == CALLCHAIN_DWARF) 2843 pr_debug("callchain: stack dump size %d\n", 2844 callchain->dump_size); 2845 } 2846 2847 int record_opts__parse_callchain(struct record_opts *record, 2848 struct callchain_param *callchain, 2849 const char *arg, bool unset) 2850 { 2851 int ret; 2852 callchain->enabled = !unset; 2853 2854 /* --no-call-graph */ 2855 if (unset) { 2856 callchain->record_mode = CALLCHAIN_NONE; 2857 pr_debug("callchain: disabled\n"); 2858 return 0; 2859 } 2860 2861 ret = parse_callchain_record_opt(arg, callchain); 2862 if (!ret) { 2863 /* Enable data address sampling for DWARF unwind. 
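   This is the mode selected with e.g. 'perf record --call-graph dwarf,8192',
   where the optional size is the user stack dump size.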
*/ 2864 if (callchain->record_mode == CALLCHAIN_DWARF) 2865 record->sample_address = true; 2866 callchain_debug(callchain); 2867 } 2868 2869 return ret; 2870 } 2871 2872 int record_parse_callchain_opt(const struct option *opt, 2873 const char *arg, 2874 int unset) 2875 { 2876 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2877 } 2878 2879 int record_callchain_opt(const struct option *opt, 2880 const char *arg __maybe_unused, 2881 int unset __maybe_unused) 2882 { 2883 struct callchain_param *callchain = opt->value; 2884 2885 callchain->enabled = true; 2886 2887 if (callchain->record_mode == CALLCHAIN_NONE) 2888 callchain->record_mode = CALLCHAIN_FP; 2889 2890 callchain_debug(callchain); 2891 return 0; 2892 } 2893 2894 static int perf_record_config(const char *var, const char *value, void *cb) 2895 { 2896 struct record *rec = cb; 2897 2898 if (!strcmp(var, "record.build-id")) { 2899 if (!strcmp(value, "cache")) 2900 rec->no_buildid_cache = false; 2901 else if (!strcmp(value, "no-cache")) 2902 rec->no_buildid_cache = true; 2903 else if (!strcmp(value, "skip")) 2904 rec->no_buildid = true; 2905 else if (!strcmp(value, "mmap")) 2906 rec->buildid_mmap = true; 2907 else 2908 return -1; 2909 return 0; 2910 } 2911 if (!strcmp(var, "record.call-graph")) { 2912 var = "call-graph.record-mode"; 2913 return perf_default_config(var, value, cb); 2914 } 2915 #ifdef HAVE_AIO_SUPPORT 2916 if (!strcmp(var, "record.aio")) { 2917 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2918 if (!rec->opts.nr_cblocks) 2919 rec->opts.nr_cblocks = nr_cblocks_default; 2920 } 2921 #endif 2922 if (!strcmp(var, "record.debuginfod")) { 2923 rec->debuginfod.urls = strdup(value); 2924 if (!rec->debuginfod.urls) 2925 return -ENOMEM; 2926 rec->debuginfod.set = true; 2927 } 2928 2929 return 0; 2930 } 2931 2932 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2933 { 2934 struct record *rec = (struct record *)opt->value; 2935 2936 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2937 } 2938 2939 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2940 { 2941 struct record_opts *opts = (struct record_opts *)opt->value; 2942 2943 if (unset || !str) 2944 return 0; 2945 2946 if (!strcasecmp(str, "node")) 2947 opts->affinity = PERF_AFFINITY_NODE; 2948 else if (!strcasecmp(str, "cpu")) 2949 opts->affinity = PERF_AFFINITY_CPU; 2950 2951 return 0; 2952 } 2953 2954 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2955 { 2956 mask->nbits = nr_bits; 2957 mask->bits = bitmap_zalloc(mask->nbits); 2958 if (!mask->bits) 2959 return -ENOMEM; 2960 2961 return 0; 2962 } 2963 2964 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2965 { 2966 bitmap_free(mask->bits); 2967 mask->nbits = 0; 2968 } 2969 2970 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2971 { 2972 int ret; 2973 2974 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2975 if (ret) { 2976 mask->affinity.bits = NULL; 2977 return ret; 2978 } 2979 2980 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2981 if (ret) { 2982 record__mmap_cpu_mask_free(&mask->maps); 2983 mask->maps.bits = NULL; 2984 } 2985 2986 return ret; 2987 } 2988 2989 static void record__thread_mask_free(struct thread_mask *mask) 2990 { 2991 record__mmap_cpu_mask_free(&mask->maps); 2992 record__mmap_cpu_mask_free(&mask->affinity); 2993 } 2994 2995 static int record__parse_threads(const struct 
option *opt, const char *str, int unset) 2996 { 2997 int s; 2998 struct record_opts *opts = opt->value; 2999 3000 if (unset || !str || !strlen(str)) { 3001 opts->threads_spec = THREAD_SPEC__CPU; 3002 } else { 3003 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3004 if (s == THREAD_SPEC__USER) { 3005 opts->threads_user_spec = strdup(str); 3006 if (!opts->threads_user_spec) 3007 return -ENOMEM; 3008 opts->threads_spec = THREAD_SPEC__USER; 3009 break; 3010 } 3011 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3012 opts->threads_spec = s; 3013 break; 3014 } 3015 } 3016 } 3017 3018 if (opts->threads_spec == THREAD_SPEC__USER) 3019 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3020 else 3021 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3022 3023 return 0; 3024 } 3025 3026 static int parse_output_max_size(const struct option *opt, 3027 const char *str, int unset) 3028 { 3029 unsigned long *s = (unsigned long *)opt->value; 3030 static struct parse_tag tags_size[] = { 3031 { .tag = 'B', .mult = 1 }, 3032 { .tag = 'K', .mult = 1 << 10 }, 3033 { .tag = 'M', .mult = 1 << 20 }, 3034 { .tag = 'G', .mult = 1 << 30 }, 3035 { .tag = 0 }, 3036 }; 3037 unsigned long val; 3038 3039 if (unset) { 3040 *s = 0; 3041 return 0; 3042 } 3043 3044 val = parse_tag_value(str, tags_size); 3045 if (val != (unsigned long) -1) { 3046 *s = val; 3047 return 0; 3048 } 3049 3050 return -1; 3051 } 3052 3053 static int record__parse_mmap_pages(const struct option *opt, 3054 const char *str, 3055 int unset __maybe_unused) 3056 { 3057 struct record_opts *opts = opt->value; 3058 char *s, *p; 3059 unsigned int mmap_pages; 3060 int ret; 3061 3062 if (!str) 3063 return -EINVAL; 3064 3065 s = strdup(str); 3066 if (!s) 3067 return -ENOMEM; 3068 3069 p = strchr(s, ','); 3070 if (p) 3071 *p = '\0'; 3072 3073 if (*s) { 3074 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3075 if (ret) 3076 goto out_free; 3077 opts->mmap_pages = mmap_pages; 3078 } 3079 3080 if (!p) { 3081 ret = 0; 3082 goto out_free; 3083 } 3084 3085 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3086 if (ret) 3087 goto out_free; 3088 3089 opts->auxtrace_mmap_pages = mmap_pages; 3090 3091 out_free: 3092 free(s); 3093 return ret; 3094 } 3095 3096 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3097 { 3098 } 3099 3100 static int parse_control_option(const struct option *opt, 3101 const char *str, 3102 int unset __maybe_unused) 3103 { 3104 struct record_opts *opts = opt->value; 3105 3106 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3107 } 3108 3109 static void switch_output_size_warn(struct record *rec) 3110 { 3111 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3112 struct switch_output *s = &rec->switch_output; 3113 3114 wakeup_size /= 2; 3115 3116 if (s->size < wakeup_size) { 3117 char buf[100]; 3118 3119 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3120 pr_warning("WARNING: switch-output data size lower than " 3121 "wakeup kernel buffer size (%s) " 3122 "expect bigger perf.data sizes\n", buf); 3123 } 3124 } 3125 3126 static int switch_output_setup(struct record *rec) 3127 { 3128 struct switch_output *s = &rec->switch_output; 3129 static struct parse_tag tags_size[] = { 3130 { .tag = 'B', .mult = 1 }, 3131 { .tag = 'K', .mult = 1 << 10 }, 3132 { .tag = 'M', .mult = 1 << 20 }, 3133 { .tag = 'G', .mult = 1 << 30 }, 3134 { .tag = 0 }, 3135 }; 3136 static struct parse_tag tags_time[] = { 3137 { .tag = 's', 
.mult = 1 }, 3138 { .tag = 'm', .mult = 60 }, 3139 { .tag = 'h', .mult = 60*60 }, 3140 { .tag = 'd', .mult = 60*60*24 }, 3141 { .tag = 0 }, 3142 }; 3143 unsigned long val; 3144 3145 /* 3146 * If we're using --switch-output-events, then we imply its 3147 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3148 * thread to its parent. 3149 */ 3150 if (rec->switch_output_event_set) { 3151 if (record__threads_enabled(rec)) { 3152 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3153 return 0; 3154 } 3155 goto do_signal; 3156 } 3157 3158 if (!s->set) 3159 return 0; 3160 3161 if (record__threads_enabled(rec)) { 3162 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3163 return 0; 3164 } 3165 3166 if (!strcmp(s->str, "signal")) { 3167 do_signal: 3168 s->signal = true; 3169 pr_debug("switch-output with SIGUSR2 signal\n"); 3170 goto enabled; 3171 } 3172 3173 val = parse_tag_value(s->str, tags_size); 3174 if (val != (unsigned long) -1) { 3175 s->size = val; 3176 pr_debug("switch-output with %s size threshold\n", s->str); 3177 goto enabled; 3178 } 3179 3180 val = parse_tag_value(s->str, tags_time); 3181 if (val != (unsigned long) -1) { 3182 s->time = val; 3183 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3184 s->str, s->time); 3185 goto enabled; 3186 } 3187 3188 return -1; 3189 3190 enabled: 3191 rec->timestamp_filename = true; 3192 s->enabled = true; 3193 3194 if (s->size && !rec->opts.no_buffering) 3195 switch_output_size_warn(rec); 3196 3197 return 0; 3198 } 3199 3200 static const char * const __record_usage[] = { 3201 "perf record [<options>] [<command>]", 3202 "perf record [<options>] -- <command> [<options>]", 3203 NULL 3204 }; 3205 const char * const *record_usage = __record_usage; 3206 3207 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3208 struct perf_sample *sample, struct machine *machine) 3209 { 3210 /* 3211 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3212 * no need to add them twice. 3213 */ 3214 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3215 return 0; 3216 return perf_event__process_mmap(tool, event, sample, machine); 3217 } 3218 3219 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3220 struct perf_sample *sample, struct machine *machine) 3221 { 3222 /* 3223 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3224 * no need to add them twice. 
3225 */ 3226 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3227 return 0; 3228 3229 return perf_event__process_mmap2(tool, event, sample, machine); 3230 } 3231 3232 static int process_timestamp_boundary(struct perf_tool *tool, 3233 union perf_event *event __maybe_unused, 3234 struct perf_sample *sample, 3235 struct machine *machine __maybe_unused) 3236 { 3237 struct record *rec = container_of(tool, struct record, tool); 3238 3239 set_timestamp_boundary(rec, sample->time); 3240 return 0; 3241 } 3242 3243 static int parse_record_synth_option(const struct option *opt, 3244 const char *str, 3245 int unset __maybe_unused) 3246 { 3247 struct record_opts *opts = opt->value; 3248 char *p = strdup(str); 3249 3250 if (p == NULL) 3251 return -1; 3252 3253 opts->synth = parse_synth_opt(p); 3254 free(p); 3255 3256 if (opts->synth < 0) { 3257 pr_err("Invalid synth option: %s\n", str); 3258 return -1; 3259 } 3260 return 0; 3261 } 3262 3263 /* 3264 * XXX Ideally would be local to cmd_record() and passed to a record__new 3265 * because we need to have access to it in record__exit, that is called 3266 * after cmd_record() exits, but since record_options need to be accessible to 3267 * builtin-script, leave it here. 3268 * 3269 * At least we don't ouch it in all the other functions here directly. 3270 * 3271 * Just say no to tons of global variables, sigh. 3272 */ 3273 static struct record record = { 3274 .opts = { 3275 .sample_time = true, 3276 .mmap_pages = UINT_MAX, 3277 .user_freq = UINT_MAX, 3278 .user_interval = ULLONG_MAX, 3279 .freq = 4000, 3280 .target = { 3281 .uses_mmap = true, 3282 .default_per_cpu = true, 3283 }, 3284 .mmap_flush = MMAP_FLUSH_DEFAULT, 3285 .nr_threads_synthesize = 1, 3286 .ctl_fd = -1, 3287 .ctl_fd_ack = -1, 3288 .synth = PERF_SYNTH_ALL, 3289 }, 3290 .tool = { 3291 .sample = process_sample_event, 3292 .fork = perf_event__process_fork, 3293 .exit = perf_event__process_exit, 3294 .comm = perf_event__process_comm, 3295 .namespaces = perf_event__process_namespaces, 3296 .mmap = build_id__process_mmap, 3297 .mmap2 = build_id__process_mmap2, 3298 .itrace_start = process_timestamp_boundary, 3299 .aux = process_timestamp_boundary, 3300 .ordered_events = true, 3301 }, 3302 }; 3303 3304 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3305 "\n\t\t\t\tDefault: fp"; 3306 3307 static bool dry_run; 3308 3309 static struct parse_events_option_args parse_events_option_args = { 3310 .evlistp = &record.evlist, 3311 }; 3312 3313 static struct parse_events_option_args switch_output_parse_events_option_args = { 3314 .evlistp = &record.sb_evlist, 3315 }; 3316 3317 /* 3318 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3319 * with it and switch to use the library functions in perf_evlist that came 3320 * from builtin-record.c, i.e. use record_opts, 3321 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3322 * using pipes, etc. 3323 */ 3324 static struct option __record_options[] = { 3325 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3326 "event selector. 
use 'perf list' to list available events", 3327 parse_events_option), 3328 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3329 "event filter", parse_filter), 3330 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3331 NULL, "don't record events from perf itself", 3332 exclude_perf), 3333 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3334 "record events on existing process id"), 3335 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3336 "record events on existing thread id"), 3337 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3338 "collect data with this RT SCHED_FIFO priority"), 3339 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3340 "collect data without buffering"), 3341 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3342 "collect raw sample records from all opened counters"), 3343 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3344 "system-wide collection from all CPUs"), 3345 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3346 "list of cpus to monitor"), 3347 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3348 OPT_STRING('o', "output", &record.data.path, "file", 3349 "output file name"), 3350 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3351 &record.opts.no_inherit_set, 3352 "child tasks do not inherit counters"), 3353 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3354 "synthesize non-sample events at the end of output"), 3355 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3356 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3357 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3358 "Fail if the specified frequency can't be used"), 3359 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3360 "profile at this frequency", 3361 record__parse_freq), 3362 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3363 "number of mmap data pages and AUX area tracing mmap pages", 3364 record__parse_mmap_pages), 3365 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3366 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3367 record__mmap_flush_parse), 3368 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3369 NULL, "enables call-graph recording" , 3370 &record_callchain_opt), 3371 OPT_CALLBACK(0, "call-graph", &record.opts, 3372 "record_mode[,record_size]", record_callchain_help, 3373 &record_parse_callchain_opt), 3374 OPT_INCR('v', "verbose", &verbose, 3375 "be more verbose (show counter open errors, etc)"), 3376 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3377 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3378 "per thread counts"), 3379 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3380 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3381 "Record the sample physical addresses"), 3382 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3383 "Record the sampled data address data page size"), 3384 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3385 "Record the sampled code address (ip) page size"), 3386 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3387 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3388 "Record the sample identifier"), 3389 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3390 &record.opts.sample_time_set, 3391 "Record the sample 
timestamps"), 3392 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3393 "Record the sample period"), 3394 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3395 "don't sample"), 3396 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3397 &record.no_buildid_cache_set, 3398 "do not update the buildid cache"), 3399 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3400 &record.no_buildid_set, 3401 "do not collect buildids in perf.data"), 3402 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3403 "monitor event in cgroup name only", 3404 parse_cgroups), 3405 OPT_CALLBACK('D', "delay", &record, "ms", 3406 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3407 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3408 record__parse_event_enable_time), 3409 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3410 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3411 "user to profile"), 3412 3413 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3414 "branch any", "sample any taken branches", 3415 parse_branch_stack), 3416 3417 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3418 "branch filter mask", "branch stack filter modes", 3419 parse_branch_stack), 3420 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3421 "sample by weight (on special events only)"), 3422 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3423 "sample transaction flags (special events only)"), 3424 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3425 "use per-thread mmaps"), 3426 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3427 "sample selected machine registers on interrupt," 3428 " use '-I?' to list register names", parse_intr_regs), 3429 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3430 "sample selected machine registers on interrupt," 3431 " use '--user-regs=?' 
to list register names", parse_user_regs), 3432 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3433 "Record running/enabled time of read (:S) events"), 3434 OPT_CALLBACK('k', "clockid", &record.opts, 3435 "clockid", "clockid to use for events, see clock_gettime()", 3436 parse_clockid), 3437 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3438 "opts", "AUX area tracing Snapshot Mode", ""), 3439 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3440 "opts", "sample AUX area", ""), 3441 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3442 "per thread proc mmap processing timeout in ms"), 3443 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3444 "Record namespaces events"), 3445 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3446 "Record cgroup events"), 3447 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3448 &record.opts.record_switch_events_set, 3449 "Record context switch events"), 3450 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3451 "Configure all used events to run in kernel space.", 3452 PARSE_OPT_EXCLUSIVE), 3453 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3454 "Configure all used events to run in user space.", 3455 PARSE_OPT_EXCLUSIVE), 3456 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3457 "collect kernel callchains"), 3458 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3459 "collect user callchains"), 3460 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3461 "file", "vmlinux pathname"), 3462 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3463 "Record build-id of all DSOs regardless of hits"), 3464 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3465 "Record build-id in map events"), 3466 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3467 "append timestamp to output filename"), 3468 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3469 "Record timestamp boundary (time of first/last samples)"), 3470 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3471 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3472 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3473 "signal"), 3474 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3475 &record.switch_output_event_set, "switch output event", 3476 "switch output event selector. 
use 'perf list' to list available events", 3477 parse_events_option_new_evlist), 3478 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3479 "Limit number of switch output generated files"), 3480 OPT_BOOLEAN(0, "dry-run", &dry_run, 3481 "Parse options then exit"), 3482 #ifdef HAVE_AIO_SUPPORT 3483 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3484 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3485 record__aio_parse), 3486 #endif 3487 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3488 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3489 record__parse_affinity), 3490 #ifdef HAVE_ZSTD_SUPPORT 3491 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3492 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3493 record__parse_comp_level), 3494 #endif 3495 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3496 "size", "Limit the maximum size of the output file", parse_output_max_size), 3497 OPT_UINTEGER(0, "num-thread-synthesize", 3498 &record.opts.nr_threads_synthesize, 3499 "number of threads to run for event synthesis"), 3500 #ifdef HAVE_LIBPFM 3501 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3502 "libpfm4 event selector. use 'perf list' to list available events", 3503 parse_libpfm_events_option), 3504 #endif 3505 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3506 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3507 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3508 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3509 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3510 parse_control_option), 3511 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3512 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3513 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3514 &record.debuginfod.set, "debuginfod urls", 3515 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3516 "system"), 3517 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3518 "write collected trace data into several data files using parallel threads", 3519 record__parse_threads), 3520 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3521 OPT_END() 3522 }; 3523 3524 struct option *record_options = __record_options; 3525 3526 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3527 { 3528 struct perf_cpu cpu; 3529 int idx; 3530 3531 if (cpu_map__is_dummy(cpus)) 3532 return 0; 3533 3534 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3535 if (cpu.cpu == -1) 3536 continue; 3537 /* Return ENODEV is input cpu is greater than max cpu */ 3538 if ((unsigned long)cpu.cpu > mask->nbits) 3539 return -ENODEV; 3540 __set_bit(cpu.cpu, mask->bits); 3541 } 3542 3543 return 0; 3544 } 3545 3546 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3547 { 3548 struct perf_cpu_map *cpus; 3549 3550 cpus = perf_cpu_map__new(mask_spec); 3551 if (!cpus) 3552 return -ENOMEM; 3553 3554 bitmap_zero(mask->bits, mask->nbits); 3555 if (record__mmap_cpu_mask_init(mask, cpus)) 3556 return -ENODEV; 3557 3558 perf_cpu_map__put(cpus); 3559 3560 return 0; 3561 } 
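
/*
 * The mask_spec strings above use the usual cpu list syntax accepted by
 * perf_cpu_map__new(), e.g. "0-3" or "0,2-4,7" (illustrative values);
 * every listed cpu gets its bit set in the resulting mask.
 */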
3562 3563 static void record__free_thread_masks(struct record *rec, int nr_threads) 3564 { 3565 int t; 3566 3567 if (rec->thread_masks) 3568 for (t = 0; t < nr_threads; t++) 3569 record__thread_mask_free(&rec->thread_masks[t]); 3570 3571 zfree(&rec->thread_masks); 3572 } 3573 3574 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3575 { 3576 int t, ret; 3577 3578 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3579 if (!rec->thread_masks) { 3580 pr_err("Failed to allocate thread masks\n"); 3581 return -ENOMEM; 3582 } 3583 3584 for (t = 0; t < nr_threads; t++) { 3585 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3586 if (ret) { 3587 pr_err("Failed to allocate thread masks[%d]\n", t); 3588 goto out_free; 3589 } 3590 } 3591 3592 return 0; 3593 3594 out_free: 3595 record__free_thread_masks(rec, nr_threads); 3596 3597 return ret; 3598 } 3599 3600 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3601 { 3602 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3603 3604 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3605 if (ret) 3606 return ret; 3607 3608 rec->nr_threads = nr_cpus; 3609 pr_debug("nr_threads: %d\n", rec->nr_threads); 3610 3611 for (t = 0; t < rec->nr_threads; t++) { 3612 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3613 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3614 if (verbose > 0) { 3615 pr_debug("thread_masks[%d]: ", t); 3616 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3617 pr_debug("thread_masks[%d]: ", t); 3618 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3619 } 3620 } 3621 3622 return 0; 3623 } 3624 3625 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3626 const char **maps_spec, const char **affinity_spec, 3627 u32 nr_spec) 3628 { 3629 u32 s; 3630 int ret = 0, t = 0; 3631 struct mmap_cpu_mask cpus_mask; 3632 struct thread_mask thread_mask, full_mask, *thread_masks; 3633 3634 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3635 if (ret) { 3636 pr_err("Failed to allocate CPUs mask\n"); 3637 return ret; 3638 } 3639 3640 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3641 if (ret) { 3642 pr_err("Failed to init cpu mask\n"); 3643 goto out_free_cpu_mask; 3644 } 3645 3646 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3647 if (ret) { 3648 pr_err("Failed to allocate full mask\n"); 3649 goto out_free_cpu_mask; 3650 } 3651 3652 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3653 if (ret) { 3654 pr_err("Failed to allocate thread mask\n"); 3655 goto out_free_full_and_cpu_masks; 3656 } 3657 3658 for (s = 0; s < nr_spec; s++) { 3659 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3660 if (ret) { 3661 pr_err("Failed to initialize maps thread mask\n"); 3662 goto out_free; 3663 } 3664 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3665 if (ret) { 3666 pr_err("Failed to initialize affinity thread mask\n"); 3667 goto out_free; 3668 } 3669 3670 /* ignore invalid CPUs but do not allow empty masks */ 3671 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3672 cpus_mask.bits, thread_mask.maps.nbits)) { 3673 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3674 ret = -EINVAL; 3675 goto out_free; 3676 } 3677 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3678 
cpus_mask.bits, thread_mask.affinity.nbits)) { 3679 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3680 ret = -EINVAL; 3681 goto out_free; 3682 } 3683 3684 /* do not allow intersection with other masks (full_mask) */ 3685 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3686 thread_mask.maps.nbits)) { 3687 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3688 ret = -EINVAL; 3689 goto out_free; 3690 } 3691 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3692 thread_mask.affinity.nbits)) { 3693 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3694 ret = -EINVAL; 3695 goto out_free; 3696 } 3697 3698 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3699 thread_mask.maps.bits, full_mask.maps.nbits); 3700 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3701 thread_mask.affinity.bits, full_mask.maps.nbits); 3702 3703 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3704 if (!thread_masks) { 3705 pr_err("Failed to reallocate thread masks\n"); 3706 ret = -ENOMEM; 3707 goto out_free; 3708 } 3709 rec->thread_masks = thread_masks; 3710 rec->thread_masks[t] = thread_mask; 3711 if (verbose > 0) { 3712 pr_debug("thread_masks[%d]: ", t); 3713 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3714 pr_debug("thread_masks[%d]: ", t); 3715 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3716 } 3717 t++; 3718 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3719 if (ret) { 3720 pr_err("Failed to allocate thread mask\n"); 3721 goto out_free_full_and_cpu_masks; 3722 } 3723 } 3724 rec->nr_threads = t; 3725 pr_debug("nr_threads: %d\n", rec->nr_threads); 3726 if (!rec->nr_threads) 3727 ret = -EINVAL; 3728 3729 out_free: 3730 record__thread_mask_free(&thread_mask); 3731 out_free_full_and_cpu_masks: 3732 record__thread_mask_free(&full_mask); 3733 out_free_cpu_mask: 3734 record__mmap_cpu_mask_free(&cpus_mask); 3735 3736 return ret; 3737 } 3738 3739 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3740 { 3741 int ret; 3742 struct cpu_topology *topo; 3743 3744 topo = cpu_topology__new(); 3745 if (!topo) { 3746 pr_err("Failed to allocate CPU topology\n"); 3747 return -ENOMEM; 3748 } 3749 3750 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3751 topo->core_cpus_list, topo->core_cpus_lists); 3752 cpu_topology__delete(topo); 3753 3754 return ret; 3755 } 3756 3757 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3758 { 3759 int ret; 3760 struct cpu_topology *topo; 3761 3762 topo = cpu_topology__new(); 3763 if (!topo) { 3764 pr_err("Failed to allocate CPU topology\n"); 3765 return -ENOMEM; 3766 } 3767 3768 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3769 topo->package_cpus_list, topo->package_cpus_lists); 3770 cpu_topology__delete(topo); 3771 3772 return ret; 3773 } 3774 3775 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3776 { 3777 u32 s; 3778 int ret; 3779 const char **spec; 3780 struct numa_topology *topo; 3781 3782 topo = numa_topology__new(); 3783 if (!topo) { 3784 pr_err("Failed to allocate NUMA topology\n"); 3785 return -ENOMEM; 3786 } 3787 3788 spec = zalloc(topo->nr * sizeof(char *)); 3789 if (!spec) { 3790 pr_err("Failed to allocate NUMA spec\n"); 3791 ret = -ENOMEM; 3792 goto out_delete_topo; 3793 } 3794 for (s = 0; s < topo->nr; s++) 3795 spec[s] = topo->nodes[s].cpus; 3796 
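	/*
	 * Each NUMA node's cpu list serves as both the maps and the affinity
	 * spec, i.e. one writer thread per node, bound to that node's cpus.
	 */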
3797 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3798 3799 zfree(&spec); 3800 3801 out_delete_topo: 3802 numa_topology__delete(topo); 3803 3804 return ret; 3805 } 3806 3807 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3808 { 3809 int t, ret; 3810 u32 s, nr_spec = 0; 3811 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3812 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3813 3814 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3815 spec = strtok_r(user_spec, ":", &spec_ptr); 3816 if (spec == NULL) 3817 break; 3818 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3819 mask = strtok_r(spec, "/", &mask_ptr); 3820 if (mask == NULL) 3821 break; 3822 pr_debug2(" maps mask: %s\n", mask); 3823 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3824 if (!tmp_spec) { 3825 pr_err("Failed to reallocate maps spec\n"); 3826 ret = -ENOMEM; 3827 goto out_free; 3828 } 3829 maps_spec = tmp_spec; 3830 maps_spec[nr_spec] = dup_mask = strdup(mask); 3831 if (!maps_spec[nr_spec]) { 3832 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3833 ret = -ENOMEM; 3834 goto out_free; 3835 } 3836 mask = strtok_r(NULL, "/", &mask_ptr); 3837 if (mask == NULL) { 3838 pr_err("Invalid thread maps or affinity specs\n"); 3839 ret = -EINVAL; 3840 goto out_free; 3841 } 3842 pr_debug2(" affinity mask: %s\n", mask); 3843 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3844 if (!tmp_spec) { 3845 pr_err("Failed to reallocate affinity spec\n"); 3846 ret = -ENOMEM; 3847 goto out_free; 3848 } 3849 affinity_spec = tmp_spec; 3850 affinity_spec[nr_spec] = strdup(mask); 3851 if (!affinity_spec[nr_spec]) { 3852 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3853 ret = -ENOMEM; 3854 goto out_free; 3855 } 3856 dup_mask = NULL; 3857 nr_spec++; 3858 } 3859 3860 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3861 (const char **)affinity_spec, nr_spec); 3862 3863 out_free: 3864 free(dup_mask); 3865 for (s = 0; s < nr_spec; s++) { 3866 if (maps_spec) 3867 free(maps_spec[s]); 3868 if (affinity_spec) 3869 free(affinity_spec[s]); 3870 } 3871 free(affinity_spec); 3872 free(maps_spec); 3873 3874 return ret; 3875 } 3876 3877 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3878 { 3879 int ret; 3880 3881 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3882 if (ret) 3883 return ret; 3884 3885 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3886 return -ENODEV; 3887 3888 rec->nr_threads = 1; 3889 3890 return 0; 3891 } 3892 3893 static int record__init_thread_masks(struct record *rec) 3894 { 3895 int ret = 0; 3896 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3897 3898 if (!record__threads_enabled(rec)) 3899 return record__init_thread_default_masks(rec, cpus); 3900 3901 if (evlist__per_thread(rec->evlist)) { 3902 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3903 return -EINVAL; 3904 } 3905 3906 switch (rec->opts.threads_spec) { 3907 case THREAD_SPEC__CPU: 3908 ret = record__init_thread_cpu_masks(rec, cpus); 3909 break; 3910 case THREAD_SPEC__CORE: 3911 ret = record__init_thread_core_masks(rec, cpus); 3912 break; 3913 case THREAD_SPEC__PACKAGE: 3914 ret = record__init_thread_package_masks(rec, cpus); 3915 break; 3916 case THREAD_SPEC__NUMA: 3917 ret = record__init_thread_numa_masks(rec, cpus); 3918 break; 3919 case 
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive with parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
	symbol_conf.lazy_load_kernel_maps = true;
	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring is only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support for recording build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -ENOMEM;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		bool can_profile_kernel = perf_event_paranoid_check(1);

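		/*
		 * No events were requested on the command line: default to
		 * cycles, restricted to user space when the paranoia level
		 * does not allow profiling the kernel.
		 */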
"cycles:P" : "cycles:Pu"); 4107 if (err) 4108 goto out; 4109 } 4110 4111 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4112 rec->opts.no_inherit = true; 4113 4114 err = target__validate(&rec->opts.target); 4115 if (err) { 4116 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4117 ui__warning("%s\n", errbuf); 4118 } 4119 4120 err = target__parse_uid(&rec->opts.target); 4121 if (err) { 4122 int saved_errno = errno; 4123 4124 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4125 ui__error("%s", errbuf); 4126 4127 err = -saved_errno; 4128 goto out; 4129 } 4130 4131 /* Enable ignoring missing threads when -u/-p option is defined. */ 4132 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4133 4134 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4135 4136 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4137 arch__add_leaf_frame_record_opts(&rec->opts); 4138 4139 err = -ENOMEM; 4140 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4141 if (rec->opts.target.pid != NULL) { 4142 pr_err("Couldn't create thread/CPU maps: %s\n", 4143 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4144 goto out; 4145 } 4146 else 4147 usage_with_options(record_usage, record_options); 4148 } 4149 4150 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4151 if (err) 4152 goto out; 4153 4154 /* 4155 * We take all buildids when the file contains 4156 * AUX area tracing data because we do not decode the 4157 * trace because it would take too long. 4158 */ 4159 if (rec->opts.full_auxtrace) 4160 rec->buildid_all = true; 4161 4162 if (rec->opts.text_poke) { 4163 err = record__config_text_poke(rec->evlist); 4164 if (err) { 4165 pr_err("record__config_text_poke failed, error %d\n", err); 4166 goto out; 4167 } 4168 } 4169 4170 if (rec->off_cpu) { 4171 err = record__config_off_cpu(rec); 4172 if (err) { 4173 pr_err("record__config_off_cpu failed, error %d\n", err); 4174 goto out; 4175 } 4176 } 4177 4178 if (record_opts__config(&rec->opts)) { 4179 err = -EINVAL; 4180 goto out; 4181 } 4182 4183 err = record__config_tracking_events(rec); 4184 if (err) { 4185 pr_err("record__config_tracking_events failed, error %d\n", err); 4186 goto out; 4187 } 4188 4189 err = record__init_thread_masks(rec); 4190 if (err) { 4191 pr_err("Failed to initialize parallel data streaming masks\n"); 4192 goto out; 4193 } 4194 4195 if (rec->opts.nr_cblocks > nr_cblocks_max) 4196 rec->opts.nr_cblocks = nr_cblocks_max; 4197 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4198 4199 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4200 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4201 4202 if (rec->opts.comp_level > comp_level_max) 4203 rec->opts.comp_level = comp_level_max; 4204 pr_debug("comp level: %d\n", rec->opts.comp_level); 4205 4206 err = __cmd_record(&record, argc, argv); 4207 out: 4208 evlist__delete(rec->evlist); 4209 symbol__exit(); 4210 auxtrace_record__free(rec->itr); 4211 out_opts: 4212 record__free_thread_masks(rec, rec->nr_threads); 4213 rec->nr_threads = 0; 4214 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4215 return err; 4216 } 4217 4218 static void snapshot_sig_handler(int sig __maybe_unused) 4219 { 4220 struct record *rec = &record; 4221 4222 hit_auxtrace_snapshot_trigger(rec); 4223 4224 if (switch_output_signal(rec)) 4225 trigger_hit(&switch_output_trigger); 4226 } 4227 4228 static void 
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}