// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
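
/*
 * Non perf-event descriptors (for example the control fd) are polled by the
 * main recording thread through its own per-thread pollfd copy. Each
 * pollfd_index_map entry remembers which evlist pollfd entry a duplicated
 * thread entry came from, so that revents seen on the thread side can be
 * copied back to the evlist side; see record__dup_non_perf_events() and
 * record__update_evlist_pollfd_from_thread() below.
 */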
struct pollfd_index_map {
	int evlist_pollfd_index;
	int thread_pollfd_index;
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64 bytes_written;
	u64 thread_bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	bool off_cpu;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
	struct pollfd_index_map *index_map;
	size_t index_map_sz;
	size_t index_map_cnt;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	return rec->bytes_written + rec->thread_bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file) {
		thread->bytes_written += size;
		rec->thread_bytes_written += size;
	} else {
		rec->bytes_written += size;
	}

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
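/*
 * Asynchronous trace writing (enabled when opts->nr_cblocks > 0, see
 * record__aio_parse()). Data pushed out of the mmap ring buffers is queued
 * with POSIX AIO: record__aio_write() fills in the aiocb and retries
 * aio_write() while it fails with EAGAIN. Completion is detected by polling
 * aio_error()/aio_return() from record__aio_complete(); sigev_notify is
 * SIGEV_NONE, so no signal is delivered when a request finishes.
 */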
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free map->aio.data[]
	 * buffer to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from the perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of the data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done in record__aio_complete() once the
		 * started aio request completes, or in record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
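
/*
 * Push one mmap ring buffer through the AIO path: wait for a free
 * map->aio.data[] slot, copy (or zstd-compress) the data into it via
 * record__aio_pushfn(), then queue the write at the current trace file
 * offset and advance *off by the number of bytes queued.
 */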
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount, incremented in record__aio_pushfn(),
		 * if the record__aio_write() operation failed to start; otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag = 'B', .mult = 1 },
			{ .tag = 'K', .mult = 1 << 10 },
			{ .tag = 'M', .mult = 1 << 20 },
			{ .tag = 'G', .mult = 1 << 30 },
			{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec,
NULL, event, event->header.size); 615 } 616 617 static struct mutex synth_lock; 618 619 static int process_locked_synthesized_event(struct perf_tool *tool, 620 union perf_event *event, 621 struct perf_sample *sample __maybe_unused, 622 struct machine *machine __maybe_unused) 623 { 624 int ret; 625 626 mutex_lock(&synth_lock); 627 ret = process_synthesized_event(tool, event, sample, machine); 628 mutex_unlock(&synth_lock); 629 return ret; 630 } 631 632 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 633 { 634 struct record *rec = to; 635 636 if (record__comp_enabled(rec)) { 637 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 638 bf = map->data; 639 } 640 641 thread->samples++; 642 return record__write(rec, map, bf, size); 643 } 644 645 static volatile sig_atomic_t signr = -1; 646 static volatile sig_atomic_t child_finished; 647 #ifdef HAVE_EVENTFD_SUPPORT 648 static volatile sig_atomic_t done_fd = -1; 649 #endif 650 651 static void sig_handler(int sig) 652 { 653 if (sig == SIGCHLD) 654 child_finished = 1; 655 else 656 signr = sig; 657 658 done = 1; 659 #ifdef HAVE_EVENTFD_SUPPORT 660 if (done_fd >= 0) { 661 u64 tmp = 1; 662 int orig_errno = errno; 663 664 /* 665 * It is possible for this signal handler to run after done is 666 * checked in the main loop, but before the perf counter fds are 667 * polled. If this happens, the poll() will continue to wait 668 * even though done is set, and will only break out if either 669 * another signal is received, or the counters are ready for 670 * read. To ensure the poll() doesn't sleep when done is set, 671 * use an eventfd (done_fd) to wake up the poll(). 672 */ 673 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 674 pr_err("failed to signal wakeup fd, error: %m\n"); 675 676 errno = orig_errno; 677 } 678 #endif // HAVE_EVENTFD_SUPPORT 679 } 680 681 static void sigsegv_handler(int sig) 682 { 683 perf_hooks__recover(); 684 sighandler_dump_stack(sig); 685 } 686 687 static void record__sig_exit(void) 688 { 689 if (signr == -1) 690 return; 691 692 signal(signr, SIG_DFL); 693 raise(signr); 694 } 695 696 #ifdef HAVE_AUXTRACE_SUPPORT 697 698 static int record__process_auxtrace(struct perf_tool *tool, 699 struct mmap *map, 700 union perf_event *event, void *data1, 701 size_t len1, void *data2, size_t len2) 702 { 703 struct record *rec = container_of(tool, struct record, tool); 704 struct perf_data *data = &rec->data; 705 size_t padding; 706 u8 pad[8] = {0}; 707 708 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 709 off_t file_offset; 710 int fd = perf_data__fd(data); 711 int err; 712 713 file_offset = lseek(fd, 0, SEEK_CUR); 714 if (file_offset == -1) 715 return -1; 716 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 717 event, file_offset); 718 if (err) 719 return err; 720 } 721 722 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 723 padding = (len1 + len2) & 7; 724 if (padding) 725 padding = 8 - padding; 726 727 record__write(rec, map, event, event->header.size); 728 record__write(rec, map, data1, len1); 729 if (len2) 730 record__write(rec, map, data2, len2); 731 record__write(rec, map, &pad, padding); 732 733 return 0; 734 } 735 736 static int record__auxtrace_mmap_read(struct record *rec, 737 struct mmap *map) 738 { 739 int ret; 740 741 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 742 record__process_auxtrace); 743 if (ret < 0) 744 return ret; 745 746 if (ret) 747 rec->samples++; 748 749 return 0; 750 } 751 752 static int 
record__auxtrace_mmap_read_snapshot(struct record *rec, 753 struct mmap *map) 754 { 755 int ret; 756 757 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 758 record__process_auxtrace, 759 rec->opts.auxtrace_snapshot_size); 760 if (ret < 0) 761 return ret; 762 763 if (ret) 764 rec->samples++; 765 766 return 0; 767 } 768 769 static int record__auxtrace_read_snapshot_all(struct record *rec) 770 { 771 int i; 772 int rc = 0; 773 774 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 775 struct mmap *map = &rec->evlist->mmap[i]; 776 777 if (!map->auxtrace_mmap.base) 778 continue; 779 780 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 781 rc = -1; 782 goto out; 783 } 784 } 785 out: 786 return rc; 787 } 788 789 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 790 { 791 pr_debug("Recording AUX area tracing snapshot\n"); 792 if (record__auxtrace_read_snapshot_all(rec) < 0) { 793 trigger_error(&auxtrace_snapshot_trigger); 794 } else { 795 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 796 trigger_error(&auxtrace_snapshot_trigger); 797 else 798 trigger_ready(&auxtrace_snapshot_trigger); 799 } 800 } 801 802 static int record__auxtrace_snapshot_exit(struct record *rec) 803 { 804 if (trigger_is_error(&auxtrace_snapshot_trigger)) 805 return 0; 806 807 if (!auxtrace_record__snapshot_started && 808 auxtrace_record__snapshot_start(rec->itr)) 809 return -1; 810 811 record__read_auxtrace_snapshot(rec, true); 812 if (trigger_is_error(&auxtrace_snapshot_trigger)) 813 return -1; 814 815 return 0; 816 } 817 818 static int record__auxtrace_init(struct record *rec) 819 { 820 int err; 821 822 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 823 && record__threads_enabled(rec)) { 824 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 825 return -EINVAL; 826 } 827 828 if (!rec->itr) { 829 rec->itr = auxtrace_record__init(rec->evlist, &err); 830 if (err) 831 return err; 832 } 833 834 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 835 rec->opts.auxtrace_snapshot_opts); 836 if (err) 837 return err; 838 839 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 840 rec->opts.auxtrace_sample_opts); 841 if (err) 842 return err; 843 844 auxtrace_regroup_aux_output(rec->evlist); 845 846 return auxtrace_parse_filters(rec->evlist); 847 } 848 849 #else 850 851 static inline 852 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 853 struct mmap *map __maybe_unused) 854 { 855 return 0; 856 } 857 858 static inline 859 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 860 bool on_exit __maybe_unused) 861 { 862 } 863 864 static inline 865 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 866 { 867 return 0; 868 } 869 870 static inline 871 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 872 { 873 return 0; 874 } 875 876 static int record__auxtrace_init(struct record *rec __maybe_unused) 877 { 878 return 0; 879 } 880 881 #endif 882 883 static int record__config_text_poke(struct evlist *evlist) 884 { 885 struct evsel *evsel; 886 887 /* Nothing to do if text poke is already configured */ 888 evlist__for_each_entry(evlist, evsel) { 889 if (evsel->core.attr.text_poke) 890 return 0; 891 } 892 893 evsel = evlist__add_dummy_on_all_cpus(evlist); 894 if (!evsel) 895 return -ENOMEM; 896 897 evsel->core.attr.text_poke = 1; 898 evsel->core.attr.ksymbol = 1; 899 evsel->immediate = true; 900 
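/*
 * Timestamp the text poke and ksymbol events so that kernel text
 * modifications can be ordered against samples at report time.
 */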
evsel__set_sample_bit(evsel, TIME); 901 902 return 0; 903 } 904 905 static int record__config_off_cpu(struct record *rec) 906 { 907 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 908 } 909 910 static bool record__kcore_readable(struct machine *machine) 911 { 912 char kcore[PATH_MAX]; 913 int fd; 914 915 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 916 917 fd = open(kcore, O_RDONLY); 918 if (fd < 0) 919 return false; 920 921 close(fd); 922 923 return true; 924 } 925 926 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 927 { 928 char from_dir[PATH_MAX]; 929 char kcore_dir[PATH_MAX]; 930 int ret; 931 932 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 933 934 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 935 if (ret) 936 return ret; 937 938 return kcore_copy(from_dir, kcore_dir); 939 } 940 941 static void record__thread_data_init_pipes(struct record_thread *thread_data) 942 { 943 thread_data->pipes.msg[0] = -1; 944 thread_data->pipes.msg[1] = -1; 945 thread_data->pipes.ack[0] = -1; 946 thread_data->pipes.ack[1] = -1; 947 } 948 949 static int record__thread_data_open_pipes(struct record_thread *thread_data) 950 { 951 if (pipe(thread_data->pipes.msg)) 952 return -EINVAL; 953 954 if (pipe(thread_data->pipes.ack)) { 955 close(thread_data->pipes.msg[0]); 956 thread_data->pipes.msg[0] = -1; 957 close(thread_data->pipes.msg[1]); 958 thread_data->pipes.msg[1] = -1; 959 return -EINVAL; 960 } 961 962 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 963 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 964 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 965 966 return 0; 967 } 968 969 static void record__thread_data_close_pipes(struct record_thread *thread_data) 970 { 971 if (thread_data->pipes.msg[0] != -1) { 972 close(thread_data->pipes.msg[0]); 973 thread_data->pipes.msg[0] = -1; 974 } 975 if (thread_data->pipes.msg[1] != -1) { 976 close(thread_data->pipes.msg[1]); 977 thread_data->pipes.msg[1] = -1; 978 } 979 if (thread_data->pipes.ack[0] != -1) { 980 close(thread_data->pipes.ack[0]); 981 thread_data->pipes.ack[0] = -1; 982 } 983 if (thread_data->pipes.ack[1] != -1) { 984 close(thread_data->pipes.ack[1]); 985 thread_data->pipes.ack[1] = -1; 986 } 987 } 988 989 static bool evlist__per_thread(struct evlist *evlist) 990 { 991 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 992 } 993 994 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 995 { 996 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 997 struct mmap *mmap = evlist->mmap; 998 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 999 struct perf_cpu_map *cpus = evlist->core.all_cpus; 1000 bool per_thread = evlist__per_thread(evlist); 1001 1002 if (per_thread) 1003 thread_data->nr_mmaps = nr_mmaps; 1004 else 1005 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1006 thread_data->mask->maps.nbits); 1007 if (mmap) { 1008 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1009 if (!thread_data->maps) 1010 return -ENOMEM; 1011 } 1012 if (overwrite_mmap) { 1013 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1014 if (!thread_data->overwrite_maps) { 1015 zfree(&thread_data->maps); 1016 return -ENOMEM; 1017 } 1018 } 1019 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1020 thread_data->nr_mmaps, thread_data->maps, 
thread_data->overwrite_maps); 1021 1022 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1023 if (per_thread || 1024 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1025 if (thread_data->maps) { 1026 thread_data->maps[tm] = &mmap[m]; 1027 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1028 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1029 } 1030 if (thread_data->overwrite_maps) { 1031 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1032 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1033 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1034 } 1035 tm++; 1036 } 1037 } 1038 1039 return 0; 1040 } 1041 1042 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1043 { 1044 int f, tm, pos; 1045 struct mmap *map, *overwrite_map; 1046 1047 fdarray__init(&thread_data->pollfd, 64); 1048 1049 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1050 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1051 overwrite_map = thread_data->overwrite_maps ? 1052 thread_data->overwrite_maps[tm] : NULL; 1053 1054 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1055 void *ptr = evlist->core.pollfd.priv[f].ptr; 1056 1057 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1058 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1059 &evlist->core.pollfd); 1060 if (pos < 0) 1061 return pos; 1062 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1063 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1064 } 1065 } 1066 } 1067 1068 return 0; 1069 } 1070 1071 static void record__free_thread_data(struct record *rec) 1072 { 1073 int t; 1074 struct record_thread *thread_data = rec->thread_data; 1075 1076 if (thread_data == NULL) 1077 return; 1078 1079 for (t = 0; t < rec->nr_threads; t++) { 1080 record__thread_data_close_pipes(&thread_data[t]); 1081 zfree(&thread_data[t].maps); 1082 zfree(&thread_data[t].overwrite_maps); 1083 fdarray__exit(&thread_data[t].pollfd); 1084 } 1085 1086 zfree(&rec->thread_data); 1087 } 1088 1089 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1090 int evlist_pollfd_index, 1091 int thread_pollfd_index) 1092 { 1093 size_t x = rec->index_map_cnt; 1094 1095 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1096 return -ENOMEM; 1097 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1098 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1099 rec->index_map_cnt += 1; 1100 return 0; 1101 } 1102 1103 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1104 struct evlist *evlist, 1105 struct record_thread *thread_data) 1106 { 1107 struct pollfd *e_entries = evlist->core.pollfd.entries; 1108 struct pollfd *t_entries = thread_data->pollfd.entries; 1109 int err = 0; 1110 size_t i; 1111 1112 for (i = 0; i < rec->index_map_cnt; i++) { 1113 int e_pos = rec->index_map[i].evlist_pollfd_index; 1114 int t_pos = rec->index_map[i].thread_pollfd_index; 1115 1116 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1117 e_entries[e_pos].events != t_entries[t_pos].events) { 1118 pr_err("Thread and evlist pollfd index mismatch\n"); 1119 err = -EINVAL; 1120 continue; 1121 } 1122 e_entries[e_pos].revents = t_entries[t_pos].revents; 1123 } 1124 return err; 1125 } 1126 1127 static int record__dup_non_perf_events(struct record *rec, 1128 struct evlist *evlist, 1129 struct record_thread *thread_data) 1130 { 1131 struct fdarray *fda = &evlist->core.pollfd; 1132 int 
i, ret; 1133 1134 for (i = 0; i < fda->nr; i++) { 1135 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1136 continue; 1137 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1138 if (ret < 0) { 1139 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1140 return ret; 1141 } 1142 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1143 thread_data, ret, fda->entries[i].fd); 1144 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1145 if (ret < 0) { 1146 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1147 return ret; 1148 } 1149 } 1150 return 0; 1151 } 1152 1153 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1154 { 1155 int t, ret; 1156 struct record_thread *thread_data; 1157 1158 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1159 if (!rec->thread_data) { 1160 pr_err("Failed to allocate thread data\n"); 1161 return -ENOMEM; 1162 } 1163 thread_data = rec->thread_data; 1164 1165 for (t = 0; t < rec->nr_threads; t++) 1166 record__thread_data_init_pipes(&thread_data[t]); 1167 1168 for (t = 0; t < rec->nr_threads; t++) { 1169 thread_data[t].rec = rec; 1170 thread_data[t].mask = &rec->thread_masks[t]; 1171 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1172 if (ret) { 1173 pr_err("Failed to initialize thread[%d] maps\n", t); 1174 goto out_free; 1175 } 1176 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1177 if (ret) { 1178 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1179 goto out_free; 1180 } 1181 if (t) { 1182 thread_data[t].tid = -1; 1183 ret = record__thread_data_open_pipes(&thread_data[t]); 1184 if (ret) { 1185 pr_err("Failed to open thread[%d] communication pipes\n", t); 1186 goto out_free; 1187 } 1188 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1189 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1190 if (ret < 0) { 1191 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1192 goto out_free; 1193 } 1194 thread_data[t].ctlfd_pos = ret; 1195 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1196 thread_data, thread_data[t].ctlfd_pos, 1197 thread_data[t].pipes.msg[0]); 1198 } else { 1199 thread_data[t].tid = gettid(); 1200 1201 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1202 if (ret < 0) 1203 goto out_free; 1204 1205 thread_data[t].ctlfd_pos = -1; /* Not used */ 1206 } 1207 } 1208 1209 return 0; 1210 1211 out_free: 1212 record__free_thread_data(rec); 1213 1214 return ret; 1215 } 1216 1217 static int record__mmap_evlist(struct record *rec, 1218 struct evlist *evlist) 1219 { 1220 int i, ret; 1221 struct record_opts *opts = &rec->opts; 1222 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1223 opts->auxtrace_sample_mode; 1224 char msg[512]; 1225 1226 if (opts->affinity != PERF_AFFINITY_SYS) 1227 cpu__setup_cpunode_map(); 1228 1229 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1230 opts->auxtrace_mmap_pages, 1231 auxtrace_overwrite, 1232 opts->nr_cblocks, opts->affinity, 1233 opts->mmap_flush, opts->comp_level) < 0) { 1234 if (errno == EPERM) { 1235 pr_err("Permission error mapping pages.\n" 1236 "Consider increasing " 1237 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1238 "or try again with a smaller value of -m/--mmap_pages.\n" 1239 "(current value: %u,%u)\n", 1240 opts->mmap_pages, opts->auxtrace_mmap_pages); 1241 return -errno; 1242 } else { 1243 pr_err("failed to mmap with %d (%s)\n", errno, 1244 str_error_r(errno, msg, sizeof(msg))); 1245 
if (errno) 1246 return -errno; 1247 else 1248 return -EINVAL; 1249 } 1250 } 1251 1252 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1253 return -1; 1254 1255 ret = record__alloc_thread_data(rec, evlist); 1256 if (ret) 1257 return ret; 1258 1259 if (record__threads_enabled(rec)) { 1260 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1261 if (ret) { 1262 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1263 return ret; 1264 } 1265 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1266 if (evlist->mmap) 1267 evlist->mmap[i].file = &rec->data.dir.files[i]; 1268 if (evlist->overwrite_mmap) 1269 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1270 } 1271 } 1272 1273 return 0; 1274 } 1275 1276 static int record__mmap(struct record *rec) 1277 { 1278 return record__mmap_evlist(rec, rec->evlist); 1279 } 1280 1281 static int record__open(struct record *rec) 1282 { 1283 char msg[BUFSIZ]; 1284 struct evsel *pos; 1285 struct evlist *evlist = rec->evlist; 1286 struct perf_session *session = rec->session; 1287 struct record_opts *opts = &rec->opts; 1288 int rc = 0; 1289 1290 /* 1291 * For initial_delay, system wide or a hybrid system, we need to add a 1292 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1293 * of waiting or event synthesis. 1294 */ 1295 if (opts->initial_delay || target__has_cpu(&opts->target) || 1296 perf_pmu__has_hybrid()) { 1297 pos = evlist__get_tracking_event(evlist); 1298 if (!evsel__is_dummy_event(pos)) { 1299 /* Set up dummy event. */ 1300 if (evlist__add_dummy(evlist)) 1301 return -ENOMEM; 1302 pos = evlist__last(evlist); 1303 evlist__set_tracking_event(evlist, pos); 1304 } 1305 1306 /* 1307 * Enable the dummy event when the process is forked for 1308 * initial_delay, immediately for system wide. 1309 */ 1310 if (opts->initial_delay && !pos->immediate && 1311 !target__has_cpu(&opts->target)) 1312 pos->core.attr.enable_on_exec = 1; 1313 else 1314 pos->immediate = 1; 1315 } 1316 1317 evlist__config(evlist, opts, &callchain_param); 1318 1319 evlist__for_each_entry(evlist, pos) { 1320 try_again: 1321 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1322 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1323 if (verbose > 0) 1324 ui__warning("%s\n", msg); 1325 goto try_again; 1326 } 1327 if ((errno == EINVAL || errno == EBADF) && 1328 pos->core.leader != &pos->core && 1329 pos->weak_group) { 1330 pos = evlist__reset_weak_group(evlist, pos, true); 1331 goto try_again; 1332 } 1333 rc = -errno; 1334 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1335 ui__error("%s\n", msg); 1336 goto out; 1337 } 1338 1339 pos->supported = true; 1340 } 1341 1342 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1343 pr_warning( 1344 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1345 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1346 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1347 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1348 "Samples in kernel modules won't be resolved at all.\n\n" 1349 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk all samples to get the timestamps of the
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange module mmaps prior to the guest kernel mmap
	 * and trigger a preload of the dso, because default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
1448 */ 1449 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1450 machine); 1451 if (err < 0) 1452 pr_err("Couldn't record guest kernel [%d]'s reference" 1453 " relocation symbol.\n", machine->pid); 1454 } 1455 1456 static struct perf_event_header finished_round_event = { 1457 .size = sizeof(struct perf_event_header), 1458 .type = PERF_RECORD_FINISHED_ROUND, 1459 }; 1460 1461 static struct perf_event_header finished_init_event = { 1462 .size = sizeof(struct perf_event_header), 1463 .type = PERF_RECORD_FINISHED_INIT, 1464 }; 1465 1466 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1467 { 1468 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1469 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1470 thread->mask->affinity.nbits)) { 1471 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1472 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1473 map->affinity_mask.bits, thread->mask->affinity.nbits); 1474 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1475 (cpu_set_t *)thread->mask->affinity.bits); 1476 if (verbose == 2) { 1477 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1478 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1479 } 1480 } 1481 } 1482 1483 static size_t process_comp_header(void *record, size_t increment) 1484 { 1485 struct perf_record_compressed *event = record; 1486 size_t size = sizeof(*event); 1487 1488 if (increment) { 1489 event->header.size += increment; 1490 return increment; 1491 } 1492 1493 event->header.type = PERF_RECORD_COMPRESSED; 1494 event->header.size = size; 1495 1496 return size; 1497 } 1498 1499 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1500 void *dst, size_t dst_size, void *src, size_t src_size) 1501 { 1502 size_t compressed; 1503 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1504 struct zstd_data *zstd_data = &session->zstd_data; 1505 1506 if (map && map->file) 1507 zstd_data = &map->zstd_data; 1508 1509 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1510 max_record_size, process_comp_header); 1511 1512 if (map && map->file) { 1513 thread->bytes_transferred += src_size; 1514 thread->bytes_compressed += compressed; 1515 } else { 1516 session->bytes_transferred += src_size; 1517 session->bytes_compressed += compressed; 1518 } 1519 1520 return compressed; 1521 } 1522 1523 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1524 bool overwrite, bool synch) 1525 { 1526 u64 bytes_written = rec->bytes_written; 1527 int i; 1528 int rc = 0; 1529 int nr_mmaps; 1530 struct mmap **maps; 1531 int trace_fd = rec->data.file.fd; 1532 off_t off = 0; 1533 1534 if (!evlist) 1535 return 0; 1536 1537 nr_mmaps = thread->nr_mmaps; 1538 maps = overwrite ? 
thread->overwrite_maps : thread->maps; 1539 1540 if (!maps) 1541 return 0; 1542 1543 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1544 return 0; 1545 1546 if (record__aio_enabled(rec)) 1547 off = record__aio_get_pos(trace_fd); 1548 1549 for (i = 0; i < nr_mmaps; i++) { 1550 u64 flush = 0; 1551 struct mmap *map = maps[i]; 1552 1553 if (map->core.base) { 1554 record__adjust_affinity(rec, map); 1555 if (synch) { 1556 flush = map->core.flush; 1557 map->core.flush = 1; 1558 } 1559 if (!record__aio_enabled(rec)) { 1560 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1561 if (synch) 1562 map->core.flush = flush; 1563 rc = -1; 1564 goto out; 1565 } 1566 } else { 1567 if (record__aio_push(rec, map, &off) < 0) { 1568 record__aio_set_pos(trace_fd, off); 1569 if (synch) 1570 map->core.flush = flush; 1571 rc = -1; 1572 goto out; 1573 } 1574 } 1575 if (synch) 1576 map->core.flush = flush; 1577 } 1578 1579 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1580 !rec->opts.auxtrace_sample_mode && 1581 record__auxtrace_mmap_read(rec, map) != 0) { 1582 rc = -1; 1583 goto out; 1584 } 1585 } 1586 1587 if (record__aio_enabled(rec)) 1588 record__aio_set_pos(trace_fd, off); 1589 1590 /* 1591 * Mark the round finished in case we wrote 1592 * at least one event. 1593 * 1594 * No need for round events in directory mode, 1595 * because per-cpu maps and files have data 1596 * sorted by kernel. 1597 */ 1598 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1599 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1600 1601 if (overwrite) 1602 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1603 out: 1604 return rc; 1605 } 1606 1607 static int record__mmap_read_all(struct record *rec, bool synch) 1608 { 1609 int err; 1610 1611 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1612 if (err) 1613 return err; 1614 1615 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1616 } 1617 1618 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1619 void *arg __maybe_unused) 1620 { 1621 struct perf_mmap *map = fda->priv[fd].ptr; 1622 1623 if (map) 1624 perf_mmap__put(map); 1625 } 1626 1627 static void *record__thread(void *arg) 1628 { 1629 enum thread_msg msg = THREAD_MSG__READY; 1630 bool terminate = false; 1631 struct fdarray *pollfd; 1632 int err, ctlfd_pos; 1633 1634 thread = arg; 1635 thread->tid = gettid(); 1636 1637 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1638 if (err == -1) 1639 pr_warning("threads[%d]: failed to notify on start: %s\n", 1640 thread->tid, strerror(errno)); 1641 1642 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1643 1644 pollfd = &thread->pollfd; 1645 ctlfd_pos = thread->ctlfd_pos; 1646 1647 for (;;) { 1648 unsigned long long hits = thread->samples; 1649 1650 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1651 break; 1652 1653 if (hits == thread->samples) { 1654 1655 err = fdarray__poll(pollfd, -1); 1656 /* 1657 * Propagate error, only if there's any. Ignore positive 1658 * number of returned events and interrupt error. 
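 * (A positive return value only means some descriptors are ready, and
 * EINTR means the poll was interrupted by a signal; neither is treated
 * as a failure here.)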
1659 */ 1660 if (err > 0 || (err < 0 && errno == EINTR)) 1661 err = 0; 1662 thread->waking++; 1663 1664 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1665 record__thread_munmap_filtered, NULL) == 0) 1666 break; 1667 } 1668 1669 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1670 terminate = true; 1671 close(thread->pipes.msg[0]); 1672 thread->pipes.msg[0] = -1; 1673 pollfd->entries[ctlfd_pos].fd = -1; 1674 pollfd->entries[ctlfd_pos].events = 0; 1675 } 1676 1677 pollfd->entries[ctlfd_pos].revents = 0; 1678 } 1679 record__mmap_read_all(thread->rec, true); 1680 1681 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1682 if (err == -1) 1683 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1684 thread->tid, strerror(errno)); 1685 1686 return NULL; 1687 } 1688 1689 static void record__init_features(struct record *rec) 1690 { 1691 struct perf_session *session = rec->session; 1692 int feat; 1693 1694 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1695 perf_header__set_feat(&session->header, feat); 1696 1697 if (rec->no_buildid) 1698 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1699 1700 #ifdef HAVE_LIBTRACEEVENT 1701 if (!have_tracepoints(&rec->evlist->core.entries)) 1702 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1703 #endif 1704 1705 if (!rec->opts.branch_stack) 1706 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1707 1708 if (!rec->opts.full_auxtrace) 1709 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1710 1711 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1712 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1713 1714 if (!rec->opts.use_clockid) 1715 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1716 1717 if (!record__threads_enabled(rec)) 1718 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1719 1720 if (!record__comp_enabled(rec)) 1721 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1722 1723 perf_header__clear_feat(&session->header, HEADER_STAT); 1724 } 1725 1726 static void 1727 record__finish_output(struct record *rec) 1728 { 1729 int i; 1730 struct perf_data *data = &rec->data; 1731 int fd = perf_data__fd(data); 1732 1733 if (data->is_pipe) 1734 return; 1735 1736 rec->session->header.data_size += rec->bytes_written; 1737 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1738 if (record__threads_enabled(rec)) { 1739 for (i = 0; i < data->dir.nr; i++) 1740 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1741 } 1742 1743 if (!rec->no_buildid) { 1744 process_buildids(rec); 1745 1746 if (rec->buildid_all) 1747 dsos__hit_all(rec->session); 1748 } 1749 perf_session__write_header(rec->session, rec->evlist, fd, true); 1750 1751 return; 1752 } 1753 1754 static int record__synthesize_workload(struct record *rec, bool tail) 1755 { 1756 int err; 1757 struct perf_thread_map *thread_map; 1758 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1759 1760 if (rec->opts.tail_synthesize != tail) 1761 return 0; 1762 1763 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1764 if (thread_map == NULL) 1765 return -1; 1766 1767 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1768 process_synthesized_event, 1769 &rec->session->machines.host, 1770 needs_mmap, 1771 rec->opts.sample_address); 1772 perf_thread_map__put(thread_map); 1773 return err; 1774 } 1775 1776 static int write_finished_init(struct record *rec, bool tail) 1777 { 1778 if (rec->opts.tail_synthesize != tail) 
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist. As a result, the newly created perf.data
		 * would not contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
1851 */ 1852 if (target__none(&rec->opts.target)) 1853 record__synthesize_workload(rec, false); 1854 write_finished_init(rec, false); 1855 } 1856 return fd; 1857 } 1858 1859 static void __record__read_lost_samples(struct record *rec, struct evsel *evsel, 1860 struct perf_record_lost_samples *lost, 1861 int cpu_idx, int thread_idx) 1862 { 1863 struct perf_counts_values count; 1864 struct perf_sample_id *sid; 1865 struct perf_sample sample = {}; 1866 int id_hdr_size; 1867 1868 if (perf_evsel__read(&evsel->core, cpu_idx, thread_idx, &count) < 0) { 1869 pr_err("read LOST count failed\n"); 1870 return; 1871 } 1872 1873 if (count.lost == 0) 1874 return; 1875 1876 lost->lost = count.lost; 1877 if (evsel->core.ids) { 1878 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1879 sample.id = sid->id; 1880 } 1881 1882 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1883 evsel->core.attr.sample_type, &sample); 1884 lost->header.size = sizeof(*lost) + id_hdr_size; 1885 record__write(rec, NULL, lost, lost->header.size); 1886 } 1887 1888 static void record__read_lost_samples(struct record *rec) 1889 { 1890 struct perf_session *session = rec->session; 1891 struct perf_record_lost_samples *lost; 1892 struct evsel *evsel; 1893 1894 /* there was an error during record__open */ 1895 if (session->evlist == NULL) 1896 return; 1897 1898 lost = zalloc(PERF_SAMPLE_MAX_SIZE); 1899 if (lost == NULL) { 1900 pr_debug("Memory allocation failed\n"); 1901 return; 1902 } 1903 1904 lost->header.type = PERF_RECORD_LOST_SAMPLES; 1905 1906 evlist__for_each_entry(session->evlist, evsel) { 1907 struct xyarray *xy = evsel->core.sample_id; 1908 1909 if (xy == NULL || evsel->core.fd == NULL) 1910 continue; 1911 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1912 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1913 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1914 continue; 1915 } 1916 1917 for (int x = 0; x < xyarray__max_x(xy); x++) { 1918 for (int y = 0; y < xyarray__max_y(xy); y++) { 1919 __record__read_lost_samples(rec, evsel, lost, x, y); 1920 } 1921 } 1922 } 1923 free(lost); 1924 1925 } 1926 1927 static volatile sig_atomic_t workload_exec_errno; 1928 1929 /* 1930 * evlist__prepare_workload will send a SIGUSR1 1931 * if the fork fails, since we asked by setting its 1932 * want_signal to true. 
1933 */ 1934 static void workload_exec_failed_signal(int signo __maybe_unused, 1935 siginfo_t *info, 1936 void *ucontext __maybe_unused) 1937 { 1938 workload_exec_errno = info->si_value.sival_int; 1939 done = 1; 1940 child_finished = 1; 1941 } 1942 1943 static void snapshot_sig_handler(int sig); 1944 static void alarm_sig_handler(int sig); 1945 1946 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1947 { 1948 if (evlist) { 1949 if (evlist->mmap && evlist->mmap[0].core.base) 1950 return evlist->mmap[0].core.base; 1951 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1952 return evlist->overwrite_mmap[0].core.base; 1953 } 1954 return NULL; 1955 } 1956 1957 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1958 { 1959 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1960 if (pc) 1961 return pc; 1962 return NULL; 1963 } 1964 1965 static int record__synthesize(struct record *rec, bool tail) 1966 { 1967 struct perf_session *session = rec->session; 1968 struct machine *machine = &session->machines.host; 1969 struct perf_data *data = &rec->data; 1970 struct record_opts *opts = &rec->opts; 1971 struct perf_tool *tool = &rec->tool; 1972 int err = 0; 1973 event_op f = process_synthesized_event; 1974 1975 if (rec->opts.tail_synthesize != tail) 1976 return 0; 1977 1978 if (data->is_pipe) { 1979 err = perf_event__synthesize_for_pipe(tool, session, data, 1980 process_synthesized_event); 1981 if (err < 0) 1982 goto out; 1983 1984 rec->bytes_written += err; 1985 } 1986 1987 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1988 process_synthesized_event, machine); 1989 if (err) 1990 goto out; 1991 1992 /* Synthesize id_index before auxtrace_info */ 1993 err = perf_event__synthesize_id_index(tool, 1994 process_synthesized_event, 1995 session->evlist, machine); 1996 if (err) 1997 goto out; 1998 1999 if (rec->opts.full_auxtrace) { 2000 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2001 session, process_synthesized_event); 2002 if (err) 2003 goto out; 2004 } 2005 2006 if (!evlist__exclude_kernel(rec->evlist)) { 2007 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2008 machine); 2009 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2010 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2011 "Check /proc/kallsyms permission or run as root.\n"); 2012 2013 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2014 machine); 2015 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2016 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2017 "Check /proc/modules permission or run as root.\n"); 2018 } 2019 2020 if (perf_guest) { 2021 machines__process_guests(&session->machines, 2022 perf_event__synthesize_guest_os, tool); 2023 } 2024 2025 err = perf_event__synthesize_extra_attr(&rec->tool, 2026 rec->evlist, 2027 process_synthesized_event, 2028 data->is_pipe); 2029 if (err) 2030 goto out; 2031 2032 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2033 process_synthesized_event, 2034 NULL); 2035 if (err < 0) { 2036 pr_err("Couldn't synthesize thread map.\n"); 2037 return err; 2038 } 2039 2040 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2041 process_synthesized_event, NULL); 2042 if (err < 0) { 2043 pr_err("Couldn't synthesize cpu map.\n"); 2044 return err; 2045 } 2046 2047 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2048 machine, opts); 2049 if (err < 0) { 2050 pr_warning("Couldn't synthesize bpf events.\n"); 2051 err = 0; 2052 } 2053 2054 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2055 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2056 machine); 2057 if (err < 0) { 2058 pr_warning("Couldn't synthesize cgroup events.\n"); 2059 err = 0; 2060 } 2061 } 2062 2063 if (rec->opts.nr_threads_synthesize > 1) { 2064 mutex_init(&synth_lock); 2065 perf_set_multithreaded(); 2066 f = process_locked_synthesized_event; 2067 } 2068 2069 if (rec->opts.synth & PERF_SYNTH_TASK) { 2070 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2071 2072 err = __machine__synthesize_threads(machine, tool, &opts->target, 2073 rec->evlist->core.threads, 2074 f, needs_mmap, opts->sample_address, 2075 rec->opts.nr_threads_synthesize); 2076 } 2077 2078 if (rec->opts.nr_threads_synthesize > 1) { 2079 perf_set_singlethreaded(); 2080 mutex_destroy(&synth_lock); 2081 } 2082 2083 out: 2084 return err; 2085 } 2086 2087 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2088 { 2089 struct record *rec = data; 2090 pthread_kill(rec->thread_id, SIGUSR2); 2091 return 0; 2092 } 2093 2094 static int record__setup_sb_evlist(struct record *rec) 2095 { 2096 struct record_opts *opts = &rec->opts; 2097 2098 if (rec->sb_evlist != NULL) { 2099 /* 2100 * We get here if --switch-output-event populated the 2101 * sb_evlist, so associate a callback that will send a SIGUSR2 2102 * to the main thread. 
2103 */ 2104 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2105 rec->thread_id = pthread_self(); 2106 } 2107 #ifdef HAVE_LIBBPF_SUPPORT 2108 if (!opts->no_bpf_event) { 2109 if (rec->sb_evlist == NULL) { 2110 rec->sb_evlist = evlist__new(); 2111 2112 if (rec->sb_evlist == NULL) { 2113 pr_err("Couldn't create side band evlist.\n."); 2114 return -1; 2115 } 2116 } 2117 2118 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2119 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2120 return -1; 2121 } 2122 } 2123 #endif 2124 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2125 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2126 opts->no_bpf_event = true; 2127 } 2128 2129 return 0; 2130 } 2131 2132 static int record__init_clock(struct record *rec) 2133 { 2134 struct perf_session *session = rec->session; 2135 struct timespec ref_clockid; 2136 struct timeval ref_tod; 2137 u64 ref; 2138 2139 if (!rec->opts.use_clockid) 2140 return 0; 2141 2142 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2143 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2144 2145 session->header.env.clock.clockid = rec->opts.clockid; 2146 2147 if (gettimeofday(&ref_tod, NULL) != 0) { 2148 pr_err("gettimeofday failed, cannot set reference time.\n"); 2149 return -1; 2150 } 2151 2152 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2153 pr_err("clock_gettime failed, cannot set reference time.\n"); 2154 return -1; 2155 } 2156 2157 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2158 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2159 2160 session->header.env.clock.tod_ns = ref; 2161 2162 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2163 (u64) ref_clockid.tv_nsec; 2164 2165 session->header.env.clock.clockid_ns = ref; 2166 return 0; 2167 } 2168 2169 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2170 { 2171 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2172 trigger_hit(&auxtrace_snapshot_trigger); 2173 auxtrace_record__snapshot_started = 1; 2174 if (auxtrace_record__snapshot_start(rec->itr)) 2175 trigger_error(&auxtrace_snapshot_trigger); 2176 } 2177 } 2178 2179 static void record__uniquify_name(struct record *rec) 2180 { 2181 struct evsel *pos; 2182 struct evlist *evlist = rec->evlist; 2183 char *new_name; 2184 int ret; 2185 2186 if (!perf_pmu__has_hybrid()) 2187 return; 2188 2189 evlist__for_each_entry(evlist, pos) { 2190 if (!evsel__is_hybrid(pos)) 2191 continue; 2192 2193 if (strchr(pos->name, '/')) 2194 continue; 2195 2196 ret = asprintf(&new_name, "%s/%s/", 2197 pos->pmu_name, pos->name); 2198 if (ret) { 2199 free(pos->name); 2200 pos->name = new_name; 2201 } 2202 } 2203 } 2204 2205 static int record__terminate_thread(struct record_thread *thread_data) 2206 { 2207 int err; 2208 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2209 pid_t tid = thread_data->tid; 2210 2211 close(thread_data->pipes.msg[1]); 2212 thread_data->pipes.msg[1] = -1; 2213 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2214 if (err > 0) 2215 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2216 else 2217 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2218 thread->tid, tid); 2219 2220 return 0; 2221 } 2222 2223 static int record__start_threads(struct record *rec) 2224 { 2225 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2226 struct record_thread *thread_data = rec->thread_data; 2227 sigset_t full, 
mask; 2228 pthread_t handle; 2229 pthread_attr_t attrs; 2230 2231 thread = &thread_data[0]; 2232 2233 if (!record__threads_enabled(rec)) 2234 return 0; 2235 2236 sigfillset(&full); 2237 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2238 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2239 return -1; 2240 } 2241 2242 pthread_attr_init(&attrs); 2243 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2244 2245 for (t = 1; t < nr_threads; t++) { 2246 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2247 2248 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2249 pthread_attr_setaffinity_np(&attrs, 2250 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2251 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2252 #endif 2253 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2254 for (tt = 1; tt < t; tt++) 2255 record__terminate_thread(&thread_data[tt]); 2256 pr_err("Failed to start threads: %s\n", strerror(errno)); 2257 ret = -1; 2258 goto out_err; 2259 } 2260 2261 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2262 if (err > 0) 2263 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2264 thread_msg_tags[msg]); 2265 else 2266 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2267 thread->tid, rec->thread_data[t].tid); 2268 } 2269 2270 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2271 (cpu_set_t *)thread->mask->affinity.bits); 2272 2273 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2274 2275 out_err: 2276 pthread_attr_destroy(&attrs); 2277 2278 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2279 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2280 ret = -1; 2281 } 2282 2283 return ret; 2284 } 2285 2286 static int record__stop_threads(struct record *rec) 2287 { 2288 int t; 2289 struct record_thread *thread_data = rec->thread_data; 2290 2291 for (t = 1; t < rec->nr_threads; t++) 2292 record__terminate_thread(&thread_data[t]); 2293 2294 for (t = 0; t < rec->nr_threads; t++) { 2295 rec->samples += thread_data[t].samples; 2296 if (!record__threads_enabled(rec)) 2297 continue; 2298 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2299 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2300 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2301 thread_data[t].samples, thread_data[t].waking); 2302 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2303 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2304 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2305 else 2306 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2307 } 2308 2309 return 0; 2310 } 2311 2312 static unsigned long record__waking(struct record *rec) 2313 { 2314 int t; 2315 unsigned long waking = 0; 2316 struct record_thread *thread_data = rec->thread_data; 2317 2318 for (t = 0; t < rec->nr_threads; t++) 2319 waking += thread_data[t].waking; 2320 2321 return waking; 2322 } 2323 2324 static int __cmd_record(struct record *rec, int argc, const char **argv) 2325 { 2326 int err; 2327 int status = 0; 2328 const bool forks = argc > 0; 2329 struct perf_tool *tool = &rec->tool; 2330 struct record_opts *opts = &rec->opts; 2331 struct perf_data *data = &rec->data; 2332 struct perf_session *session; 2333 bool disabled = false, draining = false; 2334 int fd; 2335 float ratio = 0; 2336 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2337 2338 
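	/*
	 * Overview of the flow below: install the signal handlers, create the
	 * session, prepare the forked workload (if any), open and mmap the
	 * events, write the header and synthesize pre-existing state, then
	 * loop reading the ring buffers until 'done' is set, and finally stop
	 * the streaming threads, flush the remaining data and print the
	 * summary.
	 */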
atexit(record__sig_exit); 2339 signal(SIGCHLD, sig_handler); 2340 signal(SIGINT, sig_handler); 2341 signal(SIGTERM, sig_handler); 2342 signal(SIGSEGV, sigsegv_handler); 2343 2344 if (rec->opts.record_namespaces) 2345 tool->namespace_events = true; 2346 2347 if (rec->opts.record_cgroup) { 2348 #ifdef HAVE_FILE_HANDLE 2349 tool->cgroup_events = true; 2350 #else 2351 pr_err("cgroup tracking is not supported\n"); 2352 return -1; 2353 #endif 2354 } 2355 2356 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2357 signal(SIGUSR2, snapshot_sig_handler); 2358 if (rec->opts.auxtrace_snapshot_mode) 2359 trigger_on(&auxtrace_snapshot_trigger); 2360 if (rec->switch_output.enabled) 2361 trigger_on(&switch_output_trigger); 2362 } else { 2363 signal(SIGUSR2, SIG_IGN); 2364 } 2365 2366 session = perf_session__new(data, tool); 2367 if (IS_ERR(session)) { 2368 pr_err("Perf session creation failed.\n"); 2369 return PTR_ERR(session); 2370 } 2371 2372 if (record__threads_enabled(rec)) { 2373 if (perf_data__is_pipe(&rec->data)) { 2374 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2375 return -1; 2376 } 2377 if (rec->opts.full_auxtrace) { 2378 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2379 return -1; 2380 } 2381 } 2382 2383 fd = perf_data__fd(data); 2384 rec->session = session; 2385 2386 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2387 pr_err("Compression initialization failed.\n"); 2388 return -1; 2389 } 2390 #ifdef HAVE_EVENTFD_SUPPORT 2391 done_fd = eventfd(0, EFD_NONBLOCK); 2392 if (done_fd < 0) { 2393 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2394 status = -1; 2395 goto out_delete_session; 2396 } 2397 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2398 if (err < 0) { 2399 pr_err("Failed to add wakeup eventfd to poll list\n"); 2400 status = err; 2401 goto out_delete_session; 2402 } 2403 #endif // HAVE_EVENTFD_SUPPORT 2404 2405 session->header.env.comp_type = PERF_COMP_ZSTD; 2406 session->header.env.comp_level = rec->opts.comp_level; 2407 2408 if (rec->opts.kcore && 2409 !record__kcore_readable(&session->machines.host)) { 2410 pr_err("ERROR: kcore is not readable.\n"); 2411 return -1; 2412 } 2413 2414 if (record__init_clock(rec)) 2415 return -1; 2416 2417 record__init_features(rec); 2418 2419 if (forks) { 2420 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2421 workload_exec_failed_signal); 2422 if (err < 0) { 2423 pr_err("Couldn't run the workload!\n"); 2424 status = err; 2425 goto out_delete_session; 2426 } 2427 } 2428 2429 /* 2430 * If we have just single event and are sending data 2431 * through pipe, we need to force the ids allocation, 2432 * because we synthesize event name through the pipe 2433 * and need the id for that. 
2434 */ 2435 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2436 rec->opts.sample_id = true; 2437 2438 record__uniquify_name(rec); 2439 2440 /* Debug message used by test scripts */ 2441 pr_debug3("perf record opening and mmapping events\n"); 2442 if (record__open(rec) != 0) { 2443 err = -1; 2444 goto out_free_threads; 2445 } 2446 /* Debug message used by test scripts */ 2447 pr_debug3("perf record done opening and mmapping events\n"); 2448 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2449 2450 if (rec->opts.kcore) { 2451 err = record__kcore_copy(&session->machines.host, data); 2452 if (err) { 2453 pr_err("ERROR: Failed to copy kcore\n"); 2454 goto out_free_threads; 2455 } 2456 } 2457 2458 err = bpf__apply_obj_config(); 2459 if (err) { 2460 char errbuf[BUFSIZ]; 2461 2462 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2463 pr_err("ERROR: Apply config to BPF failed: %s\n", 2464 errbuf); 2465 goto out_free_threads; 2466 } 2467 2468 /* 2469 * Normally perf_session__new would do this, but it doesn't have the 2470 * evlist. 2471 */ 2472 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2473 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2474 rec->tool.ordered_events = false; 2475 } 2476 2477 if (!rec->evlist->core.nr_groups) 2478 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2479 2480 if (data->is_pipe) { 2481 err = perf_header__write_pipe(fd); 2482 if (err < 0) 2483 goto out_free_threads; 2484 } else { 2485 err = perf_session__write_header(session, rec->evlist, fd, false); 2486 if (err < 0) 2487 goto out_free_threads; 2488 } 2489 2490 err = -1; 2491 if (!rec->no_buildid 2492 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2493 pr_err("Couldn't generate buildids. " 2494 "Use --no-buildid to profile anyway.\n"); 2495 goto out_free_threads; 2496 } 2497 2498 err = record__setup_sb_evlist(rec); 2499 if (err) 2500 goto out_free_threads; 2501 2502 err = record__synthesize(rec, false); 2503 if (err < 0) 2504 goto out_free_threads; 2505 2506 if (rec->realtime_prio) { 2507 struct sched_param param; 2508 2509 param.sched_priority = rec->realtime_prio; 2510 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2511 pr_err("Could not set realtime priority.\n"); 2512 err = -1; 2513 goto out_free_threads; 2514 } 2515 } 2516 2517 if (record__start_threads(rec)) 2518 goto out_free_threads; 2519 2520 /* 2521 * When perf is starting the traced process, all the events 2522 * (apart from group members) have enable_on_exec=1 set, 2523 * so don't spoil it by prematurely enabling them. 2524 */ 2525 if (!target__none(&opts->target) && !opts->initial_delay) 2526 evlist__enable(rec->evlist); 2527 2528 /* 2529 * Let the child rip 2530 */ 2531 if (forks) { 2532 struct machine *machine = &session->machines.host; 2533 union perf_event *event; 2534 pid_t tgid; 2535 2536 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2537 if (event == NULL) { 2538 err = -ENOMEM; 2539 goto out_child; 2540 } 2541 2542 /* 2543 * Some H/W events are generated before COMM event 2544 * which is emitted during exec(), so perf script 2545 * cannot see a correct process name for those events. 2546 * Synthesize COMM event to prevent it. 
2547 */ 2548 tgid = perf_event__synthesize_comm(tool, event, 2549 rec->evlist->workload.pid, 2550 process_synthesized_event, 2551 machine); 2552 free(event); 2553 2554 if (tgid == -1) 2555 goto out_child; 2556 2557 event = malloc(sizeof(event->namespaces) + 2558 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2559 machine->id_hdr_size); 2560 if (event == NULL) { 2561 err = -ENOMEM; 2562 goto out_child; 2563 } 2564 2565 /* 2566 * Synthesize NAMESPACES event for the command specified. 2567 */ 2568 perf_event__synthesize_namespaces(tool, event, 2569 rec->evlist->workload.pid, 2570 tgid, process_synthesized_event, 2571 machine); 2572 free(event); 2573 2574 evlist__start_workload(rec->evlist); 2575 } 2576 2577 if (opts->initial_delay) { 2578 pr_info(EVLIST_DISABLED_MSG); 2579 if (opts->initial_delay > 0) { 2580 usleep(opts->initial_delay * USEC_PER_MSEC); 2581 evlist__enable(rec->evlist); 2582 pr_info(EVLIST_ENABLED_MSG); 2583 } 2584 } 2585 2586 err = event_enable_timer__start(rec->evlist->eet); 2587 if (err) 2588 goto out_child; 2589 2590 /* Debug message used by test scripts */ 2591 pr_debug3("perf record has started\n"); 2592 fflush(stderr); 2593 2594 trigger_ready(&auxtrace_snapshot_trigger); 2595 trigger_ready(&switch_output_trigger); 2596 perf_hooks__invoke_record_start(); 2597 2598 /* 2599 * Must write FINISHED_INIT so it will be seen after all other 2600 * synthesized user events, but before any regular events. 2601 */ 2602 err = write_finished_init(rec, false); 2603 if (err < 0) 2604 goto out_child; 2605 2606 for (;;) { 2607 unsigned long long hits = thread->samples; 2608 2609 /* 2610 * rec->evlist->bkw_mmap_state is possible to be 2611 * BKW_MMAP_EMPTY here: when done == true and 2612 * hits != rec->samples in previous round. 2613 * 2614 * evlist__toggle_bkw_mmap ensure we never 2615 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2616 */ 2617 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2618 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2619 2620 if (record__mmap_read_all(rec, false) < 0) { 2621 trigger_error(&auxtrace_snapshot_trigger); 2622 trigger_error(&switch_output_trigger); 2623 err = -1; 2624 goto out_child; 2625 } 2626 2627 if (auxtrace_record__snapshot_started) { 2628 auxtrace_record__snapshot_started = 0; 2629 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2630 record__read_auxtrace_snapshot(rec, false); 2631 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2632 pr_err("AUX area tracing snapshot failed\n"); 2633 err = -1; 2634 goto out_child; 2635 } 2636 } 2637 2638 if (trigger_is_hit(&switch_output_trigger)) { 2639 /* 2640 * If switch_output_trigger is hit, the data in 2641 * overwritable ring buffer should have been collected, 2642 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2643 * 2644 * If SIGUSR2 raise after or during record__mmap_read_all(), 2645 * record__mmap_read_all() didn't collect data from 2646 * overwritable ring buffer. Read again. 2647 */ 2648 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2649 continue; 2650 trigger_ready(&switch_output_trigger); 2651 2652 /* 2653 * Reenable events in overwrite ring buffer after 2654 * record__mmap_read_all(): we should have collected 2655 * data from it. 
2656 */ 2657 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2658 2659 if (!quiet) 2660 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2661 record__waking(rec)); 2662 thread->waking = 0; 2663 fd = record__switch_output(rec, false); 2664 if (fd < 0) { 2665 pr_err("Failed to switch to new file\n"); 2666 trigger_error(&switch_output_trigger); 2667 err = fd; 2668 goto out_child; 2669 } 2670 2671 /* re-arm the alarm */ 2672 if (rec->switch_output.time) 2673 alarm(rec->switch_output.time); 2674 } 2675 2676 if (hits == thread->samples) { 2677 if (done || draining) 2678 break; 2679 err = fdarray__poll(&thread->pollfd, -1); 2680 /* 2681 * Propagate error, only if there's any. Ignore positive 2682 * number of returned events and interrupt error. 2683 */ 2684 if (err > 0 || (err < 0 && errno == EINTR)) 2685 err = 0; 2686 thread->waking++; 2687 2688 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2689 record__thread_munmap_filtered, NULL) == 0) 2690 draining = true; 2691 2692 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2693 if (err) 2694 goto out_child; 2695 } 2696 2697 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2698 switch (cmd) { 2699 case EVLIST_CTL_CMD_SNAPSHOT: 2700 hit_auxtrace_snapshot_trigger(rec); 2701 evlist__ctlfd_ack(rec->evlist); 2702 break; 2703 case EVLIST_CTL_CMD_STOP: 2704 done = 1; 2705 break; 2706 case EVLIST_CTL_CMD_ACK: 2707 case EVLIST_CTL_CMD_UNSUPPORTED: 2708 case EVLIST_CTL_CMD_ENABLE: 2709 case EVLIST_CTL_CMD_DISABLE: 2710 case EVLIST_CTL_CMD_EVLIST: 2711 case EVLIST_CTL_CMD_PING: 2712 default: 2713 break; 2714 } 2715 } 2716 2717 err = event_enable_timer__process(rec->evlist->eet); 2718 if (err < 0) 2719 goto out_child; 2720 if (err) { 2721 err = 0; 2722 done = 1; 2723 } 2724 2725 /* 2726 * When perf is starting the traced process, at the end events 2727 * die with the process and we wait for that. Thus no need to 2728 * disable events in this case. 
2729 */ 2730 if (done && !disabled && !target__none(&opts->target)) { 2731 trigger_off(&auxtrace_snapshot_trigger); 2732 evlist__disable(rec->evlist); 2733 disabled = true; 2734 } 2735 } 2736 2737 trigger_off(&auxtrace_snapshot_trigger); 2738 trigger_off(&switch_output_trigger); 2739 2740 if (opts->auxtrace_snapshot_on_exit) 2741 record__auxtrace_snapshot_exit(rec); 2742 2743 if (forks && workload_exec_errno) { 2744 char msg[STRERR_BUFSIZE], strevsels[2048]; 2745 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2746 2747 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2748 2749 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2750 strevsels, argv[0], emsg); 2751 err = -1; 2752 goto out_child; 2753 } 2754 2755 if (!quiet) 2756 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2757 record__waking(rec)); 2758 2759 write_finished_init(rec, true); 2760 2761 if (target__none(&rec->opts.target)) 2762 record__synthesize_workload(rec, true); 2763 2764 out_child: 2765 record__stop_threads(rec); 2766 record__mmap_read_all(rec, true); 2767 out_free_threads: 2768 record__free_thread_data(rec); 2769 evlist__finalize_ctlfd(rec->evlist); 2770 record__aio_mmap_read_sync(rec); 2771 2772 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2773 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2774 session->header.env.comp_ratio = ratio + 0.5; 2775 } 2776 2777 if (forks) { 2778 int exit_status; 2779 2780 if (!child_finished) 2781 kill(rec->evlist->workload.pid, SIGTERM); 2782 2783 wait(&exit_status); 2784 2785 if (err < 0) 2786 status = err; 2787 else if (WIFEXITED(exit_status)) 2788 status = WEXITSTATUS(exit_status); 2789 else if (WIFSIGNALED(exit_status)) 2790 signr = WTERMSIG(exit_status); 2791 } else 2792 status = err; 2793 2794 if (rec->off_cpu) 2795 rec->bytes_written += off_cpu_write(rec->session); 2796 2797 record__read_lost_samples(rec); 2798 record__synthesize(rec, true); 2799 /* this will be recalculated during process_buildids() */ 2800 rec->samples = 0; 2801 2802 if (!err) { 2803 if (!rec->timestamp_filename) { 2804 record__finish_output(rec); 2805 } else { 2806 fd = record__switch_output(rec, true); 2807 if (fd < 0) { 2808 status = fd; 2809 goto out_delete_session; 2810 } 2811 } 2812 } 2813 2814 perf_hooks__invoke_record_end(); 2815 2816 if (!err && !quiet) { 2817 char samples[128]; 2818 const char *postfix = rec->timestamp_filename ? 
2819 ".<timestamp>" : ""; 2820 2821 if (rec->samples && !rec->opts.full_auxtrace) 2822 scnprintf(samples, sizeof(samples), 2823 " (%" PRIu64 " samples)", rec->samples); 2824 else 2825 samples[0] = '\0'; 2826 2827 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2828 perf_data__size(data) / 1024.0 / 1024.0, 2829 data->path, postfix, samples); 2830 if (ratio) { 2831 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2832 rec->session->bytes_transferred / 1024.0 / 1024.0, 2833 ratio); 2834 } 2835 fprintf(stderr, " ]\n"); 2836 } 2837 2838 out_delete_session: 2839 #ifdef HAVE_EVENTFD_SUPPORT 2840 if (done_fd >= 0) { 2841 fd = done_fd; 2842 done_fd = -1; 2843 2844 close(fd); 2845 } 2846 #endif 2847 zstd_fini(&session->zstd_data); 2848 perf_session__delete(session); 2849 2850 if (!opts->no_bpf_event) 2851 evlist__stop_sb_thread(rec->sb_evlist); 2852 return status; 2853 } 2854 2855 static void callchain_debug(struct callchain_param *callchain) 2856 { 2857 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2858 2859 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2860 2861 if (callchain->record_mode == CALLCHAIN_DWARF) 2862 pr_debug("callchain: stack dump size %d\n", 2863 callchain->dump_size); 2864 } 2865 2866 int record_opts__parse_callchain(struct record_opts *record, 2867 struct callchain_param *callchain, 2868 const char *arg, bool unset) 2869 { 2870 int ret; 2871 callchain->enabled = !unset; 2872 2873 /* --no-call-graph */ 2874 if (unset) { 2875 callchain->record_mode = CALLCHAIN_NONE; 2876 pr_debug("callchain: disabled\n"); 2877 return 0; 2878 } 2879 2880 ret = parse_callchain_record_opt(arg, callchain); 2881 if (!ret) { 2882 /* Enable data address sampling for DWARF unwind. */ 2883 if (callchain->record_mode == CALLCHAIN_DWARF) 2884 record->sample_address = true; 2885 callchain_debug(callchain); 2886 } 2887 2888 return ret; 2889 } 2890 2891 int record_parse_callchain_opt(const struct option *opt, 2892 const char *arg, 2893 int unset) 2894 { 2895 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2896 } 2897 2898 int record_callchain_opt(const struct option *opt, 2899 const char *arg __maybe_unused, 2900 int unset __maybe_unused) 2901 { 2902 struct callchain_param *callchain = opt->value; 2903 2904 callchain->enabled = true; 2905 2906 if (callchain->record_mode == CALLCHAIN_NONE) 2907 callchain->record_mode = CALLCHAIN_FP; 2908 2909 callchain_debug(callchain); 2910 return 0; 2911 } 2912 2913 static int perf_record_config(const char *var, const char *value, void *cb) 2914 { 2915 struct record *rec = cb; 2916 2917 if (!strcmp(var, "record.build-id")) { 2918 if (!strcmp(value, "cache")) 2919 rec->no_buildid_cache = false; 2920 else if (!strcmp(value, "no-cache")) 2921 rec->no_buildid_cache = true; 2922 else if (!strcmp(value, "skip")) 2923 rec->no_buildid = true; 2924 else if (!strcmp(value, "mmap")) 2925 rec->buildid_mmap = true; 2926 else 2927 return -1; 2928 return 0; 2929 } 2930 if (!strcmp(var, "record.call-graph")) { 2931 var = "call-graph.record-mode"; 2932 return perf_default_config(var, value, cb); 2933 } 2934 #ifdef HAVE_AIO_SUPPORT 2935 if (!strcmp(var, "record.aio")) { 2936 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2937 if (!rec->opts.nr_cblocks) 2938 rec->opts.nr_cblocks = nr_cblocks_default; 2939 } 2940 #endif 2941 if (!strcmp(var, "record.debuginfod")) { 2942 rec->debuginfod.urls = strdup(value); 2943 if (!rec->debuginfod.urls) 2944 return -ENOMEM; 2945 rec->debuginfod.set = 
true; 2946 } 2947 2948 return 0; 2949 } 2950 2951 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2952 { 2953 struct record *rec = (struct record *)opt->value; 2954 2955 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2956 } 2957 2958 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2959 { 2960 struct record_opts *opts = (struct record_opts *)opt->value; 2961 2962 if (unset || !str) 2963 return 0; 2964 2965 if (!strcasecmp(str, "node")) 2966 opts->affinity = PERF_AFFINITY_NODE; 2967 else if (!strcasecmp(str, "cpu")) 2968 opts->affinity = PERF_AFFINITY_CPU; 2969 2970 return 0; 2971 } 2972 2973 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2974 { 2975 mask->nbits = nr_bits; 2976 mask->bits = bitmap_zalloc(mask->nbits); 2977 if (!mask->bits) 2978 return -ENOMEM; 2979 2980 return 0; 2981 } 2982 2983 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2984 { 2985 bitmap_free(mask->bits); 2986 mask->nbits = 0; 2987 } 2988 2989 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2990 { 2991 int ret; 2992 2993 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2994 if (ret) { 2995 mask->affinity.bits = NULL; 2996 return ret; 2997 } 2998 2999 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 3000 if (ret) { 3001 record__mmap_cpu_mask_free(&mask->maps); 3002 mask->maps.bits = NULL; 3003 } 3004 3005 return ret; 3006 } 3007 3008 static void record__thread_mask_free(struct thread_mask *mask) 3009 { 3010 record__mmap_cpu_mask_free(&mask->maps); 3011 record__mmap_cpu_mask_free(&mask->affinity); 3012 } 3013 3014 static int record__parse_threads(const struct option *opt, const char *str, int unset) 3015 { 3016 int s; 3017 struct record_opts *opts = opt->value; 3018 3019 if (unset || !str || !strlen(str)) { 3020 opts->threads_spec = THREAD_SPEC__CPU; 3021 } else { 3022 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3023 if (s == THREAD_SPEC__USER) { 3024 opts->threads_user_spec = strdup(str); 3025 if (!opts->threads_user_spec) 3026 return -ENOMEM; 3027 opts->threads_spec = THREAD_SPEC__USER; 3028 break; 3029 } 3030 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3031 opts->threads_spec = s; 3032 break; 3033 } 3034 } 3035 } 3036 3037 if (opts->threads_spec == THREAD_SPEC__USER) 3038 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3039 else 3040 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3041 3042 return 0; 3043 } 3044 3045 static int parse_output_max_size(const struct option *opt, 3046 const char *str, int unset) 3047 { 3048 unsigned long *s = (unsigned long *)opt->value; 3049 static struct parse_tag tags_size[] = { 3050 { .tag = 'B', .mult = 1 }, 3051 { .tag = 'K', .mult = 1 << 10 }, 3052 { .tag = 'M', .mult = 1 << 20 }, 3053 { .tag = 'G', .mult = 1 << 30 }, 3054 { .tag = 0 }, 3055 }; 3056 unsigned long val; 3057 3058 if (unset) { 3059 *s = 0; 3060 return 0; 3061 } 3062 3063 val = parse_tag_value(str, tags_size); 3064 if (val != (unsigned long) -1) { 3065 *s = val; 3066 return 0; 3067 } 3068 3069 return -1; 3070 } 3071 3072 static int record__parse_mmap_pages(const struct option *opt, 3073 const char *str, 3074 int unset __maybe_unused) 3075 { 3076 struct record_opts *opts = opt->value; 3077 char *s, *p; 3078 unsigned int mmap_pages; 3079 int ret; 3080 3081 if (!str) 3082 return -EINVAL; 3083 3084 s = strdup(str); 3085 if (!s) 3086 return -ENOMEM; 
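	/*
	 * The option argument has the form pages[,pages]: the value before the
	 * optional comma sizes the event data mmaps, the value after it sizes
	 * the AUX area tracing mmaps, e.g. "-m 512,64" (illustrative values).
	 */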
3087 3088 p = strchr(s, ','); 3089 if (p) 3090 *p = '\0'; 3091 3092 if (*s) { 3093 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3094 if (ret) 3095 goto out_free; 3096 opts->mmap_pages = mmap_pages; 3097 } 3098 3099 if (!p) { 3100 ret = 0; 3101 goto out_free; 3102 } 3103 3104 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3105 if (ret) 3106 goto out_free; 3107 3108 opts->auxtrace_mmap_pages = mmap_pages; 3109 3110 out_free: 3111 free(s); 3112 return ret; 3113 } 3114 3115 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3116 { 3117 } 3118 3119 static int parse_control_option(const struct option *opt, 3120 const char *str, 3121 int unset __maybe_unused) 3122 { 3123 struct record_opts *opts = opt->value; 3124 3125 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3126 } 3127 3128 static void switch_output_size_warn(struct record *rec) 3129 { 3130 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3131 struct switch_output *s = &rec->switch_output; 3132 3133 wakeup_size /= 2; 3134 3135 if (s->size < wakeup_size) { 3136 char buf[100]; 3137 3138 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3139 pr_warning("WARNING: switch-output data size lower than " 3140 "wakeup kernel buffer size (%s) " 3141 "expect bigger perf.data sizes\n", buf); 3142 } 3143 } 3144 3145 static int switch_output_setup(struct record *rec) 3146 { 3147 struct switch_output *s = &rec->switch_output; 3148 static struct parse_tag tags_size[] = { 3149 { .tag = 'B', .mult = 1 }, 3150 { .tag = 'K', .mult = 1 << 10 }, 3151 { .tag = 'M', .mult = 1 << 20 }, 3152 { .tag = 'G', .mult = 1 << 30 }, 3153 { .tag = 0 }, 3154 }; 3155 static struct parse_tag tags_time[] = { 3156 { .tag = 's', .mult = 1 }, 3157 { .tag = 'm', .mult = 60 }, 3158 { .tag = 'h', .mult = 60*60 }, 3159 { .tag = 'd', .mult = 60*60*24 }, 3160 { .tag = 0 }, 3161 }; 3162 unsigned long val; 3163 3164 /* 3165 * If we're using --switch-output-events, then we imply its 3166 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3167 * thread to its parent. 
3168 */ 3169 if (rec->switch_output_event_set) { 3170 if (record__threads_enabled(rec)) { 3171 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3172 return 0; 3173 } 3174 goto do_signal; 3175 } 3176 3177 if (!s->set) 3178 return 0; 3179 3180 if (record__threads_enabled(rec)) { 3181 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3182 return 0; 3183 } 3184 3185 if (!strcmp(s->str, "signal")) { 3186 do_signal: 3187 s->signal = true; 3188 pr_debug("switch-output with SIGUSR2 signal\n"); 3189 goto enabled; 3190 } 3191 3192 val = parse_tag_value(s->str, tags_size); 3193 if (val != (unsigned long) -1) { 3194 s->size = val; 3195 pr_debug("switch-output with %s size threshold\n", s->str); 3196 goto enabled; 3197 } 3198 3199 val = parse_tag_value(s->str, tags_time); 3200 if (val != (unsigned long) -1) { 3201 s->time = val; 3202 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3203 s->str, s->time); 3204 goto enabled; 3205 } 3206 3207 return -1; 3208 3209 enabled: 3210 rec->timestamp_filename = true; 3211 s->enabled = true; 3212 3213 if (s->size && !rec->opts.no_buffering) 3214 switch_output_size_warn(rec); 3215 3216 return 0; 3217 } 3218 3219 static const char * const __record_usage[] = { 3220 "perf record [<options>] [<command>]", 3221 "perf record [<options>] -- <command> [<options>]", 3222 NULL 3223 }; 3224 const char * const *record_usage = __record_usage; 3225 3226 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3227 struct perf_sample *sample, struct machine *machine) 3228 { 3229 /* 3230 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3231 * no need to add them twice. 3232 */ 3233 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3234 return 0; 3235 return perf_event__process_mmap(tool, event, sample, machine); 3236 } 3237 3238 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3239 struct perf_sample *sample, struct machine *machine) 3240 { 3241 /* 3242 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3243 * no need to add them twice. 3244 */ 3245 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3246 return 0; 3247 3248 return perf_event__process_mmap2(tool, event, sample, machine); 3249 } 3250 3251 static int process_timestamp_boundary(struct perf_tool *tool, 3252 union perf_event *event __maybe_unused, 3253 struct perf_sample *sample, 3254 struct machine *machine __maybe_unused) 3255 { 3256 struct record *rec = container_of(tool, struct record, tool); 3257 3258 set_timestamp_boundary(rec, sample->time); 3259 return 0; 3260 } 3261 3262 static int parse_record_synth_option(const struct option *opt, 3263 const char *str, 3264 int unset __maybe_unused) 3265 { 3266 struct record_opts *opts = opt->value; 3267 char *p = strdup(str); 3268 3269 if (p == NULL) 3270 return -1; 3271 3272 opts->synth = parse_synth_opt(p); 3273 free(p); 3274 3275 if (opts->synth < 0) { 3276 pr_err("Invalid synth option: %s\n", str); 3277 return -1; 3278 } 3279 return 0; 3280 } 3281 3282 /* 3283 * XXX Ideally would be local to cmd_record() and passed to a record__new 3284 * because we need to have access to it in record__exit, that is called 3285 * after cmd_record() exits, but since record_options need to be accessible to 3286 * builtin-script, leave it here. 3287 * 3288 * At least we don't ouch it in all the other functions here directly. 
3289 * 3290 * Just say no to tons of global variables, sigh. 3291 */ 3292 static struct record record = { 3293 .opts = { 3294 .sample_time = true, 3295 .mmap_pages = UINT_MAX, 3296 .user_freq = UINT_MAX, 3297 .user_interval = ULLONG_MAX, 3298 .freq = 4000, 3299 .target = { 3300 .uses_mmap = true, 3301 .default_per_cpu = true, 3302 }, 3303 .mmap_flush = MMAP_FLUSH_DEFAULT, 3304 .nr_threads_synthesize = 1, 3305 .ctl_fd = -1, 3306 .ctl_fd_ack = -1, 3307 .synth = PERF_SYNTH_ALL, 3308 }, 3309 .tool = { 3310 .sample = process_sample_event, 3311 .fork = perf_event__process_fork, 3312 .exit = perf_event__process_exit, 3313 .comm = perf_event__process_comm, 3314 .namespaces = perf_event__process_namespaces, 3315 .mmap = build_id__process_mmap, 3316 .mmap2 = build_id__process_mmap2, 3317 .itrace_start = process_timestamp_boundary, 3318 .aux = process_timestamp_boundary, 3319 .ordered_events = true, 3320 }, 3321 }; 3322 3323 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3324 "\n\t\t\t\tDefault: fp"; 3325 3326 static bool dry_run; 3327 3328 /* 3329 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3330 * with it and switch to use the library functions in perf_evlist that came 3331 * from builtin-record.c, i.e. use record_opts, 3332 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3333 * using pipes, etc. 3334 */ 3335 static struct option __record_options[] = { 3336 OPT_CALLBACK('e', "event", &record.evlist, "event", 3337 "event selector. use 'perf list' to list available events", 3338 parse_events_option), 3339 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3340 "event filter", parse_filter), 3341 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3342 NULL, "don't record events from perf itself", 3343 exclude_perf), 3344 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3345 "record events on existing process id"), 3346 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3347 "record events on existing thread id"), 3348 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3349 "collect data with this RT SCHED_FIFO priority"), 3350 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3351 "collect data without buffering"), 3352 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3353 "collect raw sample records from all opened counters"), 3354 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3355 "system-wide collection from all CPUs"), 3356 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3357 "list of cpus to monitor"), 3358 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3359 OPT_STRING('o', "output", &record.data.path, "file", 3360 "output file name"), 3361 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3362 &record.opts.no_inherit_set, 3363 "child tasks do not inherit counters"), 3364 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3365 "synthesize non-sample events at the end of output"), 3366 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3367 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3368 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3369 "Fail if the specified frequency can't be used"), 3370 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3371 "profile at this frequency", 3372 record__parse_freq), 3373 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3374 "number of mmap data pages and AUX area tracing mmap pages", 3375 
record__parse_mmap_pages), 3376 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3377 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3378 record__mmap_flush_parse), 3379 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3380 NULL, "enables call-graph recording" , 3381 &record_callchain_opt), 3382 OPT_CALLBACK(0, "call-graph", &record.opts, 3383 "record_mode[,record_size]", record_callchain_help, 3384 &record_parse_callchain_opt), 3385 OPT_INCR('v', "verbose", &verbose, 3386 "be more verbose (show counter open errors, etc)"), 3387 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3388 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3389 "per thread counts"), 3390 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3391 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3392 "Record the sample physical addresses"), 3393 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3394 "Record the sampled data address data page size"), 3395 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3396 "Record the sampled code address (ip) page size"), 3397 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3398 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3399 "Record the sample identifier"), 3400 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3401 &record.opts.sample_time_set, 3402 "Record the sample timestamps"), 3403 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3404 "Record the sample period"), 3405 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3406 "don't sample"), 3407 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3408 &record.no_buildid_cache_set, 3409 "do not update the buildid cache"), 3410 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3411 &record.no_buildid_set, 3412 "do not collect buildids in perf.data"), 3413 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3414 "monitor event in cgroup name only", 3415 parse_cgroups), 3416 OPT_CALLBACK('D', "delay", &record, "ms", 3417 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3418 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3419 record__parse_event_enable_time), 3420 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3421 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3422 "user to profile"), 3423 3424 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3425 "branch any", "sample any taken branches", 3426 parse_branch_stack), 3427 3428 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3429 "branch filter mask", "branch stack filter modes", 3430 parse_branch_stack), 3431 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3432 "sample by weight (on special events only)"), 3433 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3434 "sample transaction flags (special events only)"), 3435 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3436 "use per-thread mmaps"), 3437 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3438 "sample selected machine registers on interrupt," 3439 " use '-I?' 
to list register names", parse_intr_regs), 3440 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3441 "sample selected machine registers on interrupt," 3442 " use '--user-regs=?' to list register names", parse_user_regs), 3443 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3444 "Record running/enabled time of read (:S) events"), 3445 OPT_CALLBACK('k', "clockid", &record.opts, 3446 "clockid", "clockid to use for events, see clock_gettime()", 3447 parse_clockid), 3448 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3449 "opts", "AUX area tracing Snapshot Mode", ""), 3450 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3451 "opts", "sample AUX area", ""), 3452 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3453 "per thread proc mmap processing timeout in ms"), 3454 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3455 "Record namespaces events"), 3456 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3457 "Record cgroup events"), 3458 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3459 &record.opts.record_switch_events_set, 3460 "Record context switch events"), 3461 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3462 "Configure all used events to run in kernel space.", 3463 PARSE_OPT_EXCLUSIVE), 3464 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3465 "Configure all used events to run in user space.", 3466 PARSE_OPT_EXCLUSIVE), 3467 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3468 "collect kernel callchains"), 3469 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3470 "collect user callchains"), 3471 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3472 "clang binary to use for compiling BPF scriptlets"), 3473 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3474 "options passed to clang when compiling BPF scriptlets"), 3475 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3476 "file", "vmlinux pathname"), 3477 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3478 "Record build-id of all DSOs regardless of hits"), 3479 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3480 "Record build-id in map events"), 3481 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3482 "append timestamp to output filename"), 3483 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3484 "Record timestamp boundary (time of first/last samples)"), 3485 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3486 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3487 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3488 "signal"), 3489 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3490 "switch output event selector. 
use 'perf list' to list available events", 3491 parse_events_option_new_evlist), 3492 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3493 "Limit number of switch output generated files"), 3494 OPT_BOOLEAN(0, "dry-run", &dry_run, 3495 "Parse options then exit"), 3496 #ifdef HAVE_AIO_SUPPORT 3497 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3498 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3499 record__aio_parse), 3500 #endif 3501 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3502 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3503 record__parse_affinity), 3504 #ifdef HAVE_ZSTD_SUPPORT 3505 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3506 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3507 record__parse_comp_level), 3508 #endif 3509 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3510 "size", "Limit the maximum size of the output file", parse_output_max_size), 3511 OPT_UINTEGER(0, "num-thread-synthesize", 3512 &record.opts.nr_threads_synthesize, 3513 "number of threads to run for event synthesis"), 3514 #ifdef HAVE_LIBPFM 3515 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3516 "libpfm4 event selector. use 'perf list' to list available events", 3517 parse_libpfm_events_option), 3518 #endif 3519 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3520 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3521 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3522 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3523 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3524 parse_control_option), 3525 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3526 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3527 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3528 &record.debuginfod.set, "debuginfod urls", 3529 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3530 "system"), 3531 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3532 "write collected trace data into several data files using parallel threads", 3533 record__parse_threads), 3534 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3535 OPT_END() 3536 }; 3537 3538 struct option *record_options = __record_options; 3539 3540 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3541 { 3542 struct perf_cpu cpu; 3543 int idx; 3544 3545 if (cpu_map__is_dummy(cpus)) 3546 return 0; 3547 3548 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3549 if (cpu.cpu == -1) 3550 continue; 3551 /* Return ENODEV is input cpu is greater than max cpu */ 3552 if ((unsigned long)cpu.cpu > mask->nbits) 3553 return -ENODEV; 3554 __set_bit(cpu.cpu, mask->bits); 3555 } 3556 3557 return 0; 3558 } 3559 3560 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3561 { 3562 struct perf_cpu_map *cpus; 3563 3564 cpus = perf_cpu_map__new(mask_spec); 3565 if (!cpus) 3566 return -ENOMEM; 3567 3568 bitmap_zero(mask->bits, mask->nbits); 3569 if (record__mmap_cpu_mask_init(mask, cpus)) 3570 return -ENODEV; 3571 3572 perf_cpu_map__put(cpus); 3573 3574 return 0; 3575 } 
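/*
 * The helpers below build the per-thread maps/affinity CPU masks used by
 * parallel trace streaming (--threads). Masks are derived either from the
 * evlist CPU map (cpu/core/package/numa specs) or from a user spec parsed by
 * record__init_thread_user_masks(), where each <maps cpus>/<affinity cpus>
 * pair is separated by ':', e.g. --threads=0-3/0-3:4-7/4-7 (an illustrative
 * spec, not one taken from this file).
 */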
3576 3577 static void record__free_thread_masks(struct record *rec, int nr_threads) 3578 { 3579 int t; 3580 3581 if (rec->thread_masks) 3582 for (t = 0; t < nr_threads; t++) 3583 record__thread_mask_free(&rec->thread_masks[t]); 3584 3585 zfree(&rec->thread_masks); 3586 } 3587 3588 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3589 { 3590 int t, ret; 3591 3592 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3593 if (!rec->thread_masks) { 3594 pr_err("Failed to allocate thread masks\n"); 3595 return -ENOMEM; 3596 } 3597 3598 for (t = 0; t < nr_threads; t++) { 3599 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3600 if (ret) { 3601 pr_err("Failed to allocate thread masks[%d]\n", t); 3602 goto out_free; 3603 } 3604 } 3605 3606 return 0; 3607 3608 out_free: 3609 record__free_thread_masks(rec, nr_threads); 3610 3611 return ret; 3612 } 3613 3614 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3615 { 3616 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3617 3618 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3619 if (ret) 3620 return ret; 3621 3622 rec->nr_threads = nr_cpus; 3623 pr_debug("nr_threads: %d\n", rec->nr_threads); 3624 3625 for (t = 0; t < rec->nr_threads; t++) { 3626 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3627 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3628 if (verbose > 0) { 3629 pr_debug("thread_masks[%d]: ", t); 3630 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3631 pr_debug("thread_masks[%d]: ", t); 3632 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3633 } 3634 } 3635 3636 return 0; 3637 } 3638 3639 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3640 const char **maps_spec, const char **affinity_spec, 3641 u32 nr_spec) 3642 { 3643 u32 s; 3644 int ret = 0, t = 0; 3645 struct mmap_cpu_mask cpus_mask; 3646 struct thread_mask thread_mask, full_mask, *thread_masks; 3647 3648 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3649 if (ret) { 3650 pr_err("Failed to allocate CPUs mask\n"); 3651 return ret; 3652 } 3653 3654 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3655 if (ret) { 3656 pr_err("Failed to init cpu mask\n"); 3657 goto out_free_cpu_mask; 3658 } 3659 3660 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3661 if (ret) { 3662 pr_err("Failed to allocate full mask\n"); 3663 goto out_free_cpu_mask; 3664 } 3665 3666 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3667 if (ret) { 3668 pr_err("Failed to allocate thread mask\n"); 3669 goto out_free_full_and_cpu_masks; 3670 } 3671 3672 for (s = 0; s < nr_spec; s++) { 3673 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3674 if (ret) { 3675 pr_err("Failed to initialize maps thread mask\n"); 3676 goto out_free; 3677 } 3678 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3679 if (ret) { 3680 pr_err("Failed to initialize affinity thread mask\n"); 3681 goto out_free; 3682 } 3683 3684 /* ignore invalid CPUs but do not allow empty masks */ 3685 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3686 cpus_mask.bits, thread_mask.maps.nbits)) { 3687 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3688 ret = -EINVAL; 3689 goto out_free; 3690 } 3691 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3692 
cpus_mask.bits, thread_mask.affinity.nbits)) { 3693 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3694 ret = -EINVAL; 3695 goto out_free; 3696 } 3697 3698 /* do not allow intersection with other masks (full_mask) */ 3699 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3700 thread_mask.maps.nbits)) { 3701 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3702 ret = -EINVAL; 3703 goto out_free; 3704 } 3705 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3706 thread_mask.affinity.nbits)) { 3707 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3708 ret = -EINVAL; 3709 goto out_free; 3710 } 3711 3712 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3713 thread_mask.maps.bits, full_mask.maps.nbits); 3714 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3715 thread_mask.affinity.bits, full_mask.maps.nbits); 3716 3717 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3718 if (!thread_masks) { 3719 pr_err("Failed to reallocate thread masks\n"); 3720 ret = -ENOMEM; 3721 goto out_free; 3722 } 3723 rec->thread_masks = thread_masks; 3724 rec->thread_masks[t] = thread_mask; 3725 if (verbose > 0) { 3726 pr_debug("thread_masks[%d]: ", t); 3727 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3728 pr_debug("thread_masks[%d]: ", t); 3729 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3730 } 3731 t++; 3732 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3733 if (ret) { 3734 pr_err("Failed to allocate thread mask\n"); 3735 goto out_free_full_and_cpu_masks; 3736 } 3737 } 3738 rec->nr_threads = t; 3739 pr_debug("nr_threads: %d\n", rec->nr_threads); 3740 if (!rec->nr_threads) 3741 ret = -EINVAL; 3742 3743 out_free: 3744 record__thread_mask_free(&thread_mask); 3745 out_free_full_and_cpu_masks: 3746 record__thread_mask_free(&full_mask); 3747 out_free_cpu_mask: 3748 record__mmap_cpu_mask_free(&cpus_mask); 3749 3750 return ret; 3751 } 3752 3753 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3754 { 3755 int ret; 3756 struct cpu_topology *topo; 3757 3758 topo = cpu_topology__new(); 3759 if (!topo) { 3760 pr_err("Failed to allocate CPU topology\n"); 3761 return -ENOMEM; 3762 } 3763 3764 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3765 topo->core_cpus_list, topo->core_cpus_lists); 3766 cpu_topology__delete(topo); 3767 3768 return ret; 3769 } 3770 3771 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3772 { 3773 int ret; 3774 struct cpu_topology *topo; 3775 3776 topo = cpu_topology__new(); 3777 if (!topo) { 3778 pr_err("Failed to allocate CPU topology\n"); 3779 return -ENOMEM; 3780 } 3781 3782 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3783 topo->package_cpus_list, topo->package_cpus_lists); 3784 cpu_topology__delete(topo); 3785 3786 return ret; 3787 } 3788 3789 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3790 { 3791 u32 s; 3792 int ret; 3793 const char **spec; 3794 struct numa_topology *topo; 3795 3796 topo = numa_topology__new(); 3797 if (!topo) { 3798 pr_err("Failed to allocate NUMA topology\n"); 3799 return -ENOMEM; 3800 } 3801 3802 spec = zalloc(topo->nr * sizeof(char *)); 3803 if (!spec) { 3804 pr_err("Failed to allocate NUMA spec\n"); 3805 ret = -ENOMEM; 3806 goto out_delete_topo; 3807 } 3808 for (s = 0; s < topo->nr; s++) 3809 spec[s] = topo->nodes[s].cpus; 3810 
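	/*
	 * One stream thread per NUMA node: the node's CPU list string is used
	 * as both the maps and the affinity spec.
	 */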
3811 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3812 3813 zfree(&spec); 3814 3815 out_delete_topo: 3816 numa_topology__delete(topo); 3817 3818 return ret; 3819 } 3820 3821 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3822 { 3823 int t, ret; 3824 u32 s, nr_spec = 0; 3825 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3826 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3827 3828 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3829 spec = strtok_r(user_spec, ":", &spec_ptr); 3830 if (spec == NULL) 3831 break; 3832 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3833 mask = strtok_r(spec, "/", &mask_ptr); 3834 if (mask == NULL) 3835 break; 3836 pr_debug2(" maps mask: %s\n", mask); 3837 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3838 if (!tmp_spec) { 3839 pr_err("Failed to reallocate maps spec\n"); 3840 ret = -ENOMEM; 3841 goto out_free; 3842 } 3843 maps_spec = tmp_spec; 3844 maps_spec[nr_spec] = dup_mask = strdup(mask); 3845 if (!maps_spec[nr_spec]) { 3846 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3847 ret = -ENOMEM; 3848 goto out_free; 3849 } 3850 mask = strtok_r(NULL, "/", &mask_ptr); 3851 if (mask == NULL) { 3852 pr_err("Invalid thread maps or affinity specs\n"); 3853 ret = -EINVAL; 3854 goto out_free; 3855 } 3856 pr_debug2(" affinity mask: %s\n", mask); 3857 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3858 if (!tmp_spec) { 3859 pr_err("Failed to reallocate affinity spec\n"); 3860 ret = -ENOMEM; 3861 goto out_free; 3862 } 3863 affinity_spec = tmp_spec; 3864 affinity_spec[nr_spec] = strdup(mask); 3865 if (!affinity_spec[nr_spec]) { 3866 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3867 ret = -ENOMEM; 3868 goto out_free; 3869 } 3870 dup_mask = NULL; 3871 nr_spec++; 3872 } 3873 3874 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3875 (const char **)affinity_spec, nr_spec); 3876 3877 out_free: 3878 free(dup_mask); 3879 for (s = 0; s < nr_spec; s++) { 3880 if (maps_spec) 3881 free(maps_spec[s]); 3882 if (affinity_spec) 3883 free(affinity_spec[s]); 3884 } 3885 free(affinity_spec); 3886 free(maps_spec); 3887 3888 return ret; 3889 } 3890 3891 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3892 { 3893 int ret; 3894 3895 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3896 if (ret) 3897 return ret; 3898 3899 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3900 return -ENODEV; 3901 3902 rec->nr_threads = 1; 3903 3904 return 0; 3905 } 3906 3907 static int record__init_thread_masks(struct record *rec) 3908 { 3909 int ret = 0; 3910 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3911 3912 if (!record__threads_enabled(rec)) 3913 return record__init_thread_default_masks(rec, cpus); 3914 3915 if (evlist__per_thread(rec->evlist)) { 3916 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3917 return -EINVAL; 3918 } 3919 3920 switch (rec->opts.threads_spec) { 3921 case THREAD_SPEC__CPU: 3922 ret = record__init_thread_cpu_masks(rec, cpus); 3923 break; 3924 case THREAD_SPEC__CORE: 3925 ret = record__init_thread_core_masks(rec, cpus); 3926 break; 3927 case THREAD_SPEC__PACKAGE: 3928 ret = record__init_thread_package_masks(rec, cpus); 3929 break; 3930 case THREAD_SPEC__NUMA: 3931 ret = record__init_thread_numa_masks(rec, cpus); 3932 break; 3933 case 
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support for recording build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
		       errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 * perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		err = -EINVAL;
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
			       errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace; that would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

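	/*
	 * Set up the per-thread mmap and affinity masks used for parallel
	 * data streaming; without a thread spec a single default thread is
	 * used.
	 */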
	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}