1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include <internal/xyarray.h> 14 #include "util/parse-events.h" 15 #include "util/config.h" 16 17 #include "util/callchain.h" 18 #include "util/cgroup.h" 19 #include "util/header.h" 20 #include "util/event.h" 21 #include "util/evlist.h" 22 #include "util/evsel.h" 23 #include "util/debug.h" 24 #include "util/mmap.h" 25 #include "util/mutex.h" 26 #include "util/target.h" 27 #include "util/session.h" 28 #include "util/tool.h" 29 #include "util/symbol.h" 30 #include "util/record.h" 31 #include "util/cpumap.h" 32 #include "util/thread_map.h" 33 #include "util/data.h" 34 #include "util/perf_regs.h" 35 #include "util/auxtrace.h" 36 #include "util/tsc.h" 37 #include "util/parse-branch-options.h" 38 #include "util/parse-regs-options.h" 39 #include "util/perf_api_probe.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/pmu.h" 50 #include "util/pmus.h" 51 #include "util/clockid.h" 52 #include "util/off_cpu.h" 53 #include "util/bpf-filter.h" 54 #include "asm/bug.h" 55 #include "perf.h" 56 #include "cputopo.h" 57 58 #include <errno.h> 59 #include <inttypes.h> 60 #include <locale.h> 61 #include <poll.h> 62 #include <pthread.h> 63 #include <unistd.h> 64 #ifndef HAVE_GETTID 65 #include <syscall.h> 66 #endif 67 #include <sched.h> 68 #include <signal.h> 69 #ifdef HAVE_EVENTFD_SUPPORT 70 #include <sys/eventfd.h> 71 #endif 72 #include <sys/mman.h> 73 #include <sys/wait.h> 74 #include <sys/types.h> 75 #include <sys/stat.h> 76 #include <fcntl.h> 77 #include <linux/err.h> 78 #include <linux/string.h> 79 #include <linux/time64.h> 80 #include <linux/zalloc.h> 81 #include <linux/bitmap.h> 82 #include <sys/time.h> 83 84 struct switch_output { 85 bool enabled; 86 bool signal; 87 unsigned long size; 88 unsigned long time; 89 const char *str; 90 bool set; 91 char **filenames; 92 int num_files; 93 int cur_file; 94 }; 95 96 struct thread_mask { 97 struct mmap_cpu_mask maps; 98 struct mmap_cpu_mask affinity; 99 }; 100 101 struct record_thread { 102 pid_t tid; 103 struct thread_mask *mask; 104 struct { 105 int msg[2]; 106 int ack[2]; 107 } pipes; 108 struct fdarray pollfd; 109 int ctlfd_pos; 110 int nr_mmaps; 111 struct mmap **maps; 112 struct mmap **overwrite_maps; 113 struct record *rec; 114 unsigned long long samples; 115 unsigned long waking; 116 u64 bytes_written; 117 u64 bytes_transferred; 118 u64 bytes_compressed; 119 }; 120 121 static __thread struct record_thread *thread; 122 123 enum thread_msg { 124 THREAD_MSG__UNDEFINED = 0, 125 THREAD_MSG__READY, 126 THREAD_MSG__MAX, 127 }; 128 129 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 130 "UNDEFINED", "READY" 131 }; 132 133 enum thread_spec { 134 THREAD_SPEC__UNDEFINED = 0, 135 THREAD_SPEC__CPU, 136 THREAD_SPEC__CORE, 137 THREAD_SPEC__PACKAGE, 138 THREAD_SPEC__NUMA, 139 THREAD_SPEC__USER, 140 THREAD_SPEC__MAX, 141 }; 142 143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 144 "undefined", "cpu", "core", "package", "numa", "user" 145 }; 146 147 struct pollfd_index_map { 148 int 
evlist_pollfd_index; 149 int thread_pollfd_index; 150 }; 151 152 struct record { 153 struct perf_tool tool; 154 struct record_opts opts; 155 u64 bytes_written; 156 u64 thread_bytes_written; 157 struct perf_data data; 158 struct auxtrace_record *itr; 159 struct evlist *evlist; 160 struct perf_session *session; 161 struct evlist *sb_evlist; 162 pthread_t thread_id; 163 int realtime_prio; 164 bool switch_output_event_set; 165 bool no_buildid; 166 bool no_buildid_set; 167 bool no_buildid_cache; 168 bool no_buildid_cache_set; 169 bool buildid_all; 170 bool buildid_mmap; 171 bool timestamp_filename; 172 bool timestamp_boundary; 173 bool off_cpu; 174 struct switch_output switch_output; 175 unsigned long long samples; 176 unsigned long output_max_size; /* = 0: unlimited */ 177 struct perf_debuginfod debuginfod; 178 int nr_threads; 179 struct thread_mask *thread_masks; 180 struct record_thread *thread_data; 181 struct pollfd_index_map *index_map; 182 size_t index_map_sz; 183 size_t index_map_cnt; 184 }; 185 186 static volatile int done; 187 188 static volatile int auxtrace_record__snapshot_started; 189 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 190 static DEFINE_TRIGGER(switch_output_trigger); 191 192 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 193 "SYS", "NODE", "CPU" 194 }; 195 196 #ifndef HAVE_GETTID 197 static inline pid_t gettid(void) 198 { 199 return (pid_t)syscall(__NR_gettid); 200 } 201 #endif 202 203 static int record__threads_enabled(struct record *rec) 204 { 205 return rec->opts.threads_spec; 206 } 207 208 static bool switch_output_signal(struct record *rec) 209 { 210 return rec->switch_output.signal && 211 trigger_is_ready(&switch_output_trigger); 212 } 213 214 static bool switch_output_size(struct record *rec) 215 { 216 return rec->switch_output.size && 217 trigger_is_ready(&switch_output_trigger) && 218 (rec->bytes_written >= rec->switch_output.size); 219 } 220 221 static bool switch_output_time(struct record *rec) 222 { 223 return rec->switch_output.time && 224 trigger_is_ready(&switch_output_trigger); 225 } 226 227 static u64 record__bytes_written(struct record *rec) 228 { 229 return rec->bytes_written + rec->thread_bytes_written; 230 } 231 232 static bool record__output_max_size_exceeded(struct record *rec) 233 { 234 return rec->output_max_size && 235 (record__bytes_written(rec) >= rec->output_max_size); 236 } 237 238 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 239 void *bf, size_t size) 240 { 241 struct perf_data_file *file = &rec->session->data->file; 242 243 if (map && map->file) 244 file = map->file; 245 246 if (perf_data_file__write(file, bf, size) < 0) { 247 pr_err("failed to write perf data, error: %m\n"); 248 return -1; 249 } 250 251 if (map && map->file) { 252 thread->bytes_written += size; 253 rec->thread_bytes_written += size; 254 } else { 255 rec->bytes_written += size; 256 } 257 258 if (record__output_max_size_exceeded(rec) && !done) { 259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 260 " stopping session ]\n", 261 record__bytes_written(rec) >> 10); 262 done = 1; 263 } 264 265 if (switch_output_size(rec)) 266 trigger_hit(&switch_output_trigger); 267 268 return 0; 269 } 270 271 static int record__aio_enabled(struct record *rec); 272 static int record__comp_enabled(struct record *rec); 273 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 274 void *dst, size_t dst_size, void *src, size_t src_size); 275 276 #ifdef HAVE_AIO_SUPPORT 277 static int 
record__aio_write(struct aiocb *cblock, int trace_fd,
		  void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole
		 * chunk at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * the part of the data from map->start till the upper bound and then the
	 * remainder from the beginning of the kernel buffer till the end of the
	 * data chunk.
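	 *
	 * For example (illustrative sizes only): if the chunk wraps the end of
	 * a 64KiB ring buffer, perf_mmap__push() invokes this callback once
	 * for each piece, and aio->size accumulates both pieces before
	 * record__aio_push() queues the single aio_write().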
405 */ 406 407 if (record__comp_enabled(aio->rec)) { 408 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 409 mmap__mmap_len(map) - aio->size, 410 buf, size); 411 } else { 412 memcpy(aio->data + aio->size, buf, size); 413 } 414 415 if (!aio->size) { 416 /* 417 * Increment map->refcount to guard map->aio.data[] buffer 418 * from premature deallocation because map object can be 419 * released earlier than aio write request started on 420 * map->aio.data[] buffer is complete. 421 * 422 * perf_mmap__put() is done at record__aio_complete() 423 * after started aio request completion or at record__aio_push() 424 * if the request failed to start. 425 */ 426 perf_mmap__get(&map->core); 427 } 428 429 aio->size += size; 430 431 return size; 432 } 433 434 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 435 { 436 int ret, idx; 437 int trace_fd = rec->session->data->file.fd; 438 struct record_aio aio = { .rec = rec, .size = 0 }; 439 440 /* 441 * Call record__aio_sync() to wait till map->aio.data[] buffer 442 * becomes available after previous aio write operation. 443 */ 444 445 idx = record__aio_sync(map, false); 446 aio.data = map->aio.data[idx]; 447 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 448 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 449 return ret; 450 451 rec->samples++; 452 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 453 if (!ret) { 454 *off += aio.size; 455 rec->bytes_written += aio.size; 456 if (switch_output_size(rec)) 457 trigger_hit(&switch_output_trigger); 458 } else { 459 /* 460 * Decrement map->refcount incremented in record__aio_pushfn() 461 * back if record__aio_write() operation failed to start, otherwise 462 * map->refcount is decremented in record__aio_complete() after 463 * aio write operation finishes successfully. 
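		 *
		 * Note: this asynchronous path is only taken when
		 * record__aio_enabled() is true, i.e. when nr_cblocks was set
		 * via the --aio option of 'perf record' (parsed by
		 * record__aio_parse() below).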
464 */ 465 perf_mmap__put(&map->core); 466 } 467 468 return ret; 469 } 470 471 static off_t record__aio_get_pos(int trace_fd) 472 { 473 return lseek(trace_fd, 0, SEEK_CUR); 474 } 475 476 static void record__aio_set_pos(int trace_fd, off_t pos) 477 { 478 lseek(trace_fd, pos, SEEK_SET); 479 } 480 481 static void record__aio_mmap_read_sync(struct record *rec) 482 { 483 int i; 484 struct evlist *evlist = rec->evlist; 485 struct mmap *maps = evlist->mmap; 486 487 if (!record__aio_enabled(rec)) 488 return; 489 490 for (i = 0; i < evlist->core.nr_mmaps; i++) { 491 struct mmap *map = &maps[i]; 492 493 if (map->core.base) 494 record__aio_sync(map, true); 495 } 496 } 497 498 static int nr_cblocks_default = 1; 499 static int nr_cblocks_max = 4; 500 501 static int record__aio_parse(const struct option *opt, 502 const char *str, 503 int unset) 504 { 505 struct record_opts *opts = (struct record_opts *)opt->value; 506 507 if (unset) { 508 opts->nr_cblocks = 0; 509 } else { 510 if (str) 511 opts->nr_cblocks = strtol(str, NULL, 0); 512 if (!opts->nr_cblocks) 513 opts->nr_cblocks = nr_cblocks_default; 514 } 515 516 return 0; 517 } 518 #else /* HAVE_AIO_SUPPORT */ 519 static int nr_cblocks_max = 0; 520 521 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 522 off_t *off __maybe_unused) 523 { 524 return -1; 525 } 526 527 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 528 { 529 return -1; 530 } 531 532 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 533 { 534 } 535 536 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 537 { 538 } 539 #endif 540 541 static int record__aio_enabled(struct record *rec) 542 { 543 return rec->opts.nr_cblocks > 0; 544 } 545 546 #define MMAP_FLUSH_DEFAULT 1 547 static int record__mmap_flush_parse(const struct option *opt, 548 const char *str, 549 int unset) 550 { 551 int flush_max; 552 struct record_opts *opts = (struct record_opts *)opt->value; 553 static struct parse_tag tags[] = { 554 { .tag = 'B', .mult = 1 }, 555 { .tag = 'K', .mult = 1 << 10 }, 556 { .tag = 'M', .mult = 1 << 20 }, 557 { .tag = 'G', .mult = 1 << 30 }, 558 { .tag = 0 }, 559 }; 560 561 if (unset) 562 return 0; 563 564 if (str) { 565 opts->mmap_flush = parse_tag_value(str, tags); 566 if (opts->mmap_flush == (int)-1) 567 opts->mmap_flush = strtol(str, NULL, 0); 568 } 569 570 if (!opts->mmap_flush) 571 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 572 573 flush_max = evlist__mmap_size(opts->mmap_pages); 574 flush_max /= 4; 575 if (opts->mmap_flush > flush_max) 576 opts->mmap_flush = flush_max; 577 578 return 0; 579 } 580 581 #ifdef HAVE_ZSTD_SUPPORT 582 static unsigned int comp_level_default = 1; 583 584 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 585 { 586 struct record_opts *opts = opt->value; 587 588 if (unset) { 589 opts->comp_level = 0; 590 } else { 591 if (str) 592 opts->comp_level = strtol(str, NULL, 0); 593 if (!opts->comp_level) 594 opts->comp_level = comp_level_default; 595 } 596 597 return 0; 598 } 599 #endif 600 static unsigned int comp_level_max = 22; 601 602 static int record__comp_enabled(struct record *rec) 603 { 604 return rec->opts.comp_level > 0; 605 } 606 607 static int process_synthesized_event(struct perf_tool *tool, 608 union perf_event *event, 609 struct perf_sample *sample __maybe_unused, 610 struct machine *machine __maybe_unused) 611 { 612 struct record *rec = container_of(tool, struct record, tool); 613 return record__write(rec, 
NULL, event, event->header.size); 614 } 615 616 static struct mutex synth_lock; 617 618 static int process_locked_synthesized_event(struct perf_tool *tool, 619 union perf_event *event, 620 struct perf_sample *sample __maybe_unused, 621 struct machine *machine __maybe_unused) 622 { 623 int ret; 624 625 mutex_lock(&synth_lock); 626 ret = process_synthesized_event(tool, event, sample, machine); 627 mutex_unlock(&synth_lock); 628 return ret; 629 } 630 631 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 632 { 633 struct record *rec = to; 634 635 if (record__comp_enabled(rec)) { 636 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 637 bf = map->data; 638 } 639 640 thread->samples++; 641 return record__write(rec, map, bf, size); 642 } 643 644 static volatile sig_atomic_t signr = -1; 645 static volatile sig_atomic_t child_finished; 646 #ifdef HAVE_EVENTFD_SUPPORT 647 static volatile sig_atomic_t done_fd = -1; 648 #endif 649 650 static void sig_handler(int sig) 651 { 652 if (sig == SIGCHLD) 653 child_finished = 1; 654 else 655 signr = sig; 656 657 done = 1; 658 #ifdef HAVE_EVENTFD_SUPPORT 659 if (done_fd >= 0) { 660 u64 tmp = 1; 661 int orig_errno = errno; 662 663 /* 664 * It is possible for this signal handler to run after done is 665 * checked in the main loop, but before the perf counter fds are 666 * polled. If this happens, the poll() will continue to wait 667 * even though done is set, and will only break out if either 668 * another signal is received, or the counters are ready for 669 * read. To ensure the poll() doesn't sleep when done is set, 670 * use an eventfd (done_fd) to wake up the poll(). 671 */ 672 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 673 pr_err("failed to signal wakeup fd, error: %m\n"); 674 675 errno = orig_errno; 676 } 677 #endif // HAVE_EVENTFD_SUPPORT 678 } 679 680 static void sigsegv_handler(int sig) 681 { 682 perf_hooks__recover(); 683 sighandler_dump_stack(sig); 684 } 685 686 static void record__sig_exit(void) 687 { 688 if (signr == -1) 689 return; 690 691 signal(signr, SIG_DFL); 692 raise(signr); 693 } 694 695 #ifdef HAVE_AUXTRACE_SUPPORT 696 697 static int record__process_auxtrace(struct perf_tool *tool, 698 struct mmap *map, 699 union perf_event *event, void *data1, 700 size_t len1, void *data2, size_t len2) 701 { 702 struct record *rec = container_of(tool, struct record, tool); 703 struct perf_data *data = &rec->data; 704 size_t padding; 705 u8 pad[8] = {0}; 706 707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 708 off_t file_offset; 709 int fd = perf_data__fd(data); 710 int err; 711 712 file_offset = lseek(fd, 0, SEEK_CUR); 713 if (file_offset == -1) 714 return -1; 715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 716 event, file_offset); 717 if (err) 718 return err; 719 } 720 721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 722 padding = (len1 + len2) & 7; 723 if (padding) 724 padding = 8 - padding; 725 726 record__write(rec, map, event, event->header.size); 727 record__write(rec, map, data1, len1); 728 if (len2) 729 record__write(rec, map, data2, len2); 730 record__write(rec, map, &pad, padding); 731 732 return 0; 733 } 734 735 static int record__auxtrace_mmap_read(struct record *rec, 736 struct mmap *map) 737 { 738 int ret; 739 740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 741 record__process_auxtrace); 742 if (ret < 0) 743 return ret; 744 745 if (ret) 746 rec->samples++; 747 748 return 0; 749 } 750 751 static int 
record__auxtrace_mmap_read_snapshot(struct record *rec, 752 struct mmap *map) 753 { 754 int ret; 755 756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 757 record__process_auxtrace, 758 rec->opts.auxtrace_snapshot_size); 759 if (ret < 0) 760 return ret; 761 762 if (ret) 763 rec->samples++; 764 765 return 0; 766 } 767 768 static int record__auxtrace_read_snapshot_all(struct record *rec) 769 { 770 int i; 771 int rc = 0; 772 773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 774 struct mmap *map = &rec->evlist->mmap[i]; 775 776 if (!map->auxtrace_mmap.base) 777 continue; 778 779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 780 rc = -1; 781 goto out; 782 } 783 } 784 out: 785 return rc; 786 } 787 788 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 789 { 790 pr_debug("Recording AUX area tracing snapshot\n"); 791 if (record__auxtrace_read_snapshot_all(rec) < 0) { 792 trigger_error(&auxtrace_snapshot_trigger); 793 } else { 794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 795 trigger_error(&auxtrace_snapshot_trigger); 796 else 797 trigger_ready(&auxtrace_snapshot_trigger); 798 } 799 } 800 801 static int record__auxtrace_snapshot_exit(struct record *rec) 802 { 803 if (trigger_is_error(&auxtrace_snapshot_trigger)) 804 return 0; 805 806 if (!auxtrace_record__snapshot_started && 807 auxtrace_record__snapshot_start(rec->itr)) 808 return -1; 809 810 record__read_auxtrace_snapshot(rec, true); 811 if (trigger_is_error(&auxtrace_snapshot_trigger)) 812 return -1; 813 814 return 0; 815 } 816 817 static int record__auxtrace_init(struct record *rec) 818 { 819 int err; 820 821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 822 && record__threads_enabled(rec)) { 823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 824 return -EINVAL; 825 } 826 827 if (!rec->itr) { 828 rec->itr = auxtrace_record__init(rec->evlist, &err); 829 if (err) 830 return err; 831 } 832 833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 834 rec->opts.auxtrace_snapshot_opts); 835 if (err) 836 return err; 837 838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 839 rec->opts.auxtrace_sample_opts); 840 if (err) 841 return err; 842 843 auxtrace_regroup_aux_output(rec->evlist); 844 845 return auxtrace_parse_filters(rec->evlist); 846 } 847 848 #else 849 850 static inline 851 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 852 struct mmap *map __maybe_unused) 853 { 854 return 0; 855 } 856 857 static inline 858 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 859 bool on_exit __maybe_unused) 860 { 861 } 862 863 static inline 864 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 865 { 866 return 0; 867 } 868 869 static inline 870 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 871 { 872 return 0; 873 } 874 875 static int record__auxtrace_init(struct record *rec __maybe_unused) 876 { 877 return 0; 878 } 879 880 #endif 881 882 static int record__config_text_poke(struct evlist *evlist) 883 { 884 struct evsel *evsel; 885 886 /* Nothing to do if text poke is already configured */ 887 evlist__for_each_entry(evlist, evsel) { 888 if (evsel->core.attr.text_poke) 889 return 0; 890 } 891 892 evsel = evlist__add_dummy_on_all_cpus(evlist); 893 if (!evsel) 894 return -ENOMEM; 895 896 evsel->core.attr.text_poke = 1; 897 evsel->core.attr.ksymbol = 1; 898 evsel->immediate = true; 899 
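	/*
	 * Sample timestamps let the text poke events be ordered against the
	 * samples and mmap records that reference the patched code.
	 */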
evsel__set_sample_bit(evsel, TIME); 900 901 return 0; 902 } 903 904 static int record__config_off_cpu(struct record *rec) 905 { 906 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 907 } 908 909 static bool record__kcore_readable(struct machine *machine) 910 { 911 char kcore[PATH_MAX]; 912 int fd; 913 914 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 915 916 fd = open(kcore, O_RDONLY); 917 if (fd < 0) 918 return false; 919 920 close(fd); 921 922 return true; 923 } 924 925 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 926 { 927 char from_dir[PATH_MAX]; 928 char kcore_dir[PATH_MAX]; 929 int ret; 930 931 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 932 933 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 934 if (ret) 935 return ret; 936 937 return kcore_copy(from_dir, kcore_dir); 938 } 939 940 static void record__thread_data_init_pipes(struct record_thread *thread_data) 941 { 942 thread_data->pipes.msg[0] = -1; 943 thread_data->pipes.msg[1] = -1; 944 thread_data->pipes.ack[0] = -1; 945 thread_data->pipes.ack[1] = -1; 946 } 947 948 static int record__thread_data_open_pipes(struct record_thread *thread_data) 949 { 950 if (pipe(thread_data->pipes.msg)) 951 return -EINVAL; 952 953 if (pipe(thread_data->pipes.ack)) { 954 close(thread_data->pipes.msg[0]); 955 thread_data->pipes.msg[0] = -1; 956 close(thread_data->pipes.msg[1]); 957 thread_data->pipes.msg[1] = -1; 958 return -EINVAL; 959 } 960 961 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 962 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 963 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 964 965 return 0; 966 } 967 968 static void record__thread_data_close_pipes(struct record_thread *thread_data) 969 { 970 if (thread_data->pipes.msg[0] != -1) { 971 close(thread_data->pipes.msg[0]); 972 thread_data->pipes.msg[0] = -1; 973 } 974 if (thread_data->pipes.msg[1] != -1) { 975 close(thread_data->pipes.msg[1]); 976 thread_data->pipes.msg[1] = -1; 977 } 978 if (thread_data->pipes.ack[0] != -1) { 979 close(thread_data->pipes.ack[0]); 980 thread_data->pipes.ack[0] = -1; 981 } 982 if (thread_data->pipes.ack[1] != -1) { 983 close(thread_data->pipes.ack[1]); 984 thread_data->pipes.ack[1] = -1; 985 } 986 } 987 988 static bool evlist__per_thread(struct evlist *evlist) 989 { 990 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 991 } 992 993 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 994 { 995 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 996 struct mmap *mmap = evlist->mmap; 997 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 998 struct perf_cpu_map *cpus = evlist->core.all_cpus; 999 bool per_thread = evlist__per_thread(evlist); 1000 1001 if (per_thread) 1002 thread_data->nr_mmaps = nr_mmaps; 1003 else 1004 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1005 thread_data->mask->maps.nbits); 1006 if (mmap) { 1007 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1008 if (!thread_data->maps) 1009 return -ENOMEM; 1010 } 1011 if (overwrite_mmap) { 1012 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1013 if (!thread_data->overwrite_maps) { 1014 zfree(&thread_data->maps); 1015 return -ENOMEM; 1016 } 1017 } 1018 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1019 thread_data->nr_mmaps, thread_data->maps, 
thread_data->overwrite_maps); 1020 1021 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1022 if (per_thread || 1023 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1024 if (thread_data->maps) { 1025 thread_data->maps[tm] = &mmap[m]; 1026 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1027 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1028 } 1029 if (thread_data->overwrite_maps) { 1030 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1031 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1032 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1033 } 1034 tm++; 1035 } 1036 } 1037 1038 return 0; 1039 } 1040 1041 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1042 { 1043 int f, tm, pos; 1044 struct mmap *map, *overwrite_map; 1045 1046 fdarray__init(&thread_data->pollfd, 64); 1047 1048 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1049 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1050 overwrite_map = thread_data->overwrite_maps ? 1051 thread_data->overwrite_maps[tm] : NULL; 1052 1053 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1054 void *ptr = evlist->core.pollfd.priv[f].ptr; 1055 1056 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1057 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1058 &evlist->core.pollfd); 1059 if (pos < 0) 1060 return pos; 1061 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1062 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1063 } 1064 } 1065 } 1066 1067 return 0; 1068 } 1069 1070 static void record__free_thread_data(struct record *rec) 1071 { 1072 int t; 1073 struct record_thread *thread_data = rec->thread_data; 1074 1075 if (thread_data == NULL) 1076 return; 1077 1078 for (t = 0; t < rec->nr_threads; t++) { 1079 record__thread_data_close_pipes(&thread_data[t]); 1080 zfree(&thread_data[t].maps); 1081 zfree(&thread_data[t].overwrite_maps); 1082 fdarray__exit(&thread_data[t].pollfd); 1083 } 1084 1085 zfree(&rec->thread_data); 1086 } 1087 1088 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1089 int evlist_pollfd_index, 1090 int thread_pollfd_index) 1091 { 1092 size_t x = rec->index_map_cnt; 1093 1094 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1095 return -ENOMEM; 1096 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1097 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1098 rec->index_map_cnt += 1; 1099 return 0; 1100 } 1101 1102 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1103 struct evlist *evlist, 1104 struct record_thread *thread_data) 1105 { 1106 struct pollfd *e_entries = evlist->core.pollfd.entries; 1107 struct pollfd *t_entries = thread_data->pollfd.entries; 1108 int err = 0; 1109 size_t i; 1110 1111 for (i = 0; i < rec->index_map_cnt; i++) { 1112 int e_pos = rec->index_map[i].evlist_pollfd_index; 1113 int t_pos = rec->index_map[i].thread_pollfd_index; 1114 1115 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1116 e_entries[e_pos].events != t_entries[t_pos].events) { 1117 pr_err("Thread and evlist pollfd index mismatch\n"); 1118 err = -EINVAL; 1119 continue; 1120 } 1121 e_entries[e_pos].revents = t_entries[t_pos].revents; 1122 } 1123 return err; 1124 } 1125 1126 static int record__dup_non_perf_events(struct record *rec, 1127 struct evlist *evlist, 1128 struct record_thread *thread_data) 1129 { 1130 struct fdarray *fda = &evlist->core.pollfd; 1131 int 
i, ret; 1132 1133 for (i = 0; i < fda->nr; i++) { 1134 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1135 continue; 1136 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1137 if (ret < 0) { 1138 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1139 return ret; 1140 } 1141 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1142 thread_data, ret, fda->entries[i].fd); 1143 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1144 if (ret < 0) { 1145 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1146 return ret; 1147 } 1148 } 1149 return 0; 1150 } 1151 1152 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1153 { 1154 int t, ret; 1155 struct record_thread *thread_data; 1156 1157 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1158 if (!rec->thread_data) { 1159 pr_err("Failed to allocate thread data\n"); 1160 return -ENOMEM; 1161 } 1162 thread_data = rec->thread_data; 1163 1164 for (t = 0; t < rec->nr_threads; t++) 1165 record__thread_data_init_pipes(&thread_data[t]); 1166 1167 for (t = 0; t < rec->nr_threads; t++) { 1168 thread_data[t].rec = rec; 1169 thread_data[t].mask = &rec->thread_masks[t]; 1170 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1171 if (ret) { 1172 pr_err("Failed to initialize thread[%d] maps\n", t); 1173 goto out_free; 1174 } 1175 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1176 if (ret) { 1177 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1178 goto out_free; 1179 } 1180 if (t) { 1181 thread_data[t].tid = -1; 1182 ret = record__thread_data_open_pipes(&thread_data[t]); 1183 if (ret) { 1184 pr_err("Failed to open thread[%d] communication pipes\n", t); 1185 goto out_free; 1186 } 1187 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1188 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1189 if (ret < 0) { 1190 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1191 goto out_free; 1192 } 1193 thread_data[t].ctlfd_pos = ret; 1194 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1195 thread_data, thread_data[t].ctlfd_pos, 1196 thread_data[t].pipes.msg[0]); 1197 } else { 1198 thread_data[t].tid = gettid(); 1199 1200 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1201 if (ret < 0) 1202 goto out_free; 1203 1204 thread_data[t].ctlfd_pos = -1; /* Not used */ 1205 } 1206 } 1207 1208 return 0; 1209 1210 out_free: 1211 record__free_thread_data(rec); 1212 1213 return ret; 1214 } 1215 1216 static int record__mmap_evlist(struct record *rec, 1217 struct evlist *evlist) 1218 { 1219 int i, ret; 1220 struct record_opts *opts = &rec->opts; 1221 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1222 opts->auxtrace_sample_mode; 1223 char msg[512]; 1224 1225 if (opts->affinity != PERF_AFFINITY_SYS) 1226 cpu__setup_cpunode_map(); 1227 1228 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1229 opts->auxtrace_mmap_pages, 1230 auxtrace_overwrite, 1231 opts->nr_cblocks, opts->affinity, 1232 opts->mmap_flush, opts->comp_level) < 0) { 1233 if (errno == EPERM) { 1234 pr_err("Permission error mapping pages.\n" 1235 "Consider increasing " 1236 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1237 "or try again with a smaller value of -m/--mmap_pages.\n" 1238 "(current value: %u,%u)\n", 1239 opts->mmap_pages, opts->auxtrace_mmap_pages); 1240 return -errno; 1241 } else { 1242 pr_err("failed to mmap with %d (%s)\n", errno, 1243 str_error_r(errno, msg, sizeof(msg))); 1244 
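			/*
			 * Prefer the precise -errno, but fall back to -EINVAL
			 * below when the mmap failure left errno unset.
			 */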
if (errno) 1245 return -errno; 1246 else 1247 return -EINVAL; 1248 } 1249 } 1250 1251 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1252 return -1; 1253 1254 ret = record__alloc_thread_data(rec, evlist); 1255 if (ret) 1256 return ret; 1257 1258 if (record__threads_enabled(rec)) { 1259 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1260 if (ret) { 1261 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1262 return ret; 1263 } 1264 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1265 if (evlist->mmap) 1266 evlist->mmap[i].file = &rec->data.dir.files[i]; 1267 if (evlist->overwrite_mmap) 1268 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1269 } 1270 } 1271 1272 return 0; 1273 } 1274 1275 static int record__mmap(struct record *rec) 1276 { 1277 return record__mmap_evlist(rec, rec->evlist); 1278 } 1279 1280 static int record__open(struct record *rec) 1281 { 1282 char msg[BUFSIZ]; 1283 struct evsel *pos; 1284 struct evlist *evlist = rec->evlist; 1285 struct perf_session *session = rec->session; 1286 struct record_opts *opts = &rec->opts; 1287 int rc = 0; 1288 1289 /* 1290 * For initial_delay, system wide or a hybrid system, we need to add a 1291 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1292 * of waiting or event synthesis. 1293 */ 1294 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 1295 perf_pmus__num_core_pmus() > 1) { 1296 pos = evlist__get_tracking_event(evlist); 1297 if (!evsel__is_dummy_event(pos)) { 1298 /* Set up dummy event. */ 1299 if (evlist__add_dummy(evlist)) 1300 return -ENOMEM; 1301 pos = evlist__last(evlist); 1302 evlist__set_tracking_event(evlist, pos); 1303 } 1304 1305 /* 1306 * Enable the dummy event when the process is forked for 1307 * initial_delay, immediately for system wide. 1308 */ 1309 if (opts->target.initial_delay && !pos->immediate && 1310 !target__has_cpu(&opts->target)) 1311 pos->core.attr.enable_on_exec = 1; 1312 else 1313 pos->immediate = 1; 1314 } 1315 1316 evlist__config(evlist, opts, &callchain_param); 1317 1318 evlist__for_each_entry(evlist, pos) { 1319 try_again: 1320 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1321 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1322 if (verbose > 0) 1323 ui__warning("%s\n", msg); 1324 goto try_again; 1325 } 1326 if ((errno == EINVAL || errno == EBADF) && 1327 pos->core.leader != &pos->core && 1328 pos->weak_group) { 1329 pos = evlist__reset_weak_group(evlist, pos, true); 1330 goto try_again; 1331 } 1332 rc = -errno; 1333 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1334 ui__error("%s\n", msg); 1335 goto out; 1336 } 1337 1338 pos->supported = true; 1339 } 1340 1341 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1342 pr_warning( 1343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1344 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1346 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1347 "Samples in kernel modules won't be resolved at all.\n\n" 1348 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with a real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a DSO preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
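	 *
	 * This callback is only invoked from record__synthesize() when
	 * perf_guest is set; see the machines__process_guests() call below.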
1447 */ 1448 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1449 machine); 1450 if (err < 0) 1451 pr_err("Couldn't record guest kernel [%d]'s reference" 1452 " relocation symbol.\n", machine->pid); 1453 } 1454 1455 static struct perf_event_header finished_round_event = { 1456 .size = sizeof(struct perf_event_header), 1457 .type = PERF_RECORD_FINISHED_ROUND, 1458 }; 1459 1460 static struct perf_event_header finished_init_event = { 1461 .size = sizeof(struct perf_event_header), 1462 .type = PERF_RECORD_FINISHED_INIT, 1463 }; 1464 1465 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1466 { 1467 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1468 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1469 thread->mask->affinity.nbits)) { 1470 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1471 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1472 map->affinity_mask.bits, thread->mask->affinity.nbits); 1473 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1474 (cpu_set_t *)thread->mask->affinity.bits); 1475 if (verbose == 2) { 1476 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1477 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1478 } 1479 } 1480 } 1481 1482 static size_t process_comp_header(void *record, size_t increment) 1483 { 1484 struct perf_record_compressed *event = record; 1485 size_t size = sizeof(*event); 1486 1487 if (increment) { 1488 event->header.size += increment; 1489 return increment; 1490 } 1491 1492 event->header.type = PERF_RECORD_COMPRESSED; 1493 event->header.size = size; 1494 1495 return size; 1496 } 1497 1498 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1499 void *dst, size_t dst_size, void *src, size_t src_size) 1500 { 1501 size_t compressed; 1502 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1503 struct zstd_data *zstd_data = &session->zstd_data; 1504 1505 if (map && map->file) 1506 zstd_data = &map->zstd_data; 1507 1508 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1509 max_record_size, process_comp_header); 1510 1511 if (map && map->file) { 1512 thread->bytes_transferred += src_size; 1513 thread->bytes_compressed += compressed; 1514 } else { 1515 session->bytes_transferred += src_size; 1516 session->bytes_compressed += compressed; 1517 } 1518 1519 return compressed; 1520 } 1521 1522 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1523 bool overwrite, bool synch) 1524 { 1525 u64 bytes_written = rec->bytes_written; 1526 int i; 1527 int rc = 0; 1528 int nr_mmaps; 1529 struct mmap **maps; 1530 int trace_fd = rec->data.file.fd; 1531 off_t off = 0; 1532 1533 if (!evlist) 1534 return 0; 1535 1536 nr_mmaps = thread->nr_mmaps; 1537 maps = overwrite ? 
thread->overwrite_maps : thread->maps; 1538 1539 if (!maps) 1540 return 0; 1541 1542 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1543 return 0; 1544 1545 if (record__aio_enabled(rec)) 1546 off = record__aio_get_pos(trace_fd); 1547 1548 for (i = 0; i < nr_mmaps; i++) { 1549 u64 flush = 0; 1550 struct mmap *map = maps[i]; 1551 1552 if (map->core.base) { 1553 record__adjust_affinity(rec, map); 1554 if (synch) { 1555 flush = map->core.flush; 1556 map->core.flush = 1; 1557 } 1558 if (!record__aio_enabled(rec)) { 1559 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1560 if (synch) 1561 map->core.flush = flush; 1562 rc = -1; 1563 goto out; 1564 } 1565 } else { 1566 if (record__aio_push(rec, map, &off) < 0) { 1567 record__aio_set_pos(trace_fd, off); 1568 if (synch) 1569 map->core.flush = flush; 1570 rc = -1; 1571 goto out; 1572 } 1573 } 1574 if (synch) 1575 map->core.flush = flush; 1576 } 1577 1578 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1579 !rec->opts.auxtrace_sample_mode && 1580 record__auxtrace_mmap_read(rec, map) != 0) { 1581 rc = -1; 1582 goto out; 1583 } 1584 } 1585 1586 if (record__aio_enabled(rec)) 1587 record__aio_set_pos(trace_fd, off); 1588 1589 /* 1590 * Mark the round finished in case we wrote 1591 * at least one event. 1592 * 1593 * No need for round events in directory mode, 1594 * because per-cpu maps and files have data 1595 * sorted by kernel. 1596 */ 1597 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1598 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1599 1600 if (overwrite) 1601 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1602 out: 1603 return rc; 1604 } 1605 1606 static int record__mmap_read_all(struct record *rec, bool synch) 1607 { 1608 int err; 1609 1610 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1611 if (err) 1612 return err; 1613 1614 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1615 } 1616 1617 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1618 void *arg __maybe_unused) 1619 { 1620 struct perf_mmap *map = fda->priv[fd].ptr; 1621 1622 if (map) 1623 perf_mmap__put(map); 1624 } 1625 1626 static void *record__thread(void *arg) 1627 { 1628 enum thread_msg msg = THREAD_MSG__READY; 1629 bool terminate = false; 1630 struct fdarray *pollfd; 1631 int err, ctlfd_pos; 1632 1633 thread = arg; 1634 thread->tid = gettid(); 1635 1636 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1637 if (err == -1) 1638 pr_warning("threads[%d]: failed to notify on start: %s\n", 1639 thread->tid, strerror(errno)); 1640 1641 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1642 1643 pollfd = &thread->pollfd; 1644 ctlfd_pos = thread->ctlfd_pos; 1645 1646 for (;;) { 1647 unsigned long long hits = thread->samples; 1648 1649 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1650 break; 1651 1652 if (hits == thread->samples) { 1653 1654 err = fdarray__poll(pollfd, -1); 1655 /* 1656 * Propagate error, only if there's any. Ignore positive 1657 * number of returned events and interrupt error. 
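			 *
			 * fdarray__poll() returns what poll(2) returned, so a
			 * positive count only means some descriptors are ready
			 * and EINTR means the wait was interrupted by a signal;
			 * neither is fatal for this reader thread.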
1658 */ 1659 if (err > 0 || (err < 0 && errno == EINTR)) 1660 err = 0; 1661 thread->waking++; 1662 1663 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1664 record__thread_munmap_filtered, NULL) == 0) 1665 break; 1666 } 1667 1668 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1669 terminate = true; 1670 close(thread->pipes.msg[0]); 1671 thread->pipes.msg[0] = -1; 1672 pollfd->entries[ctlfd_pos].fd = -1; 1673 pollfd->entries[ctlfd_pos].events = 0; 1674 } 1675 1676 pollfd->entries[ctlfd_pos].revents = 0; 1677 } 1678 record__mmap_read_all(thread->rec, true); 1679 1680 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1681 if (err == -1) 1682 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1683 thread->tid, strerror(errno)); 1684 1685 return NULL; 1686 } 1687 1688 static void record__init_features(struct record *rec) 1689 { 1690 struct perf_session *session = rec->session; 1691 int feat; 1692 1693 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1694 perf_header__set_feat(&session->header, feat); 1695 1696 if (rec->no_buildid) 1697 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1698 1699 #ifdef HAVE_LIBTRACEEVENT 1700 if (!have_tracepoints(&rec->evlist->core.entries)) 1701 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1702 #endif 1703 1704 if (!rec->opts.branch_stack) 1705 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1706 1707 if (!rec->opts.full_auxtrace) 1708 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1709 1710 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1711 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1712 1713 if (!rec->opts.use_clockid) 1714 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1715 1716 if (!record__threads_enabled(rec)) 1717 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1718 1719 if (!record__comp_enabled(rec)) 1720 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1721 1722 perf_header__clear_feat(&session->header, HEADER_STAT); 1723 } 1724 1725 static void 1726 record__finish_output(struct record *rec) 1727 { 1728 int i; 1729 struct perf_data *data = &rec->data; 1730 int fd = perf_data__fd(data); 1731 1732 if (data->is_pipe) 1733 return; 1734 1735 rec->session->header.data_size += rec->bytes_written; 1736 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1737 if (record__threads_enabled(rec)) { 1738 for (i = 0; i < data->dir.nr; i++) 1739 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1740 } 1741 1742 if (!rec->no_buildid) { 1743 process_buildids(rec); 1744 1745 if (rec->buildid_all) 1746 dsos__hit_all(rec->session); 1747 } 1748 perf_session__write_header(rec->session, rec->evlist, fd, true); 1749 1750 return; 1751 } 1752 1753 static int record__synthesize_workload(struct record *rec, bool tail) 1754 { 1755 int err; 1756 struct perf_thread_map *thread_map; 1757 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1758 1759 if (rec->opts.tail_synthesize != tail) 1760 return 0; 1761 1762 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1763 if (thread_map == NULL) 1764 return -1; 1765 1766 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1767 process_synthesized_event, 1768 &rec->session->machines.host, 1769 needs_mmap, 1770 rec->opts.sample_address); 1771 perf_thread_map__put(thread_map); 1772 return err; 1773 } 1774 1775 static int write_finished_init(struct record *rec, bool tail) 1776 { 1777 if (rec->opts.tail_synthesize != tail) 
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	char *new_filename = NULL;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which would leave the newly created
		 * perf.data without map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
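		 *
		 * record__synthesize_workload() builds that single-entry
		 * thread_map from evlist->workload.pid, so the new file still
		 * gets COMM/MMAP records for the forked workload.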
1850 */ 1851 if (target__none(&rec->opts.target)) 1852 record__synthesize_workload(rec, false); 1853 write_finished_init(rec, false); 1854 } 1855 return fd; 1856 } 1857 1858 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1859 struct perf_record_lost_samples *lost, 1860 int cpu_idx, int thread_idx, u64 lost_count, 1861 u16 misc_flag) 1862 { 1863 struct perf_sample_id *sid; 1864 struct perf_sample sample = {}; 1865 int id_hdr_size; 1866 1867 lost->lost = lost_count; 1868 if (evsel->core.ids) { 1869 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1870 sample.id = sid->id; 1871 } 1872 1873 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1874 evsel->core.attr.sample_type, &sample); 1875 lost->header.size = sizeof(*lost) + id_hdr_size; 1876 lost->header.misc = misc_flag; 1877 record__write(rec, NULL, lost, lost->header.size); 1878 } 1879 1880 static void record__read_lost_samples(struct record *rec) 1881 { 1882 struct perf_session *session = rec->session; 1883 struct perf_record_lost_samples *lost; 1884 struct evsel *evsel; 1885 1886 /* there was an error during record__open */ 1887 if (session->evlist == NULL) 1888 return; 1889 1890 lost = zalloc(PERF_SAMPLE_MAX_SIZE); 1891 if (lost == NULL) { 1892 pr_debug("Memory allocation failed\n"); 1893 return; 1894 } 1895 1896 lost->header.type = PERF_RECORD_LOST_SAMPLES; 1897 1898 evlist__for_each_entry(session->evlist, evsel) { 1899 struct xyarray *xy = evsel->core.sample_id; 1900 u64 lost_count; 1901 1902 if (xy == NULL || evsel->core.fd == NULL) 1903 continue; 1904 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1905 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1906 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1907 continue; 1908 } 1909 1910 for (int x = 0; x < xyarray__max_x(xy); x++) { 1911 for (int y = 0; y < xyarray__max_y(xy); y++) { 1912 struct perf_counts_values count; 1913 1914 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 1915 pr_debug("read LOST count failed\n"); 1916 goto out; 1917 } 1918 1919 if (count.lost) { 1920 __record__save_lost_samples(rec, evsel, lost, 1921 x, y, count.lost, 0); 1922 } 1923 } 1924 } 1925 1926 lost_count = perf_bpf_filter__lost_count(evsel); 1927 if (lost_count) 1928 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, 1929 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 1930 } 1931 out: 1932 free(lost); 1933 } 1934 1935 static volatile sig_atomic_t workload_exec_errno; 1936 1937 /* 1938 * evlist__prepare_workload will send a SIGUSR1 1939 * if the fork fails, since we asked by setting its 1940 * want_signal to true. 
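 *
 * The forked child reports its errno through the signal's sival_int (see
 * evlist__prepare_workload()), and the handler below stashes it in
 * workload_exec_errno so the failure can be reported later.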
1941 */ 1942 static void workload_exec_failed_signal(int signo __maybe_unused, 1943 siginfo_t *info, 1944 void *ucontext __maybe_unused) 1945 { 1946 workload_exec_errno = info->si_value.sival_int; 1947 done = 1; 1948 child_finished = 1; 1949 } 1950 1951 static void snapshot_sig_handler(int sig); 1952 static void alarm_sig_handler(int sig); 1953 1954 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1955 { 1956 if (evlist) { 1957 if (evlist->mmap && evlist->mmap[0].core.base) 1958 return evlist->mmap[0].core.base; 1959 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1960 return evlist->overwrite_mmap[0].core.base; 1961 } 1962 return NULL; 1963 } 1964 1965 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1966 { 1967 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1968 if (pc) 1969 return pc; 1970 return NULL; 1971 } 1972 1973 static int record__synthesize(struct record *rec, bool tail) 1974 { 1975 struct perf_session *session = rec->session; 1976 struct machine *machine = &session->machines.host; 1977 struct perf_data *data = &rec->data; 1978 struct record_opts *opts = &rec->opts; 1979 struct perf_tool *tool = &rec->tool; 1980 int err = 0; 1981 event_op f = process_synthesized_event; 1982 1983 if (rec->opts.tail_synthesize != tail) 1984 return 0; 1985 1986 if (data->is_pipe) { 1987 err = perf_event__synthesize_for_pipe(tool, session, data, 1988 process_synthesized_event); 1989 if (err < 0) 1990 goto out; 1991 1992 rec->bytes_written += err; 1993 } 1994 1995 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1996 process_synthesized_event, machine); 1997 if (err) 1998 goto out; 1999 2000 /* Synthesize id_index before auxtrace_info */ 2001 err = perf_event__synthesize_id_index(tool, 2002 process_synthesized_event, 2003 session->evlist, machine); 2004 if (err) 2005 goto out; 2006 2007 if (rec->opts.full_auxtrace) { 2008 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2009 session, process_synthesized_event); 2010 if (err) 2011 goto out; 2012 } 2013 2014 if (!evlist__exclude_kernel(rec->evlist)) { 2015 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2016 machine); 2017 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2018 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2019 "Check /proc/kallsyms permission or run as root.\n"); 2020 2021 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2022 machine); 2023 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2024 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2025 "Check /proc/modules permission or run as root.\n"); 2026 } 2027 2028 if (perf_guest) { 2029 machines__process_guests(&session->machines, 2030 perf_event__synthesize_guest_os, tool); 2031 } 2032 2033 err = perf_event__synthesize_extra_attr(&rec->tool, 2034 rec->evlist, 2035 process_synthesized_event, 2036 data->is_pipe); 2037 if (err) 2038 goto out; 2039 2040 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2041 process_synthesized_event, 2042 NULL); 2043 if (err < 0) { 2044 pr_err("Couldn't synthesize thread map.\n"); 2045 return err; 2046 } 2047 2048 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2049 process_synthesized_event, NULL); 2050 if (err < 0) { 2051 pr_err("Couldn't synthesize cpu map.\n"); 2052 return err; 2053 } 2054 2055 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2056 machine, opts); 2057 if (err < 0) { 2058 pr_warning("Couldn't synthesize bpf events.\n"); 2059 err = 0; 2060 } 2061 2062 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2063 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2064 machine); 2065 if (err < 0) { 2066 pr_warning("Couldn't synthesize cgroup events.\n"); 2067 err = 0; 2068 } 2069 } 2070 2071 if (rec->opts.nr_threads_synthesize > 1) { 2072 mutex_init(&synth_lock); 2073 perf_set_multithreaded(); 2074 f = process_locked_synthesized_event; 2075 } 2076 2077 if (rec->opts.synth & PERF_SYNTH_TASK) { 2078 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2079 2080 err = __machine__synthesize_threads(machine, tool, &opts->target, 2081 rec->evlist->core.threads, 2082 f, needs_mmap, opts->sample_address, 2083 rec->opts.nr_threads_synthesize); 2084 } 2085 2086 if (rec->opts.nr_threads_synthesize > 1) { 2087 perf_set_singlethreaded(); 2088 mutex_destroy(&synth_lock); 2089 } 2090 2091 out: 2092 return err; 2093 } 2094 2095 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2096 { 2097 struct record *rec = data; 2098 pthread_kill(rec->thread_id, SIGUSR2); 2099 return 0; 2100 } 2101 2102 static int record__setup_sb_evlist(struct record *rec) 2103 { 2104 struct record_opts *opts = &rec->opts; 2105 2106 if (rec->sb_evlist != NULL) { 2107 /* 2108 * We get here if --switch-output-event populated the 2109 * sb_evlist, so associate a callback that will send a SIGUSR2 2110 * to the main thread. 
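	 *
	 * For example (illustrative only, the event name is not prescribed
	 * by this code):
	 *
	 *   perf record --switch-output-event=syscalls:sys_enter_exit_group ...
	 *
	 * rotates the output file when that side-band event is seen.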
2111 */ 2112 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2113 rec->thread_id = pthread_self(); 2114 } 2115 #ifdef HAVE_LIBBPF_SUPPORT 2116 if (!opts->no_bpf_event) { 2117 if (rec->sb_evlist == NULL) { 2118 rec->sb_evlist = evlist__new(); 2119 2120 if (rec->sb_evlist == NULL) { 2121 pr_err("Couldn't create side band evlist.\n."); 2122 return -1; 2123 } 2124 } 2125 2126 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2127 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 2128 return -1; 2129 } 2130 } 2131 #endif 2132 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2133 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2134 opts->no_bpf_event = true; 2135 } 2136 2137 return 0; 2138 } 2139 2140 static int record__init_clock(struct record *rec) 2141 { 2142 struct perf_session *session = rec->session; 2143 struct timespec ref_clockid; 2144 struct timeval ref_tod; 2145 u64 ref; 2146 2147 if (!rec->opts.use_clockid) 2148 return 0; 2149 2150 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2151 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2152 2153 session->header.env.clock.clockid = rec->opts.clockid; 2154 2155 if (gettimeofday(&ref_tod, NULL) != 0) { 2156 pr_err("gettimeofday failed, cannot set reference time.\n"); 2157 return -1; 2158 } 2159 2160 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2161 pr_err("clock_gettime failed, cannot set reference time.\n"); 2162 return -1; 2163 } 2164 2165 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2166 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2167 2168 session->header.env.clock.tod_ns = ref; 2169 2170 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2171 (u64) ref_clockid.tv_nsec; 2172 2173 session->header.env.clock.clockid_ns = ref; 2174 return 0; 2175 } 2176 2177 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2178 { 2179 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2180 trigger_hit(&auxtrace_snapshot_trigger); 2181 auxtrace_record__snapshot_started = 1; 2182 if (auxtrace_record__snapshot_start(rec->itr)) 2183 trigger_error(&auxtrace_snapshot_trigger); 2184 } 2185 } 2186 2187 static int record__terminate_thread(struct record_thread *thread_data) 2188 { 2189 int err; 2190 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2191 pid_t tid = thread_data->tid; 2192 2193 close(thread_data->pipes.msg[1]); 2194 thread_data->pipes.msg[1] = -1; 2195 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2196 if (err > 0) 2197 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2198 else 2199 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2200 thread->tid, tid); 2201 2202 return 0; 2203 } 2204 2205 static int record__start_threads(struct record *rec) 2206 { 2207 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2208 struct record_thread *thread_data = rec->thread_data; 2209 sigset_t full, mask; 2210 pthread_t handle; 2211 pthread_attr_t attrs; 2212 2213 thread = &thread_data[0]; 2214 2215 if (!record__threads_enabled(rec)) 2216 return 0; 2217 2218 sigfillset(&full); 2219 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2220 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2221 return -1; 2222 } 2223 2224 pthread_attr_init(&attrs); 2225 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2226 2227 for (t = 1; t < nr_threads; t++) { 2228 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2229 
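		/*
		 * Start worker 't': optionally pin it to its affinity mask via
		 * the pthread attribute, then block on its ack pipe until it
		 * reports THREAD_MSG__READY, so workers are brought up one at
		 * a time.
		 */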
#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
		pthread_attr_setaffinity_np(&attrs,
					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
#endif
		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
			for (tt = 1; tt < t; tt++)
				record__terminate_thread(&thread_data[tt]);
			pr_err("Failed to start threads: %s\n", strerror(errno));
			ret = -1;
			goto out_err;
		}

		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
		if (err > 0)
			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
				  thread_msg_tags[msg]);
		else
			pr_warning("threads[%d]: failed to receive start notification from %d\n",
				   thread->tid, rec->thread_data[t].tid);
	}

	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
			  (cpu_set_t *)thread->mask->affinity.bits);

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

out_err:
	pthread_attr_destroy(&attrs);

	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
		ret = -1;
	}

	return ret;
}

static int record__stop_threads(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 1; t < rec->nr_threads; t++)
		record__terminate_thread(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		rec->samples += thread_data[t].samples;
		if (!record__threads_enabled(rec))
			continue;
		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
			 thread_data[t].samples, thread_data[t].waking);
		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
		else
			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
	}

	return 0;
}

static unsigned long record__waking(struct record *rec)
{
	int t;
	unsigned long waking = 0;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		waking += thread_data[t].waking;

	return waking;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;
	float ratio = 0;
	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.record_cgroup) {
#ifdef HAVE_FILE_HANDLE
		tool->cgroup_events = true;
#else
		pr_err("cgroup tracking is not supported\n");
		return -1;
#endif
	}

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
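		/*
		 * SIGUSR2 serves both AUX area snapshots and
		 * --switch-output=signal: snapshot_sig_handler() can only fire
		 * the trigger(s) armed via trigger_on() below.
		 */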
signal(SIGUSR2, snapshot_sig_handler); 2340 if (rec->opts.auxtrace_snapshot_mode) 2341 trigger_on(&auxtrace_snapshot_trigger); 2342 if (rec->switch_output.enabled) 2343 trigger_on(&switch_output_trigger); 2344 } else { 2345 signal(SIGUSR2, SIG_IGN); 2346 } 2347 2348 session = perf_session__new(data, tool); 2349 if (IS_ERR(session)) { 2350 pr_err("Perf session creation failed.\n"); 2351 return PTR_ERR(session); 2352 } 2353 2354 if (record__threads_enabled(rec)) { 2355 if (perf_data__is_pipe(&rec->data)) { 2356 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2357 return -1; 2358 } 2359 if (rec->opts.full_auxtrace) { 2360 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2361 return -1; 2362 } 2363 } 2364 2365 fd = perf_data__fd(data); 2366 rec->session = session; 2367 2368 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2369 pr_err("Compression initialization failed.\n"); 2370 return -1; 2371 } 2372 #ifdef HAVE_EVENTFD_SUPPORT 2373 done_fd = eventfd(0, EFD_NONBLOCK); 2374 if (done_fd < 0) { 2375 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2376 status = -1; 2377 goto out_delete_session; 2378 } 2379 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2380 if (err < 0) { 2381 pr_err("Failed to add wakeup eventfd to poll list\n"); 2382 status = err; 2383 goto out_delete_session; 2384 } 2385 #endif // HAVE_EVENTFD_SUPPORT 2386 2387 session->header.env.comp_type = PERF_COMP_ZSTD; 2388 session->header.env.comp_level = rec->opts.comp_level; 2389 2390 if (rec->opts.kcore && 2391 !record__kcore_readable(&session->machines.host)) { 2392 pr_err("ERROR: kcore is not readable.\n"); 2393 return -1; 2394 } 2395 2396 if (record__init_clock(rec)) 2397 return -1; 2398 2399 record__init_features(rec); 2400 2401 if (forks) { 2402 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2403 workload_exec_failed_signal); 2404 if (err < 0) { 2405 pr_err("Couldn't run the workload!\n"); 2406 status = err; 2407 goto out_delete_session; 2408 } 2409 } 2410 2411 /* 2412 * If we have just single event and are sending data 2413 * through pipe, we need to force the ids allocation, 2414 * because we synthesize event name through the pipe 2415 * and need the id for that. 2416 */ 2417 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2418 rec->opts.sample_id = true; 2419 2420 if (rec->timestamp_filename && perf_data__is_pipe(data)) { 2421 rec->timestamp_filename = false; 2422 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n"); 2423 } 2424 2425 evlist__uniquify_name(rec->evlist); 2426 2427 /* Debug message used by test scripts */ 2428 pr_debug3("perf record opening and mmapping events\n"); 2429 if (record__open(rec) != 0) { 2430 err = -1; 2431 goto out_free_threads; 2432 } 2433 /* Debug message used by test scripts */ 2434 pr_debug3("perf record done opening and mmapping events\n"); 2435 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2436 2437 if (rec->opts.kcore) { 2438 err = record__kcore_copy(&session->machines.host, data); 2439 if (err) { 2440 pr_err("ERROR: Failed to copy kcore\n"); 2441 goto out_free_threads; 2442 } 2443 } 2444 2445 /* 2446 * Normally perf_session__new would do this, but it doesn't have the 2447 * evlist. 
	 */
	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (evlist__nr_groups(rec->evlist) == 0)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_free_threads;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_free_threads;
	}

	err = -1;
	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		goto out_free_threads;
	}

	err = record__setup_sb_evlist(rec);
	if (err)
		goto out_free_threads;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_free_threads;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_free_threads;
		}
	}

	if (record__start_threads(rec))
		goto out_free_threads;

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->target.initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
2544 */ 2545 perf_event__synthesize_namespaces(tool, event, 2546 rec->evlist->workload.pid, 2547 tgid, process_synthesized_event, 2548 machine); 2549 free(event); 2550 2551 evlist__start_workload(rec->evlist); 2552 } 2553 2554 if (opts->target.initial_delay) { 2555 pr_info(EVLIST_DISABLED_MSG); 2556 if (opts->target.initial_delay > 0) { 2557 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2558 evlist__enable(rec->evlist); 2559 pr_info(EVLIST_ENABLED_MSG); 2560 } 2561 } 2562 2563 err = event_enable_timer__start(rec->evlist->eet); 2564 if (err) 2565 goto out_child; 2566 2567 /* Debug message used by test scripts */ 2568 pr_debug3("perf record has started\n"); 2569 fflush(stderr); 2570 2571 trigger_ready(&auxtrace_snapshot_trigger); 2572 trigger_ready(&switch_output_trigger); 2573 perf_hooks__invoke_record_start(); 2574 2575 /* 2576 * Must write FINISHED_INIT so it will be seen after all other 2577 * synthesized user events, but before any regular events. 2578 */ 2579 err = write_finished_init(rec, false); 2580 if (err < 0) 2581 goto out_child; 2582 2583 for (;;) { 2584 unsigned long long hits = thread->samples; 2585 2586 /* 2587 * rec->evlist->bkw_mmap_state is possible to be 2588 * BKW_MMAP_EMPTY here: when done == true and 2589 * hits != rec->samples in previous round. 2590 * 2591 * evlist__toggle_bkw_mmap ensure we never 2592 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2593 */ 2594 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2595 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2596 2597 if (record__mmap_read_all(rec, false) < 0) { 2598 trigger_error(&auxtrace_snapshot_trigger); 2599 trigger_error(&switch_output_trigger); 2600 err = -1; 2601 goto out_child; 2602 } 2603 2604 if (auxtrace_record__snapshot_started) { 2605 auxtrace_record__snapshot_started = 0; 2606 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2607 record__read_auxtrace_snapshot(rec, false); 2608 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2609 pr_err("AUX area tracing snapshot failed\n"); 2610 err = -1; 2611 goto out_child; 2612 } 2613 } 2614 2615 if (trigger_is_hit(&switch_output_trigger)) { 2616 /* 2617 * If switch_output_trigger is hit, the data in 2618 * overwritable ring buffer should have been collected, 2619 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2620 * 2621 * If SIGUSR2 raise after or during record__mmap_read_all(), 2622 * record__mmap_read_all() didn't collect data from 2623 * overwritable ring buffer. Read again. 2624 */ 2625 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2626 continue; 2627 trigger_ready(&switch_output_trigger); 2628 2629 /* 2630 * Reenable events in overwrite ring buffer after 2631 * record__mmap_read_all(): we should have collected 2632 * data from it. 2633 */ 2634 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2635 2636 if (!quiet) 2637 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2638 record__waking(rec)); 2639 thread->waking = 0; 2640 fd = record__switch_output(rec, false); 2641 if (fd < 0) { 2642 pr_err("Failed to switch to new file\n"); 2643 trigger_error(&switch_output_trigger); 2644 err = fd; 2645 goto out_child; 2646 } 2647 2648 /* re-arm the alarm */ 2649 if (rec->switch_output.time) 2650 alarm(rec->switch_output.time); 2651 } 2652 2653 if (hits == thread->samples) { 2654 if (done || draining) 2655 break; 2656 err = fdarray__poll(&thread->pollfd, -1); 2657 /* 2658 * Propagate error, only if there's any. Ignore positive 2659 * number of returned events and interrupt error. 
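			 *
			 * (fdarray__poll() is a thin wrapper around poll(2), so a
			 * positive return is just the number of ready fds, and
			 * EINTR typically means a signal such as SIGUSR2 or
			 * SIGALRM interrupted the wait; neither is fatal.)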
2660 */ 2661 if (err > 0 || (err < 0 && errno == EINTR)) 2662 err = 0; 2663 thread->waking++; 2664 2665 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2666 record__thread_munmap_filtered, NULL) == 0) 2667 draining = true; 2668 2669 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2670 if (err) 2671 goto out_child; 2672 } 2673 2674 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2675 switch (cmd) { 2676 case EVLIST_CTL_CMD_SNAPSHOT: 2677 hit_auxtrace_snapshot_trigger(rec); 2678 evlist__ctlfd_ack(rec->evlist); 2679 break; 2680 case EVLIST_CTL_CMD_STOP: 2681 done = 1; 2682 break; 2683 case EVLIST_CTL_CMD_ACK: 2684 case EVLIST_CTL_CMD_UNSUPPORTED: 2685 case EVLIST_CTL_CMD_ENABLE: 2686 case EVLIST_CTL_CMD_DISABLE: 2687 case EVLIST_CTL_CMD_EVLIST: 2688 case EVLIST_CTL_CMD_PING: 2689 default: 2690 break; 2691 } 2692 } 2693 2694 err = event_enable_timer__process(rec->evlist->eet); 2695 if (err < 0) 2696 goto out_child; 2697 if (err) { 2698 err = 0; 2699 done = 1; 2700 } 2701 2702 /* 2703 * When perf is starting the traced process, at the end events 2704 * die with the process and we wait for that. Thus no need to 2705 * disable events in this case. 2706 */ 2707 if (done && !disabled && !target__none(&opts->target)) { 2708 trigger_off(&auxtrace_snapshot_trigger); 2709 evlist__disable(rec->evlist); 2710 disabled = true; 2711 } 2712 } 2713 2714 trigger_off(&auxtrace_snapshot_trigger); 2715 trigger_off(&switch_output_trigger); 2716 2717 if (opts->auxtrace_snapshot_on_exit) 2718 record__auxtrace_snapshot_exit(rec); 2719 2720 if (forks && workload_exec_errno) { 2721 char msg[STRERR_BUFSIZE], strevsels[2048]; 2722 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2723 2724 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2725 2726 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2727 strevsels, argv[0], emsg); 2728 err = -1; 2729 goto out_child; 2730 } 2731 2732 if (!quiet) 2733 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2734 record__waking(rec)); 2735 2736 write_finished_init(rec, true); 2737 2738 if (target__none(&rec->opts.target)) 2739 record__synthesize_workload(rec, true); 2740 2741 out_child: 2742 record__stop_threads(rec); 2743 record__mmap_read_all(rec, true); 2744 out_free_threads: 2745 record__free_thread_data(rec); 2746 evlist__finalize_ctlfd(rec->evlist); 2747 record__aio_mmap_read_sync(rec); 2748 2749 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2750 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2751 session->header.env.comp_ratio = ratio + 0.5; 2752 } 2753 2754 if (forks) { 2755 int exit_status; 2756 2757 if (!child_finished) 2758 kill(rec->evlist->workload.pid, SIGTERM); 2759 2760 wait(&exit_status); 2761 2762 if (err < 0) 2763 status = err; 2764 else if (WIFEXITED(exit_status)) 2765 status = WEXITSTATUS(exit_status); 2766 else if (WIFSIGNALED(exit_status)) 2767 signr = WTERMSIG(exit_status); 2768 } else 2769 status = err; 2770 2771 if (rec->off_cpu) 2772 rec->bytes_written += off_cpu_write(rec->session); 2773 2774 record__read_lost_samples(rec); 2775 record__synthesize(rec, true); 2776 /* this will be recalculated during process_buildids() */ 2777 rec->samples = 0; 2778 2779 if (!err) { 2780 if (!rec->timestamp_filename) { 2781 record__finish_output(rec); 2782 } else { 2783 fd = record__switch_output(rec, true); 2784 if (fd < 0) { 2785 status = fd; 2786 goto out_delete_session; 2787 } 2788 } 2789 } 2790 
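	/*
	 * The summary printed below (when not in quiet mode) looks like this,
	 * with made-up numbers:
	 *
	 *   [ perf record: Captured and wrote 1.234 MB perf.data (567 samples) ]
	 */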
2791 perf_hooks__invoke_record_end(); 2792 2793 if (!err && !quiet) { 2794 char samples[128]; 2795 const char *postfix = rec->timestamp_filename ? 2796 ".<timestamp>" : ""; 2797 2798 if (rec->samples && !rec->opts.full_auxtrace) 2799 scnprintf(samples, sizeof(samples), 2800 " (%" PRIu64 " samples)", rec->samples); 2801 else 2802 samples[0] = '\0'; 2803 2804 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2805 perf_data__size(data) / 1024.0 / 1024.0, 2806 data->path, postfix, samples); 2807 if (ratio) { 2808 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2809 rec->session->bytes_transferred / 1024.0 / 1024.0, 2810 ratio); 2811 } 2812 fprintf(stderr, " ]\n"); 2813 } 2814 2815 out_delete_session: 2816 #ifdef HAVE_EVENTFD_SUPPORT 2817 if (done_fd >= 0) { 2818 fd = done_fd; 2819 done_fd = -1; 2820 2821 close(fd); 2822 } 2823 #endif 2824 zstd_fini(&session->zstd_data); 2825 perf_session__delete(session); 2826 2827 if (!opts->no_bpf_event) 2828 evlist__stop_sb_thread(rec->sb_evlist); 2829 return status; 2830 } 2831 2832 static void callchain_debug(struct callchain_param *callchain) 2833 { 2834 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2835 2836 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2837 2838 if (callchain->record_mode == CALLCHAIN_DWARF) 2839 pr_debug("callchain: stack dump size %d\n", 2840 callchain->dump_size); 2841 } 2842 2843 int record_opts__parse_callchain(struct record_opts *record, 2844 struct callchain_param *callchain, 2845 const char *arg, bool unset) 2846 { 2847 int ret; 2848 callchain->enabled = !unset; 2849 2850 /* --no-call-graph */ 2851 if (unset) { 2852 callchain->record_mode = CALLCHAIN_NONE; 2853 pr_debug("callchain: disabled\n"); 2854 return 0; 2855 } 2856 2857 ret = parse_callchain_record_opt(arg, callchain); 2858 if (!ret) { 2859 /* Enable data address sampling for DWARF unwind. 
*/ 2860 if (callchain->record_mode == CALLCHAIN_DWARF) 2861 record->sample_address = true; 2862 callchain_debug(callchain); 2863 } 2864 2865 return ret; 2866 } 2867 2868 int record_parse_callchain_opt(const struct option *opt, 2869 const char *arg, 2870 int unset) 2871 { 2872 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2873 } 2874 2875 int record_callchain_opt(const struct option *opt, 2876 const char *arg __maybe_unused, 2877 int unset __maybe_unused) 2878 { 2879 struct callchain_param *callchain = opt->value; 2880 2881 callchain->enabled = true; 2882 2883 if (callchain->record_mode == CALLCHAIN_NONE) 2884 callchain->record_mode = CALLCHAIN_FP; 2885 2886 callchain_debug(callchain); 2887 return 0; 2888 } 2889 2890 static int perf_record_config(const char *var, const char *value, void *cb) 2891 { 2892 struct record *rec = cb; 2893 2894 if (!strcmp(var, "record.build-id")) { 2895 if (!strcmp(value, "cache")) 2896 rec->no_buildid_cache = false; 2897 else if (!strcmp(value, "no-cache")) 2898 rec->no_buildid_cache = true; 2899 else if (!strcmp(value, "skip")) 2900 rec->no_buildid = true; 2901 else if (!strcmp(value, "mmap")) 2902 rec->buildid_mmap = true; 2903 else 2904 return -1; 2905 return 0; 2906 } 2907 if (!strcmp(var, "record.call-graph")) { 2908 var = "call-graph.record-mode"; 2909 return perf_default_config(var, value, cb); 2910 } 2911 #ifdef HAVE_AIO_SUPPORT 2912 if (!strcmp(var, "record.aio")) { 2913 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2914 if (!rec->opts.nr_cblocks) 2915 rec->opts.nr_cblocks = nr_cblocks_default; 2916 } 2917 #endif 2918 if (!strcmp(var, "record.debuginfod")) { 2919 rec->debuginfod.urls = strdup(value); 2920 if (!rec->debuginfod.urls) 2921 return -ENOMEM; 2922 rec->debuginfod.set = true; 2923 } 2924 2925 return 0; 2926 } 2927 2928 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2929 { 2930 struct record *rec = (struct record *)opt->value; 2931 2932 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2933 } 2934 2935 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2936 { 2937 struct record_opts *opts = (struct record_opts *)opt->value; 2938 2939 if (unset || !str) 2940 return 0; 2941 2942 if (!strcasecmp(str, "node")) 2943 opts->affinity = PERF_AFFINITY_NODE; 2944 else if (!strcasecmp(str, "cpu")) 2945 opts->affinity = PERF_AFFINITY_CPU; 2946 2947 return 0; 2948 } 2949 2950 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2951 { 2952 mask->nbits = nr_bits; 2953 mask->bits = bitmap_zalloc(mask->nbits); 2954 if (!mask->bits) 2955 return -ENOMEM; 2956 2957 return 0; 2958 } 2959 2960 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2961 { 2962 bitmap_free(mask->bits); 2963 mask->nbits = 0; 2964 } 2965 2966 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2967 { 2968 int ret; 2969 2970 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2971 if (ret) { 2972 mask->affinity.bits = NULL; 2973 return ret; 2974 } 2975 2976 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2977 if (ret) { 2978 record__mmap_cpu_mask_free(&mask->maps); 2979 mask->maps.bits = NULL; 2980 } 2981 2982 return ret; 2983 } 2984 2985 static void record__thread_mask_free(struct thread_mask *mask) 2986 { 2987 record__mmap_cpu_mask_free(&mask->maps); 2988 record__mmap_cpu_mask_free(&mask->affinity); 2989 } 2990 2991 static int record__parse_threads(const struct 
option *opt, const char *str, int unset) 2992 { 2993 int s; 2994 struct record_opts *opts = opt->value; 2995 2996 if (unset || !str || !strlen(str)) { 2997 opts->threads_spec = THREAD_SPEC__CPU; 2998 } else { 2999 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3000 if (s == THREAD_SPEC__USER) { 3001 opts->threads_user_spec = strdup(str); 3002 if (!opts->threads_user_spec) 3003 return -ENOMEM; 3004 opts->threads_spec = THREAD_SPEC__USER; 3005 break; 3006 } 3007 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3008 opts->threads_spec = s; 3009 break; 3010 } 3011 } 3012 } 3013 3014 if (opts->threads_spec == THREAD_SPEC__USER) 3015 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3016 else 3017 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3018 3019 return 0; 3020 } 3021 3022 static int parse_output_max_size(const struct option *opt, 3023 const char *str, int unset) 3024 { 3025 unsigned long *s = (unsigned long *)opt->value; 3026 static struct parse_tag tags_size[] = { 3027 { .tag = 'B', .mult = 1 }, 3028 { .tag = 'K', .mult = 1 << 10 }, 3029 { .tag = 'M', .mult = 1 << 20 }, 3030 { .tag = 'G', .mult = 1 << 30 }, 3031 { .tag = 0 }, 3032 }; 3033 unsigned long val; 3034 3035 if (unset) { 3036 *s = 0; 3037 return 0; 3038 } 3039 3040 val = parse_tag_value(str, tags_size); 3041 if (val != (unsigned long) -1) { 3042 *s = val; 3043 return 0; 3044 } 3045 3046 return -1; 3047 } 3048 3049 static int record__parse_mmap_pages(const struct option *opt, 3050 const char *str, 3051 int unset __maybe_unused) 3052 { 3053 struct record_opts *opts = opt->value; 3054 char *s, *p; 3055 unsigned int mmap_pages; 3056 int ret; 3057 3058 if (!str) 3059 return -EINVAL; 3060 3061 s = strdup(str); 3062 if (!s) 3063 return -ENOMEM; 3064 3065 p = strchr(s, ','); 3066 if (p) 3067 *p = '\0'; 3068 3069 if (*s) { 3070 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3071 if (ret) 3072 goto out_free; 3073 opts->mmap_pages = mmap_pages; 3074 } 3075 3076 if (!p) { 3077 ret = 0; 3078 goto out_free; 3079 } 3080 3081 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3082 if (ret) 3083 goto out_free; 3084 3085 opts->auxtrace_mmap_pages = mmap_pages; 3086 3087 out_free: 3088 free(s); 3089 return ret; 3090 } 3091 3092 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3093 { 3094 } 3095 3096 static int parse_control_option(const struct option *opt, 3097 const char *str, 3098 int unset __maybe_unused) 3099 { 3100 struct record_opts *opts = opt->value; 3101 3102 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3103 } 3104 3105 static void switch_output_size_warn(struct record *rec) 3106 { 3107 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3108 struct switch_output *s = &rec->switch_output; 3109 3110 wakeup_size /= 2; 3111 3112 if (s->size < wakeup_size) { 3113 char buf[100]; 3114 3115 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3116 pr_warning("WARNING: switch-output data size lower than " 3117 "wakeup kernel buffer size (%s) " 3118 "expect bigger perf.data sizes\n", buf); 3119 } 3120 } 3121 3122 static int switch_output_setup(struct record *rec) 3123 { 3124 struct switch_output *s = &rec->switch_output; 3125 static struct parse_tag tags_size[] = { 3126 { .tag = 'B', .mult = 1 }, 3127 { .tag = 'K', .mult = 1 << 10 }, 3128 { .tag = 'M', .mult = 1 << 20 }, 3129 { .tag = 'G', .mult = 1 << 30 }, 3130 { .tag = 0 }, 3131 }; 3132 static struct parse_tag tags_time[] = { 3133 { .tag = 's', 
.mult = 1 }, 3134 { .tag = 'm', .mult = 60 }, 3135 { .tag = 'h', .mult = 60*60 }, 3136 { .tag = 'd', .mult = 60*60*24 }, 3137 { .tag = 0 }, 3138 }; 3139 unsigned long val; 3140 3141 /* 3142 * If we're using --switch-output-events, then we imply its 3143 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3144 * thread to its parent. 3145 */ 3146 if (rec->switch_output_event_set) { 3147 if (record__threads_enabled(rec)) { 3148 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3149 return 0; 3150 } 3151 goto do_signal; 3152 } 3153 3154 if (!s->set) 3155 return 0; 3156 3157 if (record__threads_enabled(rec)) { 3158 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3159 return 0; 3160 } 3161 3162 if (!strcmp(s->str, "signal")) { 3163 do_signal: 3164 s->signal = true; 3165 pr_debug("switch-output with SIGUSR2 signal\n"); 3166 goto enabled; 3167 } 3168 3169 val = parse_tag_value(s->str, tags_size); 3170 if (val != (unsigned long) -1) { 3171 s->size = val; 3172 pr_debug("switch-output with %s size threshold\n", s->str); 3173 goto enabled; 3174 } 3175 3176 val = parse_tag_value(s->str, tags_time); 3177 if (val != (unsigned long) -1) { 3178 s->time = val; 3179 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3180 s->str, s->time); 3181 goto enabled; 3182 } 3183 3184 return -1; 3185 3186 enabled: 3187 rec->timestamp_filename = true; 3188 s->enabled = true; 3189 3190 if (s->size && !rec->opts.no_buffering) 3191 switch_output_size_warn(rec); 3192 3193 return 0; 3194 } 3195 3196 static const char * const __record_usage[] = { 3197 "perf record [<options>] [<command>]", 3198 "perf record [<options>] -- <command> [<options>]", 3199 NULL 3200 }; 3201 const char * const *record_usage = __record_usage; 3202 3203 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3204 struct perf_sample *sample, struct machine *machine) 3205 { 3206 /* 3207 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3208 * no need to add them twice. 3209 */ 3210 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3211 return 0; 3212 return perf_event__process_mmap(tool, event, sample, machine); 3213 } 3214 3215 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3216 struct perf_sample *sample, struct machine *machine) 3217 { 3218 /* 3219 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3220 * no need to add them twice. 
3221 */ 3222 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3223 return 0; 3224 3225 return perf_event__process_mmap2(tool, event, sample, machine); 3226 } 3227 3228 static int process_timestamp_boundary(struct perf_tool *tool, 3229 union perf_event *event __maybe_unused, 3230 struct perf_sample *sample, 3231 struct machine *machine __maybe_unused) 3232 { 3233 struct record *rec = container_of(tool, struct record, tool); 3234 3235 set_timestamp_boundary(rec, sample->time); 3236 return 0; 3237 } 3238 3239 static int parse_record_synth_option(const struct option *opt, 3240 const char *str, 3241 int unset __maybe_unused) 3242 { 3243 struct record_opts *opts = opt->value; 3244 char *p = strdup(str); 3245 3246 if (p == NULL) 3247 return -1; 3248 3249 opts->synth = parse_synth_opt(p); 3250 free(p); 3251 3252 if (opts->synth < 0) { 3253 pr_err("Invalid synth option: %s\n", str); 3254 return -1; 3255 } 3256 return 0; 3257 } 3258 3259 /* 3260 * XXX Ideally would be local to cmd_record() and passed to a record__new 3261 * because we need to have access to it in record__exit, that is called 3262 * after cmd_record() exits, but since record_options need to be accessible to 3263 * builtin-script, leave it here. 3264 * 3265 * At least we don't ouch it in all the other functions here directly. 3266 * 3267 * Just say no to tons of global variables, sigh. 3268 */ 3269 static struct record record = { 3270 .opts = { 3271 .sample_time = true, 3272 .mmap_pages = UINT_MAX, 3273 .user_freq = UINT_MAX, 3274 .user_interval = ULLONG_MAX, 3275 .freq = 4000, 3276 .target = { 3277 .uses_mmap = true, 3278 .default_per_cpu = true, 3279 }, 3280 .mmap_flush = MMAP_FLUSH_DEFAULT, 3281 .nr_threads_synthesize = 1, 3282 .ctl_fd = -1, 3283 .ctl_fd_ack = -1, 3284 .synth = PERF_SYNTH_ALL, 3285 }, 3286 .tool = { 3287 .sample = process_sample_event, 3288 .fork = perf_event__process_fork, 3289 .exit = perf_event__process_exit, 3290 .comm = perf_event__process_comm, 3291 .namespaces = perf_event__process_namespaces, 3292 .mmap = build_id__process_mmap, 3293 .mmap2 = build_id__process_mmap2, 3294 .itrace_start = process_timestamp_boundary, 3295 .aux = process_timestamp_boundary, 3296 .ordered_events = true, 3297 }, 3298 }; 3299 3300 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3301 "\n\t\t\t\tDefault: fp"; 3302 3303 static bool dry_run; 3304 3305 static struct parse_events_option_args parse_events_option_args = { 3306 .evlistp = &record.evlist, 3307 }; 3308 3309 static struct parse_events_option_args switch_output_parse_events_option_args = { 3310 .evlistp = &record.sb_evlist, 3311 }; 3312 3313 /* 3314 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3315 * with it and switch to use the library functions in perf_evlist that came 3316 * from builtin-record.c, i.e. use record_opts, 3317 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3318 * using pipes, etc. 3319 */ 3320 static struct option __record_options[] = { 3321 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3322 "event selector. 
use 'perf list' to list available events", 3323 parse_events_option), 3324 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3325 "event filter", parse_filter), 3326 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3327 NULL, "don't record events from perf itself", 3328 exclude_perf), 3329 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3330 "record events on existing process id"), 3331 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3332 "record events on existing thread id"), 3333 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3334 "collect data with this RT SCHED_FIFO priority"), 3335 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3336 "collect data without buffering"), 3337 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3338 "collect raw sample records from all opened counters"), 3339 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3340 "system-wide collection from all CPUs"), 3341 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3342 "list of cpus to monitor"), 3343 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3344 OPT_STRING('o', "output", &record.data.path, "file", 3345 "output file name"), 3346 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3347 &record.opts.no_inherit_set, 3348 "child tasks do not inherit counters"), 3349 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3350 "synthesize non-sample events at the end of output"), 3351 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3352 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3353 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3354 "Fail if the specified frequency can't be used"), 3355 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3356 "profile at this frequency", 3357 record__parse_freq), 3358 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3359 "number of mmap data pages and AUX area tracing mmap pages", 3360 record__parse_mmap_pages), 3361 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3362 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3363 record__mmap_flush_parse), 3364 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3365 NULL, "enables call-graph recording" , 3366 &record_callchain_opt), 3367 OPT_CALLBACK(0, "call-graph", &record.opts, 3368 "record_mode[,record_size]", record_callchain_help, 3369 &record_parse_callchain_opt), 3370 OPT_INCR('v', "verbose", &verbose, 3371 "be more verbose (show counter open errors, etc)"), 3372 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3373 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3374 "per thread counts"), 3375 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3376 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3377 "Record the sample physical addresses"), 3378 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3379 "Record the sampled data address data page size"), 3380 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3381 "Record the sampled code address (ip) page size"), 3382 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3383 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3384 "Record the sample identifier"), 3385 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3386 &record.opts.sample_time_set, 3387 "Record the sample 
timestamps"), 3388 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3389 "Record the sample period"), 3390 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3391 "don't sample"), 3392 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3393 &record.no_buildid_cache_set, 3394 "do not update the buildid cache"), 3395 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3396 &record.no_buildid_set, 3397 "do not collect buildids in perf.data"), 3398 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3399 "monitor event in cgroup name only", 3400 parse_cgroups), 3401 OPT_CALLBACK('D', "delay", &record, "ms", 3402 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3403 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3404 record__parse_event_enable_time), 3405 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3406 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3407 "user to profile"), 3408 3409 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3410 "branch any", "sample any taken branches", 3411 parse_branch_stack), 3412 3413 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3414 "branch filter mask", "branch stack filter modes", 3415 parse_branch_stack), 3416 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3417 "sample by weight (on special events only)"), 3418 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3419 "sample transaction flags (special events only)"), 3420 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3421 "use per-thread mmaps"), 3422 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3423 "sample selected machine registers on interrupt," 3424 " use '-I?' to list register names", parse_intr_regs), 3425 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3426 "sample selected machine registers on interrupt," 3427 " use '--user-regs=?' 
to list register names", parse_user_regs), 3428 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3429 "Record running/enabled time of read (:S) events"), 3430 OPT_CALLBACK('k', "clockid", &record.opts, 3431 "clockid", "clockid to use for events, see clock_gettime()", 3432 parse_clockid), 3433 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3434 "opts", "AUX area tracing Snapshot Mode", ""), 3435 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3436 "opts", "sample AUX area", ""), 3437 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3438 "per thread proc mmap processing timeout in ms"), 3439 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3440 "Record namespaces events"), 3441 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3442 "Record cgroup events"), 3443 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3444 &record.opts.record_switch_events_set, 3445 "Record context switch events"), 3446 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3447 "Configure all used events to run in kernel space.", 3448 PARSE_OPT_EXCLUSIVE), 3449 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3450 "Configure all used events to run in user space.", 3451 PARSE_OPT_EXCLUSIVE), 3452 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3453 "collect kernel callchains"), 3454 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3455 "collect user callchains"), 3456 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3457 "file", "vmlinux pathname"), 3458 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3459 "Record build-id of all DSOs regardless of hits"), 3460 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3461 "Record build-id in map events"), 3462 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3463 "append timestamp to output filename"), 3464 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3465 "Record timestamp boundary (time of first/last samples)"), 3466 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3467 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3468 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3469 "signal"), 3470 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3471 &record.switch_output_event_set, "switch output event", 3472 "switch output event selector. 
use 'perf list' to list available events", 3473 parse_events_option_new_evlist), 3474 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3475 "Limit number of switch output generated files"), 3476 OPT_BOOLEAN(0, "dry-run", &dry_run, 3477 "Parse options then exit"), 3478 #ifdef HAVE_AIO_SUPPORT 3479 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3480 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3481 record__aio_parse), 3482 #endif 3483 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3484 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3485 record__parse_affinity), 3486 #ifdef HAVE_ZSTD_SUPPORT 3487 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3488 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3489 record__parse_comp_level), 3490 #endif 3491 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3492 "size", "Limit the maximum size of the output file", parse_output_max_size), 3493 OPT_UINTEGER(0, "num-thread-synthesize", 3494 &record.opts.nr_threads_synthesize, 3495 "number of threads to run for event synthesis"), 3496 #ifdef HAVE_LIBPFM 3497 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3498 "libpfm4 event selector. use 'perf list' to list available events", 3499 parse_libpfm_events_option), 3500 #endif 3501 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3502 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3503 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3504 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3505 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3506 parse_control_option), 3507 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3508 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3509 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3510 &record.debuginfod.set, "debuginfod urls", 3511 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3512 "system"), 3513 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3514 "write collected trace data into several data files using parallel threads", 3515 record__parse_threads), 3516 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3517 OPT_END() 3518 }; 3519 3520 struct option *record_options = __record_options; 3521 3522 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3523 { 3524 struct perf_cpu cpu; 3525 int idx; 3526 3527 if (cpu_map__is_dummy(cpus)) 3528 return 0; 3529 3530 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3531 if (cpu.cpu == -1) 3532 continue; 3533 /* Return ENODEV is input cpu is greater than max cpu */ 3534 if ((unsigned long)cpu.cpu > mask->nbits) 3535 return -ENODEV; 3536 __set_bit(cpu.cpu, mask->bits); 3537 } 3538 3539 return 0; 3540 } 3541 3542 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3543 { 3544 struct perf_cpu_map *cpus; 3545 3546 cpus = perf_cpu_map__new(mask_spec); 3547 if (!cpus) 3548 return -ENOMEM; 3549 3550 bitmap_zero(mask->bits, mask->nbits); 3551 if (record__mmap_cpu_mask_init(mask, cpus)) 3552 return -ENODEV; 3553 3554 perf_cpu_map__put(cpus); 3555 3556 return 0; 3557 } 
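/*
 * Illustrative only: a mask_spec such as "0-2,4" is parsed by
 * perf_cpu_map__new() above, and record__mmap_cpu_mask_init() then sets bits
 * 0, 1, 2 and 4 in mask->bits, assuming the mask was allocated with at least
 * cpu__max_cpu().cpu bits.
 */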
3558 3559 static void record__free_thread_masks(struct record *rec, int nr_threads) 3560 { 3561 int t; 3562 3563 if (rec->thread_masks) 3564 for (t = 0; t < nr_threads; t++) 3565 record__thread_mask_free(&rec->thread_masks[t]); 3566 3567 zfree(&rec->thread_masks); 3568 } 3569 3570 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3571 { 3572 int t, ret; 3573 3574 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3575 if (!rec->thread_masks) { 3576 pr_err("Failed to allocate thread masks\n"); 3577 return -ENOMEM; 3578 } 3579 3580 for (t = 0; t < nr_threads; t++) { 3581 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3582 if (ret) { 3583 pr_err("Failed to allocate thread masks[%d]\n", t); 3584 goto out_free; 3585 } 3586 } 3587 3588 return 0; 3589 3590 out_free: 3591 record__free_thread_masks(rec, nr_threads); 3592 3593 return ret; 3594 } 3595 3596 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3597 { 3598 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3599 3600 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3601 if (ret) 3602 return ret; 3603 3604 rec->nr_threads = nr_cpus; 3605 pr_debug("nr_threads: %d\n", rec->nr_threads); 3606 3607 for (t = 0; t < rec->nr_threads; t++) { 3608 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3609 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3610 if (verbose > 0) { 3611 pr_debug("thread_masks[%d]: ", t); 3612 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3613 pr_debug("thread_masks[%d]: ", t); 3614 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3615 } 3616 } 3617 3618 return 0; 3619 } 3620 3621 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3622 const char **maps_spec, const char **affinity_spec, 3623 u32 nr_spec) 3624 { 3625 u32 s; 3626 int ret = 0, t = 0; 3627 struct mmap_cpu_mask cpus_mask; 3628 struct thread_mask thread_mask, full_mask, *thread_masks; 3629 3630 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3631 if (ret) { 3632 pr_err("Failed to allocate CPUs mask\n"); 3633 return ret; 3634 } 3635 3636 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3637 if (ret) { 3638 pr_err("Failed to init cpu mask\n"); 3639 goto out_free_cpu_mask; 3640 } 3641 3642 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3643 if (ret) { 3644 pr_err("Failed to allocate full mask\n"); 3645 goto out_free_cpu_mask; 3646 } 3647 3648 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3649 if (ret) { 3650 pr_err("Failed to allocate thread mask\n"); 3651 goto out_free_full_and_cpu_masks; 3652 } 3653 3654 for (s = 0; s < nr_spec; s++) { 3655 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3656 if (ret) { 3657 pr_err("Failed to initialize maps thread mask\n"); 3658 goto out_free; 3659 } 3660 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3661 if (ret) { 3662 pr_err("Failed to initialize affinity thread mask\n"); 3663 goto out_free; 3664 } 3665 3666 /* ignore invalid CPUs but do not allow empty masks */ 3667 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3668 cpus_mask.bits, thread_mask.maps.nbits)) { 3669 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3670 ret = -EINVAL; 3671 goto out_free; 3672 } 3673 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3674 
cpus_mask.bits, thread_mask.affinity.nbits)) { 3675 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3676 ret = -EINVAL; 3677 goto out_free; 3678 } 3679 3680 /* do not allow intersection with other masks (full_mask) */ 3681 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3682 thread_mask.maps.nbits)) { 3683 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3684 ret = -EINVAL; 3685 goto out_free; 3686 } 3687 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3688 thread_mask.affinity.nbits)) { 3689 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3690 ret = -EINVAL; 3691 goto out_free; 3692 } 3693 3694 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3695 thread_mask.maps.bits, full_mask.maps.nbits); 3696 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3697 thread_mask.affinity.bits, full_mask.maps.nbits); 3698 3699 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3700 if (!thread_masks) { 3701 pr_err("Failed to reallocate thread masks\n"); 3702 ret = -ENOMEM; 3703 goto out_free; 3704 } 3705 rec->thread_masks = thread_masks; 3706 rec->thread_masks[t] = thread_mask; 3707 if (verbose > 0) { 3708 pr_debug("thread_masks[%d]: ", t); 3709 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3710 pr_debug("thread_masks[%d]: ", t); 3711 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3712 } 3713 t++; 3714 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3715 if (ret) { 3716 pr_err("Failed to allocate thread mask\n"); 3717 goto out_free_full_and_cpu_masks; 3718 } 3719 } 3720 rec->nr_threads = t; 3721 pr_debug("nr_threads: %d\n", rec->nr_threads); 3722 if (!rec->nr_threads) 3723 ret = -EINVAL; 3724 3725 out_free: 3726 record__thread_mask_free(&thread_mask); 3727 out_free_full_and_cpu_masks: 3728 record__thread_mask_free(&full_mask); 3729 out_free_cpu_mask: 3730 record__mmap_cpu_mask_free(&cpus_mask); 3731 3732 return ret; 3733 } 3734 3735 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3736 { 3737 int ret; 3738 struct cpu_topology *topo; 3739 3740 topo = cpu_topology__new(); 3741 if (!topo) { 3742 pr_err("Failed to allocate CPU topology\n"); 3743 return -ENOMEM; 3744 } 3745 3746 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3747 topo->core_cpus_list, topo->core_cpus_lists); 3748 cpu_topology__delete(topo); 3749 3750 return ret; 3751 } 3752 3753 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3754 { 3755 int ret; 3756 struct cpu_topology *topo; 3757 3758 topo = cpu_topology__new(); 3759 if (!topo) { 3760 pr_err("Failed to allocate CPU topology\n"); 3761 return -ENOMEM; 3762 } 3763 3764 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3765 topo->package_cpus_list, topo->package_cpus_lists); 3766 cpu_topology__delete(topo); 3767 3768 return ret; 3769 } 3770 3771 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3772 { 3773 u32 s; 3774 int ret; 3775 const char **spec; 3776 struct numa_topology *topo; 3777 3778 topo = numa_topology__new(); 3779 if (!topo) { 3780 pr_err("Failed to allocate NUMA topology\n"); 3781 return -ENOMEM; 3782 } 3783 3784 spec = zalloc(topo->nr * sizeof(char *)); 3785 if (!spec) { 3786 pr_err("Failed to allocate NUMA spec\n"); 3787 ret = -ENOMEM; 3788 goto out_delete_topo; 3789 } 3790 for (s = 0; s < topo->nr; s++) 3791 spec[s] = topo->nodes[s].cpus; 3792 
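	/*
	 * At this point spec[] holds one cpu list string per NUMA node, e.g.
	 * { "0-7", "8-15" } on a hypothetical two-node machine, so the call
	 * below creates one reader thread per node with identical maps and
	 * affinity masks.
	 */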
3793 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3794 3795 zfree(&spec); 3796 3797 out_delete_topo: 3798 numa_topology__delete(topo); 3799 3800 return ret; 3801 } 3802 3803 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3804 { 3805 int t, ret; 3806 u32 s, nr_spec = 0; 3807 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3808 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3809 3810 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3811 spec = strtok_r(user_spec, ":", &spec_ptr); 3812 if (spec == NULL) 3813 break; 3814 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3815 mask = strtok_r(spec, "/", &mask_ptr); 3816 if (mask == NULL) 3817 break; 3818 pr_debug2(" maps mask: %s\n", mask); 3819 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3820 if (!tmp_spec) { 3821 pr_err("Failed to reallocate maps spec\n"); 3822 ret = -ENOMEM; 3823 goto out_free; 3824 } 3825 maps_spec = tmp_spec; 3826 maps_spec[nr_spec] = dup_mask = strdup(mask); 3827 if (!maps_spec[nr_spec]) { 3828 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3829 ret = -ENOMEM; 3830 goto out_free; 3831 } 3832 mask = strtok_r(NULL, "/", &mask_ptr); 3833 if (mask == NULL) { 3834 pr_err("Invalid thread maps or affinity specs\n"); 3835 ret = -EINVAL; 3836 goto out_free; 3837 } 3838 pr_debug2(" affinity mask: %s\n", mask); 3839 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3840 if (!tmp_spec) { 3841 pr_err("Failed to reallocate affinity spec\n"); 3842 ret = -ENOMEM; 3843 goto out_free; 3844 } 3845 affinity_spec = tmp_spec; 3846 affinity_spec[nr_spec] = strdup(mask); 3847 if (!affinity_spec[nr_spec]) { 3848 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3849 ret = -ENOMEM; 3850 goto out_free; 3851 } 3852 dup_mask = NULL; 3853 nr_spec++; 3854 } 3855 3856 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3857 (const char **)affinity_spec, nr_spec); 3858 3859 out_free: 3860 free(dup_mask); 3861 for (s = 0; s < nr_spec; s++) { 3862 if (maps_spec) 3863 free(maps_spec[s]); 3864 if (affinity_spec) 3865 free(affinity_spec[s]); 3866 } 3867 free(affinity_spec); 3868 free(maps_spec); 3869 3870 return ret; 3871 } 3872 3873 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3874 { 3875 int ret; 3876 3877 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3878 if (ret) 3879 return ret; 3880 3881 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3882 return -ENODEV; 3883 3884 rec->nr_threads = 1; 3885 3886 return 0; 3887 } 3888 3889 static int record__init_thread_masks(struct record *rec) 3890 { 3891 int ret = 0; 3892 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3893 3894 if (!record__threads_enabled(rec)) 3895 return record__init_thread_default_masks(rec, cpus); 3896 3897 if (evlist__per_thread(rec->evlist)) { 3898 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3899 return -EINVAL; 3900 } 3901 3902 switch (rec->opts.threads_spec) { 3903 case THREAD_SPEC__CPU: 3904 ret = record__init_thread_cpu_masks(rec, cpus); 3905 break; 3906 case THREAD_SPEC__CORE: 3907 ret = record__init_thread_core_masks(rec, cpus); 3908 break; 3909 case THREAD_SPEC__PACKAGE: 3910 ret = record__init_thread_package_masks(rec, cpus); 3911 break; 3912 case THREAD_SPEC__NUMA: 3913 ret = record__init_thread_numa_masks(rec, cpus); 3914 break; 3915 case 
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

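	/*
	 * --buildid-mmap carries build ids in the MMAP2 events themselves,
	 * which needs kernel support and makes the build id cache unnecessary.
	 */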
	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate build ids if they are required
		 * explicitly, using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

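	/*
	 * No events were specified on the command line, so fall back to the
	 * default cycles event, restricted to user space when
	 * perf_event_paranoid does not allow kernel profiling.
	 */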
"cycles:P" : "cycles:Pu"); 4101 if (err) 4102 goto out; 4103 } 4104 4105 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4106 rec->opts.no_inherit = true; 4107 4108 err = target__validate(&rec->opts.target); 4109 if (err) { 4110 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4111 ui__warning("%s\n", errbuf); 4112 } 4113 4114 err = target__parse_uid(&rec->opts.target); 4115 if (err) { 4116 int saved_errno = errno; 4117 4118 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4119 ui__error("%s", errbuf); 4120 4121 err = -saved_errno; 4122 goto out; 4123 } 4124 4125 /* Enable ignoring missing threads when -u/-p option is defined. */ 4126 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4127 4128 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4129 4130 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4131 arch__add_leaf_frame_record_opts(&rec->opts); 4132 4133 err = -ENOMEM; 4134 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4135 if (rec->opts.target.pid != NULL) { 4136 pr_err("Couldn't create thread/CPU maps: %s\n", 4137 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4138 goto out; 4139 } 4140 else 4141 usage_with_options(record_usage, record_options); 4142 } 4143 4144 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4145 if (err) 4146 goto out; 4147 4148 /* 4149 * We take all buildids when the file contains 4150 * AUX area tracing data because we do not decode the 4151 * trace because it would take too long. 4152 */ 4153 if (rec->opts.full_auxtrace) 4154 rec->buildid_all = true; 4155 4156 if (rec->opts.text_poke) { 4157 err = record__config_text_poke(rec->evlist); 4158 if (err) { 4159 pr_err("record__config_text_poke failed, error %d\n", err); 4160 goto out; 4161 } 4162 } 4163 4164 if (rec->off_cpu) { 4165 err = record__config_off_cpu(rec); 4166 if (err) { 4167 pr_err("record__config_off_cpu failed, error %d\n", err); 4168 goto out; 4169 } 4170 } 4171 4172 if (record_opts__config(&rec->opts)) { 4173 err = -EINVAL; 4174 goto out; 4175 } 4176 4177 err = record__init_thread_masks(rec); 4178 if (err) { 4179 pr_err("Failed to initialize parallel data streaming masks\n"); 4180 goto out; 4181 } 4182 4183 if (rec->opts.nr_cblocks > nr_cblocks_max) 4184 rec->opts.nr_cblocks = nr_cblocks_max; 4185 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4186 4187 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4188 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4189 4190 if (rec->opts.comp_level > comp_level_max) 4191 rec->opts.comp_level = comp_level_max; 4192 pr_debug("comp level: %d\n", rec->opts.comp_level); 4193 4194 err = __cmd_record(&record, argc, argv); 4195 out: 4196 evlist__delete(rec->evlist); 4197 symbol__exit(); 4198 auxtrace_record__free(rec->itr); 4199 out_opts: 4200 record__free_thread_masks(rec, rec->nr_threads); 4201 rec->nr_threads = 0; 4202 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4203 return err; 4204 } 4205 4206 static void snapshot_sig_handler(int sig __maybe_unused) 4207 { 4208 struct record *rec = &record; 4209 4210 hit_auxtrace_snapshot_trigger(rec); 4211 4212 if (switch_output_signal(rec)) 4213 trigger_hit(&switch_output_trigger); 4214 } 4215 4216 static void alarm_sig_handler(int sig __maybe_unused) 4217 { 4218 struct record *rec = &record; 4219 4220 if (switch_output_time(rec)) 4221 trigger_hit(&switch_output_trigger); 
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}