// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include <internal/xyarray.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/mutex.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/pmu.h"
#include "util/pmus.h"
#include "util/clockid.h"
#include "util/off_cpu.h"
#include "util/bpf-filter.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
	char		 **filenames;
	int		 num_files;
	int		 cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};

struct record_thread {
	pid_t			tid;
	struct thread_mask	*mask;
	struct {
		int		msg[2];
		int		ack[2];
	} pipes;
	struct fdarray		pollfd;
	int			ctlfd_pos;
	int			nr_mmaps;
	struct mmap		**maps;
	struct mmap		**overwrite_maps;
	struct record		*rec;
	unsigned long long	samples;
	unsigned long		waking;
	u64			bytes_written;
	u64			bytes_transferred;
	u64			bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct pollfd_index_map {
	int
evlist_pollfd_index; 149 int thread_pollfd_index; 150 }; 151 152 struct record { 153 struct perf_tool tool; 154 struct record_opts opts; 155 u64 bytes_written; 156 u64 thread_bytes_written; 157 struct perf_data data; 158 struct auxtrace_record *itr; 159 struct evlist *evlist; 160 struct perf_session *session; 161 struct evlist *sb_evlist; 162 pthread_t thread_id; 163 int realtime_prio; 164 bool switch_output_event_set; 165 bool no_buildid; 166 bool no_buildid_set; 167 bool no_buildid_cache; 168 bool no_buildid_cache_set; 169 bool buildid_all; 170 bool buildid_mmap; 171 bool timestamp_filename; 172 bool timestamp_boundary; 173 bool off_cpu; 174 struct switch_output switch_output; 175 unsigned long long samples; 176 unsigned long output_max_size; /* = 0: unlimited */ 177 struct perf_debuginfod debuginfod; 178 int nr_threads; 179 struct thread_mask *thread_masks; 180 struct record_thread *thread_data; 181 struct pollfd_index_map *index_map; 182 size_t index_map_sz; 183 size_t index_map_cnt; 184 }; 185 186 static volatile int done; 187 188 static volatile int auxtrace_record__snapshot_started; 189 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 190 static DEFINE_TRIGGER(switch_output_trigger); 191 192 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 193 "SYS", "NODE", "CPU" 194 }; 195 196 #ifndef HAVE_GETTID 197 static inline pid_t gettid(void) 198 { 199 return (pid_t)syscall(__NR_gettid); 200 } 201 #endif 202 203 static int record__threads_enabled(struct record *rec) 204 { 205 return rec->opts.threads_spec; 206 } 207 208 static bool switch_output_signal(struct record *rec) 209 { 210 return rec->switch_output.signal && 211 trigger_is_ready(&switch_output_trigger); 212 } 213 214 static bool switch_output_size(struct record *rec) 215 { 216 return rec->switch_output.size && 217 trigger_is_ready(&switch_output_trigger) && 218 (rec->bytes_written >= rec->switch_output.size); 219 } 220 221 static bool switch_output_time(struct record *rec) 222 { 223 return rec->switch_output.time && 224 trigger_is_ready(&switch_output_trigger); 225 } 226 227 static u64 record__bytes_written(struct record *rec) 228 { 229 return rec->bytes_written + rec->thread_bytes_written; 230 } 231 232 static bool record__output_max_size_exceeded(struct record *rec) 233 { 234 return rec->output_max_size && 235 (record__bytes_written(rec) >= rec->output_max_size); 236 } 237 238 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 239 void *bf, size_t size) 240 { 241 struct perf_data_file *file = &rec->session->data->file; 242 243 if (map && map->file) 244 file = map->file; 245 246 if (perf_data_file__write(file, bf, size) < 0) { 247 pr_err("failed to write perf data, error: %m\n"); 248 return -1; 249 } 250 251 if (map && map->file) { 252 thread->bytes_written += size; 253 rec->thread_bytes_written += size; 254 } else { 255 rec->bytes_written += size; 256 } 257 258 if (record__output_max_size_exceeded(rec) && !done) { 259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 260 " stopping session ]\n", 261 record__bytes_written(rec) >> 10); 262 done = 1; 263 } 264 265 if (switch_output_size(rec)) 266 trigger_hit(&switch_output_trigger); 267 268 return 0; 269 } 270 271 static int record__aio_enabled(struct record *rec); 272 static int record__comp_enabled(struct record *rec); 273 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 274 void *dst, size_t dst_size, void *src, size_t src_size); 275 276 #ifdef HAVE_AIO_SUPPORT 277 static int 
record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push(), so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may require a restart with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record	*rec;
	void		*data;
	size_t		size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are
	 * handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first
	 * move part of the data from map->start till the upper bound and then
	 * the remainder from the beginning of the kernel buffer till the end
	 * of the data chunk.
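	 *
	 * (Illustrative numbers, not taken from the code: with a 512 KiB ring
	 * buffer and a chunk that starts 8 KiB before the upper bound, the
	 * first step copies those trailing 8 KiB and the second step copies
	 * the remaining bytes from the beginning of the ring buffer.)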
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer completes.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after the
		 * started aio request completes, or at record__aio_push() if
		 * the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement the map->refcount that was incremented in
		 * record__aio_pushfn() if the record__aio_write() operation
		 * failed to start; otherwise map->refcount is decremented in
		 * record__aio_complete() after the aio write operation
		 * finishes successfully.
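		 *
		 * This put is the counterpart of the perf_mmap__get() issued
		 * for the first pushed chunk in record__aio_pushfn(), so every
		 * started aio request holds exactly one reference on the map.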
464 */ 465 perf_mmap__put(&map->core); 466 } 467 468 return ret; 469 } 470 471 static off_t record__aio_get_pos(int trace_fd) 472 { 473 return lseek(trace_fd, 0, SEEK_CUR); 474 } 475 476 static void record__aio_set_pos(int trace_fd, off_t pos) 477 { 478 lseek(trace_fd, pos, SEEK_SET); 479 } 480 481 static void record__aio_mmap_read_sync(struct record *rec) 482 { 483 int i; 484 struct evlist *evlist = rec->evlist; 485 struct mmap *maps = evlist->mmap; 486 487 if (!record__aio_enabled(rec)) 488 return; 489 490 for (i = 0; i < evlist->core.nr_mmaps; i++) { 491 struct mmap *map = &maps[i]; 492 493 if (map->core.base) 494 record__aio_sync(map, true); 495 } 496 } 497 498 static int nr_cblocks_default = 1; 499 static int nr_cblocks_max = 4; 500 501 static int record__aio_parse(const struct option *opt, 502 const char *str, 503 int unset) 504 { 505 struct record_opts *opts = (struct record_opts *)opt->value; 506 507 if (unset) { 508 opts->nr_cblocks = 0; 509 } else { 510 if (str) 511 opts->nr_cblocks = strtol(str, NULL, 0); 512 if (!opts->nr_cblocks) 513 opts->nr_cblocks = nr_cblocks_default; 514 } 515 516 return 0; 517 } 518 #else /* HAVE_AIO_SUPPORT */ 519 static int nr_cblocks_max = 0; 520 521 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 522 off_t *off __maybe_unused) 523 { 524 return -1; 525 } 526 527 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 528 { 529 return -1; 530 } 531 532 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 533 { 534 } 535 536 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 537 { 538 } 539 #endif 540 541 static int record__aio_enabled(struct record *rec) 542 { 543 return rec->opts.nr_cblocks > 0; 544 } 545 546 #define MMAP_FLUSH_DEFAULT 1 547 static int record__mmap_flush_parse(const struct option *opt, 548 const char *str, 549 int unset) 550 { 551 int flush_max; 552 struct record_opts *opts = (struct record_opts *)opt->value; 553 static struct parse_tag tags[] = { 554 { .tag = 'B', .mult = 1 }, 555 { .tag = 'K', .mult = 1 << 10 }, 556 { .tag = 'M', .mult = 1 << 20 }, 557 { .tag = 'G', .mult = 1 << 30 }, 558 { .tag = 0 }, 559 }; 560 561 if (unset) 562 return 0; 563 564 if (str) { 565 opts->mmap_flush = parse_tag_value(str, tags); 566 if (opts->mmap_flush == (int)-1) 567 opts->mmap_flush = strtol(str, NULL, 0); 568 } 569 570 if (!opts->mmap_flush) 571 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 572 573 flush_max = evlist__mmap_size(opts->mmap_pages); 574 flush_max /= 4; 575 if (opts->mmap_flush > flush_max) 576 opts->mmap_flush = flush_max; 577 578 return 0; 579 } 580 581 #ifdef HAVE_ZSTD_SUPPORT 582 static unsigned int comp_level_default = 1; 583 584 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 585 { 586 struct record_opts *opts = opt->value; 587 588 if (unset) { 589 opts->comp_level = 0; 590 } else { 591 if (str) 592 opts->comp_level = strtol(str, NULL, 0); 593 if (!opts->comp_level) 594 opts->comp_level = comp_level_default; 595 } 596 597 return 0; 598 } 599 #endif 600 static unsigned int comp_level_max = 22; 601 602 static int record__comp_enabled(struct record *rec) 603 { 604 return rec->opts.comp_level > 0; 605 } 606 607 static int process_synthesized_event(struct perf_tool *tool, 608 union perf_event *event, 609 struct perf_sample *sample __maybe_unused, 610 struct machine *machine __maybe_unused) 611 { 612 struct record *rec = container_of(tool, struct record, tool); 613 return record__write(rec, 
NULL, event, event->header.size); 614 } 615 616 static struct mutex synth_lock; 617 618 static int process_locked_synthesized_event(struct perf_tool *tool, 619 union perf_event *event, 620 struct perf_sample *sample __maybe_unused, 621 struct machine *machine __maybe_unused) 622 { 623 int ret; 624 625 mutex_lock(&synth_lock); 626 ret = process_synthesized_event(tool, event, sample, machine); 627 mutex_unlock(&synth_lock); 628 return ret; 629 } 630 631 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 632 { 633 struct record *rec = to; 634 635 if (record__comp_enabled(rec)) { 636 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 637 bf = map->data; 638 } 639 640 thread->samples++; 641 return record__write(rec, map, bf, size); 642 } 643 644 static volatile sig_atomic_t signr = -1; 645 static volatile sig_atomic_t child_finished; 646 #ifdef HAVE_EVENTFD_SUPPORT 647 static volatile sig_atomic_t done_fd = -1; 648 #endif 649 650 static void sig_handler(int sig) 651 { 652 if (sig == SIGCHLD) 653 child_finished = 1; 654 else 655 signr = sig; 656 657 done = 1; 658 #ifdef HAVE_EVENTFD_SUPPORT 659 if (done_fd >= 0) { 660 u64 tmp = 1; 661 int orig_errno = errno; 662 663 /* 664 * It is possible for this signal handler to run after done is 665 * checked in the main loop, but before the perf counter fds are 666 * polled. If this happens, the poll() will continue to wait 667 * even though done is set, and will only break out if either 668 * another signal is received, or the counters are ready for 669 * read. To ensure the poll() doesn't sleep when done is set, 670 * use an eventfd (done_fd) to wake up the poll(). 671 */ 672 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 673 pr_err("failed to signal wakeup fd, error: %m\n"); 674 675 errno = orig_errno; 676 } 677 #endif // HAVE_EVENTFD_SUPPORT 678 } 679 680 static void sigsegv_handler(int sig) 681 { 682 perf_hooks__recover(); 683 sighandler_dump_stack(sig); 684 } 685 686 static void record__sig_exit(void) 687 { 688 if (signr == -1) 689 return; 690 691 signal(signr, SIG_DFL); 692 raise(signr); 693 } 694 695 #ifdef HAVE_AUXTRACE_SUPPORT 696 697 static int record__process_auxtrace(struct perf_tool *tool, 698 struct mmap *map, 699 union perf_event *event, void *data1, 700 size_t len1, void *data2, size_t len2) 701 { 702 struct record *rec = container_of(tool, struct record, tool); 703 struct perf_data *data = &rec->data; 704 size_t padding; 705 u8 pad[8] = {0}; 706 707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 708 off_t file_offset; 709 int fd = perf_data__fd(data); 710 int err; 711 712 file_offset = lseek(fd, 0, SEEK_CUR); 713 if (file_offset == -1) 714 return -1; 715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 716 event, file_offset); 717 if (err) 718 return err; 719 } 720 721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 722 padding = (len1 + len2) & 7; 723 if (padding) 724 padding = 8 - padding; 725 726 record__write(rec, map, event, event->header.size); 727 record__write(rec, map, data1, len1); 728 if (len2) 729 record__write(rec, map, data2, len2); 730 record__write(rec, map, &pad, padding); 731 732 return 0; 733 } 734 735 static int record__auxtrace_mmap_read(struct record *rec, 736 struct mmap *map) 737 { 738 int ret; 739 740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 741 record__process_auxtrace); 742 if (ret < 0) 743 return ret; 744 745 if (ret) 746 rec->samples++; 747 748 return 0; 749 } 750 751 static int 
record__auxtrace_mmap_read_snapshot(struct record *rec, 752 struct mmap *map) 753 { 754 int ret; 755 756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 757 record__process_auxtrace, 758 rec->opts.auxtrace_snapshot_size); 759 if (ret < 0) 760 return ret; 761 762 if (ret) 763 rec->samples++; 764 765 return 0; 766 } 767 768 static int record__auxtrace_read_snapshot_all(struct record *rec) 769 { 770 int i; 771 int rc = 0; 772 773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 774 struct mmap *map = &rec->evlist->mmap[i]; 775 776 if (!map->auxtrace_mmap.base) 777 continue; 778 779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 780 rc = -1; 781 goto out; 782 } 783 } 784 out: 785 return rc; 786 } 787 788 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 789 { 790 pr_debug("Recording AUX area tracing snapshot\n"); 791 if (record__auxtrace_read_snapshot_all(rec) < 0) { 792 trigger_error(&auxtrace_snapshot_trigger); 793 } else { 794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 795 trigger_error(&auxtrace_snapshot_trigger); 796 else 797 trigger_ready(&auxtrace_snapshot_trigger); 798 } 799 } 800 801 static int record__auxtrace_snapshot_exit(struct record *rec) 802 { 803 if (trigger_is_error(&auxtrace_snapshot_trigger)) 804 return 0; 805 806 if (!auxtrace_record__snapshot_started && 807 auxtrace_record__snapshot_start(rec->itr)) 808 return -1; 809 810 record__read_auxtrace_snapshot(rec, true); 811 if (trigger_is_error(&auxtrace_snapshot_trigger)) 812 return -1; 813 814 return 0; 815 } 816 817 static int record__auxtrace_init(struct record *rec) 818 { 819 int err; 820 821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 822 && record__threads_enabled(rec)) { 823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 824 return -EINVAL; 825 } 826 827 if (!rec->itr) { 828 rec->itr = auxtrace_record__init(rec->evlist, &err); 829 if (err) 830 return err; 831 } 832 833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 834 rec->opts.auxtrace_snapshot_opts); 835 if (err) 836 return err; 837 838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 839 rec->opts.auxtrace_sample_opts); 840 if (err) 841 return err; 842 843 auxtrace_regroup_aux_output(rec->evlist); 844 845 return auxtrace_parse_filters(rec->evlist); 846 } 847 848 #else 849 850 static inline 851 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 852 struct mmap *map __maybe_unused) 853 { 854 return 0; 855 } 856 857 static inline 858 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 859 bool on_exit __maybe_unused) 860 { 861 } 862 863 static inline 864 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 865 { 866 return 0; 867 } 868 869 static inline 870 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 871 { 872 return 0; 873 } 874 875 static int record__auxtrace_init(struct record *rec __maybe_unused) 876 { 877 return 0; 878 } 879 880 #endif 881 882 static int record__config_text_poke(struct evlist *evlist) 883 { 884 struct evsel *evsel; 885 886 /* Nothing to do if text poke is already configured */ 887 evlist__for_each_entry(evlist, evsel) { 888 if (evsel->core.attr.text_poke) 889 return 0; 890 } 891 892 evsel = evlist__add_dummy_on_all_cpus(evlist); 893 if (!evsel) 894 return -ENOMEM; 895 896 evsel->core.attr.text_poke = 1; 897 evsel->core.attr.ksymbol = 1; 898 evsel->immediate = true; 899 
evsel__set_sample_bit(evsel, TIME); 900 901 return 0; 902 } 903 904 static int record__config_off_cpu(struct record *rec) 905 { 906 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 907 } 908 909 static bool record__kcore_readable(struct machine *machine) 910 { 911 char kcore[PATH_MAX]; 912 int fd; 913 914 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 915 916 fd = open(kcore, O_RDONLY); 917 if (fd < 0) 918 return false; 919 920 close(fd); 921 922 return true; 923 } 924 925 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 926 { 927 char from_dir[PATH_MAX]; 928 char kcore_dir[PATH_MAX]; 929 int ret; 930 931 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 932 933 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 934 if (ret) 935 return ret; 936 937 return kcore_copy(from_dir, kcore_dir); 938 } 939 940 static void record__thread_data_init_pipes(struct record_thread *thread_data) 941 { 942 thread_data->pipes.msg[0] = -1; 943 thread_data->pipes.msg[1] = -1; 944 thread_data->pipes.ack[0] = -1; 945 thread_data->pipes.ack[1] = -1; 946 } 947 948 static int record__thread_data_open_pipes(struct record_thread *thread_data) 949 { 950 if (pipe(thread_data->pipes.msg)) 951 return -EINVAL; 952 953 if (pipe(thread_data->pipes.ack)) { 954 close(thread_data->pipes.msg[0]); 955 thread_data->pipes.msg[0] = -1; 956 close(thread_data->pipes.msg[1]); 957 thread_data->pipes.msg[1] = -1; 958 return -EINVAL; 959 } 960 961 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 962 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 963 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 964 965 return 0; 966 } 967 968 static void record__thread_data_close_pipes(struct record_thread *thread_data) 969 { 970 if (thread_data->pipes.msg[0] != -1) { 971 close(thread_data->pipes.msg[0]); 972 thread_data->pipes.msg[0] = -1; 973 } 974 if (thread_data->pipes.msg[1] != -1) { 975 close(thread_data->pipes.msg[1]); 976 thread_data->pipes.msg[1] = -1; 977 } 978 if (thread_data->pipes.ack[0] != -1) { 979 close(thread_data->pipes.ack[0]); 980 thread_data->pipes.ack[0] = -1; 981 } 982 if (thread_data->pipes.ack[1] != -1) { 983 close(thread_data->pipes.ack[1]); 984 thread_data->pipes.ack[1] = -1; 985 } 986 } 987 988 static bool evlist__per_thread(struct evlist *evlist) 989 { 990 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 991 } 992 993 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 994 { 995 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 996 struct mmap *mmap = evlist->mmap; 997 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 998 struct perf_cpu_map *cpus = evlist->core.all_cpus; 999 bool per_thread = evlist__per_thread(evlist); 1000 1001 if (per_thread) 1002 thread_data->nr_mmaps = nr_mmaps; 1003 else 1004 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1005 thread_data->mask->maps.nbits); 1006 if (mmap) { 1007 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1008 if (!thread_data->maps) 1009 return -ENOMEM; 1010 } 1011 if (overwrite_mmap) { 1012 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1013 if (!thread_data->overwrite_maps) { 1014 zfree(&thread_data->maps); 1015 return -ENOMEM; 1016 } 1017 } 1018 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1019 thread_data->nr_mmaps, thread_data->maps, 
thread_data->overwrite_maps); 1020 1021 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1022 if (per_thread || 1023 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1024 if (thread_data->maps) { 1025 thread_data->maps[tm] = &mmap[m]; 1026 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1027 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1028 } 1029 if (thread_data->overwrite_maps) { 1030 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1031 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1032 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1033 } 1034 tm++; 1035 } 1036 } 1037 1038 return 0; 1039 } 1040 1041 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1042 { 1043 int f, tm, pos; 1044 struct mmap *map, *overwrite_map; 1045 1046 fdarray__init(&thread_data->pollfd, 64); 1047 1048 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1049 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1050 overwrite_map = thread_data->overwrite_maps ? 1051 thread_data->overwrite_maps[tm] : NULL; 1052 1053 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1054 void *ptr = evlist->core.pollfd.priv[f].ptr; 1055 1056 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1057 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1058 &evlist->core.pollfd); 1059 if (pos < 0) 1060 return pos; 1061 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1062 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1063 } 1064 } 1065 } 1066 1067 return 0; 1068 } 1069 1070 static void record__free_thread_data(struct record *rec) 1071 { 1072 int t; 1073 struct record_thread *thread_data = rec->thread_data; 1074 1075 if (thread_data == NULL) 1076 return; 1077 1078 for (t = 0; t < rec->nr_threads; t++) { 1079 record__thread_data_close_pipes(&thread_data[t]); 1080 zfree(&thread_data[t].maps); 1081 zfree(&thread_data[t].overwrite_maps); 1082 fdarray__exit(&thread_data[t].pollfd); 1083 } 1084 1085 zfree(&rec->thread_data); 1086 } 1087 1088 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1089 int evlist_pollfd_index, 1090 int thread_pollfd_index) 1091 { 1092 size_t x = rec->index_map_cnt; 1093 1094 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1095 return -ENOMEM; 1096 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1097 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1098 rec->index_map_cnt += 1; 1099 return 0; 1100 } 1101 1102 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1103 struct evlist *evlist, 1104 struct record_thread *thread_data) 1105 { 1106 struct pollfd *e_entries = evlist->core.pollfd.entries; 1107 struct pollfd *t_entries = thread_data->pollfd.entries; 1108 int err = 0; 1109 size_t i; 1110 1111 for (i = 0; i < rec->index_map_cnt; i++) { 1112 int e_pos = rec->index_map[i].evlist_pollfd_index; 1113 int t_pos = rec->index_map[i].thread_pollfd_index; 1114 1115 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1116 e_entries[e_pos].events != t_entries[t_pos].events) { 1117 pr_err("Thread and evlist pollfd index mismatch\n"); 1118 err = -EINVAL; 1119 continue; 1120 } 1121 e_entries[e_pos].revents = t_entries[t_pos].revents; 1122 } 1123 return err; 1124 } 1125 1126 static int record__dup_non_perf_events(struct record *rec, 1127 struct evlist *evlist, 1128 struct record_thread *thread_data) 1129 { 1130 struct fdarray *fda = &evlist->core.pollfd; 1131 int 
i, ret; 1132 1133 for (i = 0; i < fda->nr; i++) { 1134 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event)) 1135 continue; 1136 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1137 if (ret < 0) { 1138 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1139 return ret; 1140 } 1141 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1142 thread_data, ret, fda->entries[i].fd); 1143 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1144 if (ret < 0) { 1145 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1146 return ret; 1147 } 1148 } 1149 return 0; 1150 } 1151 1152 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1153 { 1154 int t, ret; 1155 struct record_thread *thread_data; 1156 1157 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1158 if (!rec->thread_data) { 1159 pr_err("Failed to allocate thread data\n"); 1160 return -ENOMEM; 1161 } 1162 thread_data = rec->thread_data; 1163 1164 for (t = 0; t < rec->nr_threads; t++) 1165 record__thread_data_init_pipes(&thread_data[t]); 1166 1167 for (t = 0; t < rec->nr_threads; t++) { 1168 thread_data[t].rec = rec; 1169 thread_data[t].mask = &rec->thread_masks[t]; 1170 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1171 if (ret) { 1172 pr_err("Failed to initialize thread[%d] maps\n", t); 1173 goto out_free; 1174 } 1175 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1176 if (ret) { 1177 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1178 goto out_free; 1179 } 1180 if (t) { 1181 thread_data[t].tid = -1; 1182 ret = record__thread_data_open_pipes(&thread_data[t]); 1183 if (ret) { 1184 pr_err("Failed to open thread[%d] communication pipes\n", t); 1185 goto out_free; 1186 } 1187 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1188 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1189 if (ret < 0) { 1190 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1191 goto out_free; 1192 } 1193 thread_data[t].ctlfd_pos = ret; 1194 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1195 thread_data, thread_data[t].ctlfd_pos, 1196 thread_data[t].pipes.msg[0]); 1197 } else { 1198 thread_data[t].tid = gettid(); 1199 1200 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1201 if (ret < 0) 1202 goto out_free; 1203 1204 thread_data[t].ctlfd_pos = -1; /* Not used */ 1205 } 1206 } 1207 1208 return 0; 1209 1210 out_free: 1211 record__free_thread_data(rec); 1212 1213 return ret; 1214 } 1215 1216 static int record__mmap_evlist(struct record *rec, 1217 struct evlist *evlist) 1218 { 1219 int i, ret; 1220 struct record_opts *opts = &rec->opts; 1221 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1222 opts->auxtrace_sample_mode; 1223 char msg[512]; 1224 1225 if (opts->affinity != PERF_AFFINITY_SYS) 1226 cpu__setup_cpunode_map(); 1227 1228 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1229 opts->auxtrace_mmap_pages, 1230 auxtrace_overwrite, 1231 opts->nr_cblocks, opts->affinity, 1232 opts->mmap_flush, opts->comp_level) < 0) { 1233 if (errno == EPERM) { 1234 pr_err("Permission error mapping pages.\n" 1235 "Consider increasing " 1236 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1237 "or try again with a smaller value of -m/--mmap_pages.\n" 1238 "(current value: %u,%u)\n", 1239 opts->mmap_pages, opts->auxtrace_mmap_pages); 1240 return -errno; 1241 } else { 1242 pr_err("failed to mmap with %d (%s)\n", errno, 1243 str_error_r(errno, msg, sizeof(msg))); 1244 
if (errno) 1245 return -errno; 1246 else 1247 return -EINVAL; 1248 } 1249 } 1250 1251 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1252 return -1; 1253 1254 ret = record__alloc_thread_data(rec, evlist); 1255 if (ret) 1256 return ret; 1257 1258 if (record__threads_enabled(rec)) { 1259 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1260 if (ret) { 1261 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1262 return ret; 1263 } 1264 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1265 if (evlist->mmap) 1266 evlist->mmap[i].file = &rec->data.dir.files[i]; 1267 if (evlist->overwrite_mmap) 1268 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1269 } 1270 } 1271 1272 return 0; 1273 } 1274 1275 static int record__mmap(struct record *rec) 1276 { 1277 return record__mmap_evlist(rec, rec->evlist); 1278 } 1279 1280 static int record__open(struct record *rec) 1281 { 1282 char msg[BUFSIZ]; 1283 struct evsel *pos; 1284 struct evlist *evlist = rec->evlist; 1285 struct perf_session *session = rec->session; 1286 struct record_opts *opts = &rec->opts; 1287 int rc = 0; 1288 1289 /* 1290 * For initial_delay, system wide or a hybrid system, we need to add a 1291 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1292 * of waiting or event synthesis. 1293 */ 1294 if (opts->target.initial_delay || target__has_cpu(&opts->target) || 1295 perf_pmus__num_core_pmus() > 1) { 1296 pos = evlist__get_tracking_event(evlist); 1297 if (!evsel__is_dummy_event(pos)) { 1298 /* Set up dummy event. */ 1299 if (evlist__add_dummy(evlist)) 1300 return -ENOMEM; 1301 pos = evlist__last(evlist); 1302 evlist__set_tracking_event(evlist, pos); 1303 } 1304 1305 /* 1306 * Enable the dummy event when the process is forked for 1307 * initial_delay, immediately for system wide. 1308 */ 1309 if (opts->target.initial_delay && !pos->immediate && 1310 !target__has_cpu(&opts->target)) 1311 pos->core.attr.enable_on_exec = 1; 1312 else 1313 pos->immediate = 1; 1314 } 1315 1316 evlist__config(evlist, opts, &callchain_param); 1317 1318 evlist__for_each_entry(evlist, pos) { 1319 try_again: 1320 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1321 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1322 if (verbose > 0) 1323 ui__warning("%s\n", msg); 1324 goto try_again; 1325 } 1326 if ((errno == EINVAL || errno == EBADF) && 1327 pos->core.leader != &pos->core && 1328 pos->weak_group) { 1329 pos = evlist__reset_weak_group(evlist, pos, true); 1330 goto try_again; 1331 } 1332 rc = -errno; 1333 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1334 ui__error("%s\n", msg); 1335 goto out; 1336 } 1337 1338 pos->supported = true; 1339 } 1340 1341 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1342 pr_warning( 1343 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1344 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1345 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1346 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1347 "Samples in kernel modules won't be resolved at all.\n\n" 1348 "If some relocation was applied (e.g. 
kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter ?: "BPF", evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a DSO preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
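	 *
	 * perf_event__synthesize_guest_os() itself is invoked once per guest
	 * machine via machines__process_guests() from record__synthesize().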
1447 */ 1448 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1449 machine); 1450 if (err < 0) 1451 pr_err("Couldn't record guest kernel [%d]'s reference" 1452 " relocation symbol.\n", machine->pid); 1453 } 1454 1455 static struct perf_event_header finished_round_event = { 1456 .size = sizeof(struct perf_event_header), 1457 .type = PERF_RECORD_FINISHED_ROUND, 1458 }; 1459 1460 static struct perf_event_header finished_init_event = { 1461 .size = sizeof(struct perf_event_header), 1462 .type = PERF_RECORD_FINISHED_INIT, 1463 }; 1464 1465 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1466 { 1467 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1468 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1469 thread->mask->affinity.nbits)) { 1470 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1471 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1472 map->affinity_mask.bits, thread->mask->affinity.nbits); 1473 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1474 (cpu_set_t *)thread->mask->affinity.bits); 1475 if (verbose == 2) { 1476 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1477 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1478 } 1479 } 1480 } 1481 1482 static size_t process_comp_header(void *record, size_t increment) 1483 { 1484 struct perf_record_compressed *event = record; 1485 size_t size = sizeof(*event); 1486 1487 if (increment) { 1488 event->header.size += increment; 1489 return increment; 1490 } 1491 1492 event->header.type = PERF_RECORD_COMPRESSED; 1493 event->header.size = size; 1494 1495 return size; 1496 } 1497 1498 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1499 void *dst, size_t dst_size, void *src, size_t src_size) 1500 { 1501 size_t compressed; 1502 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1503 struct zstd_data *zstd_data = &session->zstd_data; 1504 1505 if (map && map->file) 1506 zstd_data = &map->zstd_data; 1507 1508 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1509 max_record_size, process_comp_header); 1510 1511 if (map && map->file) { 1512 thread->bytes_transferred += src_size; 1513 thread->bytes_compressed += compressed; 1514 } else { 1515 session->bytes_transferred += src_size; 1516 session->bytes_compressed += compressed; 1517 } 1518 1519 return compressed; 1520 } 1521 1522 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1523 bool overwrite, bool synch) 1524 { 1525 u64 bytes_written = rec->bytes_written; 1526 int i; 1527 int rc = 0; 1528 int nr_mmaps; 1529 struct mmap **maps; 1530 int trace_fd = rec->data.file.fd; 1531 off_t off = 0; 1532 1533 if (!evlist) 1534 return 0; 1535 1536 nr_mmaps = thread->nr_mmaps; 1537 maps = overwrite ? 
thread->overwrite_maps : thread->maps; 1538 1539 if (!maps) 1540 return 0; 1541 1542 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1543 return 0; 1544 1545 if (record__aio_enabled(rec)) 1546 off = record__aio_get_pos(trace_fd); 1547 1548 for (i = 0; i < nr_mmaps; i++) { 1549 u64 flush = 0; 1550 struct mmap *map = maps[i]; 1551 1552 if (map->core.base) { 1553 record__adjust_affinity(rec, map); 1554 if (synch) { 1555 flush = map->core.flush; 1556 map->core.flush = 1; 1557 } 1558 if (!record__aio_enabled(rec)) { 1559 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1560 if (synch) 1561 map->core.flush = flush; 1562 rc = -1; 1563 goto out; 1564 } 1565 } else { 1566 if (record__aio_push(rec, map, &off) < 0) { 1567 record__aio_set_pos(trace_fd, off); 1568 if (synch) 1569 map->core.flush = flush; 1570 rc = -1; 1571 goto out; 1572 } 1573 } 1574 if (synch) 1575 map->core.flush = flush; 1576 } 1577 1578 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1579 !rec->opts.auxtrace_sample_mode && 1580 record__auxtrace_mmap_read(rec, map) != 0) { 1581 rc = -1; 1582 goto out; 1583 } 1584 } 1585 1586 if (record__aio_enabled(rec)) 1587 record__aio_set_pos(trace_fd, off); 1588 1589 /* 1590 * Mark the round finished in case we wrote 1591 * at least one event. 1592 * 1593 * No need for round events in directory mode, 1594 * because per-cpu maps and files have data 1595 * sorted by kernel. 1596 */ 1597 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1598 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1599 1600 if (overwrite) 1601 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1602 out: 1603 return rc; 1604 } 1605 1606 static int record__mmap_read_all(struct record *rec, bool synch) 1607 { 1608 int err; 1609 1610 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1611 if (err) 1612 return err; 1613 1614 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1615 } 1616 1617 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1618 void *arg __maybe_unused) 1619 { 1620 struct perf_mmap *map = fda->priv[fd].ptr; 1621 1622 if (map) 1623 perf_mmap__put(map); 1624 } 1625 1626 static void *record__thread(void *arg) 1627 { 1628 enum thread_msg msg = THREAD_MSG__READY; 1629 bool terminate = false; 1630 struct fdarray *pollfd; 1631 int err, ctlfd_pos; 1632 1633 thread = arg; 1634 thread->tid = gettid(); 1635 1636 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1637 if (err == -1) 1638 pr_warning("threads[%d]: failed to notify on start: %s\n", 1639 thread->tid, strerror(errno)); 1640 1641 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1642 1643 pollfd = &thread->pollfd; 1644 ctlfd_pos = thread->ctlfd_pos; 1645 1646 for (;;) { 1647 unsigned long long hits = thread->samples; 1648 1649 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1650 break; 1651 1652 if (hits == thread->samples) { 1653 1654 err = fdarray__poll(pollfd, -1); 1655 /* 1656 * Propagate error, only if there's any. Ignore positive 1657 * number of returned events and interrupt error. 
1658 */ 1659 if (err > 0 || (err < 0 && errno == EINTR)) 1660 err = 0; 1661 thread->waking++; 1662 1663 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1664 record__thread_munmap_filtered, NULL) == 0) 1665 break; 1666 } 1667 1668 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1669 terminate = true; 1670 close(thread->pipes.msg[0]); 1671 thread->pipes.msg[0] = -1; 1672 pollfd->entries[ctlfd_pos].fd = -1; 1673 pollfd->entries[ctlfd_pos].events = 0; 1674 } 1675 1676 pollfd->entries[ctlfd_pos].revents = 0; 1677 } 1678 record__mmap_read_all(thread->rec, true); 1679 1680 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1681 if (err == -1) 1682 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1683 thread->tid, strerror(errno)); 1684 1685 return NULL; 1686 } 1687 1688 static void record__init_features(struct record *rec) 1689 { 1690 struct perf_session *session = rec->session; 1691 int feat; 1692 1693 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1694 perf_header__set_feat(&session->header, feat); 1695 1696 if (rec->no_buildid) 1697 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1698 1699 #ifdef HAVE_LIBTRACEEVENT 1700 if (!have_tracepoints(&rec->evlist->core.entries)) 1701 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1702 #endif 1703 1704 if (!rec->opts.branch_stack) 1705 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1706 1707 if (!rec->opts.full_auxtrace) 1708 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1709 1710 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1711 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1712 1713 if (!rec->opts.use_clockid) 1714 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1715 1716 if (!record__threads_enabled(rec)) 1717 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1718 1719 if (!record__comp_enabled(rec)) 1720 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1721 1722 perf_header__clear_feat(&session->header, HEADER_STAT); 1723 } 1724 1725 static void 1726 record__finish_output(struct record *rec) 1727 { 1728 int i; 1729 struct perf_data *data = &rec->data; 1730 int fd = perf_data__fd(data); 1731 1732 if (data->is_pipe) 1733 return; 1734 1735 rec->session->header.data_size += rec->bytes_written; 1736 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1737 if (record__threads_enabled(rec)) { 1738 for (i = 0; i < data->dir.nr; i++) 1739 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1740 } 1741 1742 if (!rec->no_buildid) { 1743 process_buildids(rec); 1744 1745 if (rec->buildid_all) 1746 dsos__hit_all(rec->session); 1747 } 1748 perf_session__write_header(rec->session, rec->evlist, fd, true); 1749 1750 return; 1751 } 1752 1753 static int record__synthesize_workload(struct record *rec, bool tail) 1754 { 1755 int err; 1756 struct perf_thread_map *thread_map; 1757 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1758 1759 if (rec->opts.tail_synthesize != tail) 1760 return 0; 1761 1762 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1763 if (thread_map == NULL) 1764 return -1; 1765 1766 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1767 process_synthesized_event, 1768 &rec->session->machines.host, 1769 needs_mmap, 1770 rec->opts.sample_address); 1771 perf_thread_map__put(thread_map); 1772 return err; 1773 } 1774 1775 static int write_finished_init(struct record *rec, bool tail) 1776 { 1777 if (rec->opts.tail_synthesize != tail) 
		return 0;

	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	write_finished_init(rec, true);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, which means the newly created perf.data
		 * won't contain the map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
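		 *
		 * record__synthesize_workload() above does exactly that: it
		 * builds a thread map from the workload PID with
		 * thread_map__new_by_tid() and feeds it to
		 * perf_event__synthesize_thread_map().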
1850 */ 1851 if (target__none(&rec->opts.target)) 1852 record__synthesize_workload(rec, false); 1853 write_finished_init(rec, false); 1854 } 1855 return fd; 1856 } 1857 1858 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, 1859 struct perf_record_lost_samples *lost, 1860 int cpu_idx, int thread_idx, u64 lost_count, 1861 u16 misc_flag) 1862 { 1863 struct perf_sample_id *sid; 1864 struct perf_sample sample = {}; 1865 int id_hdr_size; 1866 1867 lost->lost = lost_count; 1868 if (evsel->core.ids) { 1869 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx); 1870 sample.id = sid->id; 1871 } 1872 1873 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1), 1874 evsel->core.attr.sample_type, &sample); 1875 lost->header.size = sizeof(*lost) + id_hdr_size; 1876 lost->header.misc = misc_flag; 1877 record__write(rec, NULL, lost, lost->header.size); 1878 } 1879 1880 static void record__read_lost_samples(struct record *rec) 1881 { 1882 struct perf_session *session = rec->session; 1883 struct perf_record_lost_samples *lost; 1884 struct evsel *evsel; 1885 1886 /* there was an error during record__open */ 1887 if (session->evlist == NULL) 1888 return; 1889 1890 lost = zalloc(PERF_SAMPLE_MAX_SIZE); 1891 if (lost == NULL) { 1892 pr_debug("Memory allocation failed\n"); 1893 return; 1894 } 1895 1896 lost->header.type = PERF_RECORD_LOST_SAMPLES; 1897 1898 evlist__for_each_entry(session->evlist, evsel) { 1899 struct xyarray *xy = evsel->core.sample_id; 1900 u64 lost_count; 1901 1902 if (xy == NULL || evsel->core.fd == NULL) 1903 continue; 1904 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) || 1905 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) { 1906 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n"); 1907 continue; 1908 } 1909 1910 for (int x = 0; x < xyarray__max_x(xy); x++) { 1911 for (int y = 0; y < xyarray__max_y(xy); y++) { 1912 struct perf_counts_values count; 1913 1914 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) { 1915 pr_debug("read LOST count failed\n"); 1916 goto out; 1917 } 1918 1919 if (count.lost) { 1920 __record__save_lost_samples(rec, evsel, lost, 1921 x, y, count.lost, 0); 1922 } 1923 } 1924 } 1925 1926 lost_count = perf_bpf_filter__lost_count(evsel); 1927 if (lost_count) 1928 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, 1929 PERF_RECORD_MISC_LOST_SAMPLES_BPF); 1930 } 1931 out: 1932 free(lost); 1933 } 1934 1935 static volatile sig_atomic_t workload_exec_errno; 1936 1937 /* 1938 * evlist__prepare_workload will send a SIGUSR1 1939 * if the fork fails, since we asked by setting its 1940 * want_signal to true. 
1941 */ 1942 static void workload_exec_failed_signal(int signo __maybe_unused, 1943 siginfo_t *info, 1944 void *ucontext __maybe_unused) 1945 { 1946 workload_exec_errno = info->si_value.sival_int; 1947 done = 1; 1948 child_finished = 1; 1949 } 1950 1951 static void snapshot_sig_handler(int sig); 1952 static void alarm_sig_handler(int sig); 1953 1954 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1955 { 1956 if (evlist) { 1957 if (evlist->mmap && evlist->mmap[0].core.base) 1958 return evlist->mmap[0].core.base; 1959 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1960 return evlist->overwrite_mmap[0].core.base; 1961 } 1962 return NULL; 1963 } 1964 1965 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1966 { 1967 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1968 if (pc) 1969 return pc; 1970 return NULL; 1971 } 1972 1973 static int record__synthesize(struct record *rec, bool tail) 1974 { 1975 struct perf_session *session = rec->session; 1976 struct machine *machine = &session->machines.host; 1977 struct perf_data *data = &rec->data; 1978 struct record_opts *opts = &rec->opts; 1979 struct perf_tool *tool = &rec->tool; 1980 int err = 0; 1981 event_op f = process_synthesized_event; 1982 1983 if (rec->opts.tail_synthesize != tail) 1984 return 0; 1985 1986 if (data->is_pipe) { 1987 err = perf_event__synthesize_for_pipe(tool, session, data, 1988 process_synthesized_event); 1989 if (err < 0) 1990 goto out; 1991 1992 rec->bytes_written += err; 1993 } 1994 1995 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1996 process_synthesized_event, machine); 1997 if (err) 1998 goto out; 1999 2000 /* Synthesize id_index before auxtrace_info */ 2001 err = perf_event__synthesize_id_index(tool, 2002 process_synthesized_event, 2003 session->evlist, machine); 2004 if (err) 2005 goto out; 2006 2007 if (rec->opts.full_auxtrace) { 2008 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 2009 session, process_synthesized_event); 2010 if (err) 2011 goto out; 2012 } 2013 2014 if (!evlist__exclude_kernel(rec->evlist)) { 2015 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 2016 machine); 2017 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 2018 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 2019 "Check /proc/kallsyms permission or run as root.\n"); 2020 2021 err = perf_event__synthesize_modules(tool, process_synthesized_event, 2022 machine); 2023 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 2024 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 2025 "Check /proc/modules permission or run as root.\n"); 2026 } 2027 2028 if (perf_guest) { 2029 machines__process_guests(&session->machines, 2030 perf_event__synthesize_guest_os, tool); 2031 } 2032 2033 err = perf_event__synthesize_extra_attr(&rec->tool, 2034 rec->evlist, 2035 process_synthesized_event, 2036 data->is_pipe); 2037 if (err) 2038 goto out; 2039 2040 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 2041 process_synthesized_event, 2042 NULL); 2043 if (err < 0) { 2044 pr_err("Couldn't synthesize thread map.\n"); 2045 return err; 2046 } 2047 2048 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 2049 process_synthesized_event, NULL); 2050 if (err < 0) { 2051 pr_err("Couldn't synthesize cpu map.\n"); 2052 return err; 2053 } 2054 2055 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 2056 machine, opts); 2057 if (err < 0) { 2058 pr_warning("Couldn't synthesize bpf events.\n"); 2059 err = 0; 2060 } 2061 2062 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 2063 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 2064 machine); 2065 if (err < 0) { 2066 pr_warning("Couldn't synthesize cgroup events.\n"); 2067 err = 0; 2068 } 2069 } 2070 2071 if (rec->opts.nr_threads_synthesize > 1) { 2072 mutex_init(&synth_lock); 2073 perf_set_multithreaded(); 2074 f = process_locked_synthesized_event; 2075 } 2076 2077 if (rec->opts.synth & PERF_SYNTH_TASK) { 2078 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2079 2080 err = __machine__synthesize_threads(machine, tool, &opts->target, 2081 rec->evlist->core.threads, 2082 f, needs_mmap, opts->sample_address, 2083 rec->opts.nr_threads_synthesize); 2084 } 2085 2086 if (rec->opts.nr_threads_synthesize > 1) { 2087 perf_set_singlethreaded(); 2088 mutex_destroy(&synth_lock); 2089 } 2090 2091 out: 2092 return err; 2093 } 2094 2095 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2096 { 2097 struct record *rec = data; 2098 pthread_kill(rec->thread_id, SIGUSR2); 2099 return 0; 2100 } 2101 2102 static int record__setup_sb_evlist(struct record *rec) 2103 { 2104 struct record_opts *opts = &rec->opts; 2105 2106 if (rec->sb_evlist != NULL) { 2107 /* 2108 * We get here if --switch-output-event populated the 2109 * sb_evlist, so associate a callback that will send a SIGUSR2 2110 * to the main thread. 
2111 */ 2112 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2113 rec->thread_id = pthread_self(); 2114 } 2115 #ifdef HAVE_LIBBPF_SUPPORT 2116 if (!opts->no_bpf_event) { 2117 if (rec->sb_evlist == NULL) { 2118 rec->sb_evlist = evlist__new(); 2119 2120 if (rec->sb_evlist == NULL) { 2121 pr_err("Couldn't create side band evlist.\n"); 2122 return -1; 2123 } 2124 } 2125 2126 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2127 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n"); 2128 return -1; 2129 } 2130 } 2131 #endif 2132 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2133 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2134 opts->no_bpf_event = true; 2135 } 2136 2137 return 0; 2138 } 2139 2140 static int record__init_clock(struct record *rec) 2141 { 2142 struct perf_session *session = rec->session; 2143 struct timespec ref_clockid; 2144 struct timeval ref_tod; 2145 u64 ref; 2146 2147 if (!rec->opts.use_clockid) 2148 return 0; 2149 2150 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2151 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2152 2153 session->header.env.clock.clockid = rec->opts.clockid; 2154 2155 if (gettimeofday(&ref_tod, NULL) != 0) { 2156 pr_err("gettimeofday failed, cannot set reference time.\n"); 2157 return -1; 2158 } 2159 2160 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2161 pr_err("clock_gettime failed, cannot set reference time.\n"); 2162 return -1; 2163 } 2164 2165 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2166 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2167 2168 session->header.env.clock.tod_ns = ref; 2169 2170 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2171 (u64) ref_clockid.tv_nsec; 2172 2173 session->header.env.clock.clockid_ns = ref; 2174 return 0; 2175 } 2176 2177 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2178 { 2179 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2180 trigger_hit(&auxtrace_snapshot_trigger); 2181 auxtrace_record__snapshot_started = 1; 2182 if (auxtrace_record__snapshot_start(rec->itr)) 2183 trigger_error(&auxtrace_snapshot_trigger); 2184 } 2185 } 2186 2187 static void record__uniquify_name(struct record *rec) 2188 { 2189 struct evsel *pos; 2190 struct evlist *evlist = rec->evlist; 2191 char *new_name; 2192 int ret; 2193 2194 if (perf_pmus__num_core_pmus() == 1) 2195 return; 2196 2197 evlist__for_each_entry(evlist, pos) { 2198 if (!evsel__is_hybrid(pos)) 2199 continue; 2200 2201 if (strchr(pos->name, '/')) 2202 continue; 2203 2204 ret = asprintf(&new_name, "%s/%s/", 2205 pos->pmu_name, pos->name); 2206 if (ret > 0) { 2207 free(pos->name); 2208 pos->name = new_name; 2209 } 2210 } 2211 } 2212 2213 static int record__terminate_thread(struct record_thread *thread_data) 2214 { 2215 int err; 2216 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2217 pid_t tid = thread_data->tid; 2218 2219 close(thread_data->pipes.msg[1]); 2220 thread_data->pipes.msg[1] = -1; 2221 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2222 if (err > 0) 2223 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2224 else 2225 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2226 thread->tid, tid); 2227 2228 return 0; 2229 } 2230 2231 static int record__start_threads(struct record *rec) 2232 { 2233 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2234 struct record_thread *thread_data = rec->thread_data; 2235 sigset_t
full, mask; 2236 pthread_t handle; 2237 pthread_attr_t attrs; 2238 2239 thread = &thread_data[0]; 2240 2241 if (!record__threads_enabled(rec)) 2242 return 0; 2243 2244 sigfillset(&full); 2245 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2246 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2247 return -1; 2248 } 2249 2250 pthread_attr_init(&attrs); 2251 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2252 2253 for (t = 1; t < nr_threads; t++) { 2254 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2255 2256 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2257 pthread_attr_setaffinity_np(&attrs, 2258 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2259 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2260 #endif 2261 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2262 for (tt = 1; tt < t; tt++) 2263 record__terminate_thread(&thread_data[tt]); 2264 pr_err("Failed to start threads: %s\n", strerror(errno)); 2265 ret = -1; 2266 goto out_err; 2267 } 2268 2269 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2270 if (err > 0) 2271 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2272 thread_msg_tags[msg]); 2273 else 2274 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2275 thread->tid, rec->thread_data[t].tid); 2276 } 2277 2278 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2279 (cpu_set_t *)thread->mask->affinity.bits); 2280 2281 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2282 2283 out_err: 2284 pthread_attr_destroy(&attrs); 2285 2286 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2287 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2288 ret = -1; 2289 } 2290 2291 return ret; 2292 } 2293 2294 static int record__stop_threads(struct record *rec) 2295 { 2296 int t; 2297 struct record_thread *thread_data = rec->thread_data; 2298 2299 for (t = 1; t < rec->nr_threads; t++) 2300 record__terminate_thread(&thread_data[t]); 2301 2302 for (t = 0; t < rec->nr_threads; t++) { 2303 rec->samples += thread_data[t].samples; 2304 if (!record__threads_enabled(rec)) 2305 continue; 2306 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2307 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2308 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2309 thread_data[t].samples, thread_data[t].waking); 2310 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2311 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2312 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2313 else 2314 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2315 } 2316 2317 return 0; 2318 } 2319 2320 static unsigned long record__waking(struct record *rec) 2321 { 2322 int t; 2323 unsigned long waking = 0; 2324 struct record_thread *thread_data = rec->thread_data; 2325 2326 for (t = 0; t < rec->nr_threads; t++) 2327 waking += thread_data[t].waking; 2328 2329 return waking; 2330 } 2331 2332 static int __cmd_record(struct record *rec, int argc, const char **argv) 2333 { 2334 int err; 2335 int status = 0; 2336 const bool forks = argc > 0; 2337 struct perf_tool *tool = &rec->tool; 2338 struct record_opts *opts = &rec->opts; 2339 struct perf_data *data = &rec->data; 2340 struct perf_session *session; 2341 bool disabled = false, draining = false; 2342 int fd; 2343 float ratio = 0; 2344 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2345 2346
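/* Register the exit handler and the signal handlers up front, so that an interrupted session (SIGINT/SIGTERM), a finished child (SIGCHLD) or a crash (SIGSEGV) still reaches the normal teardown path. */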
atexit(record__sig_exit); 2347 signal(SIGCHLD, sig_handler); 2348 signal(SIGINT, sig_handler); 2349 signal(SIGTERM, sig_handler); 2350 signal(SIGSEGV, sigsegv_handler); 2351 2352 if (rec->opts.record_namespaces) 2353 tool->namespace_events = true; 2354 2355 if (rec->opts.record_cgroup) { 2356 #ifdef HAVE_FILE_HANDLE 2357 tool->cgroup_events = true; 2358 #else 2359 pr_err("cgroup tracking is not supported\n"); 2360 return -1; 2361 #endif 2362 } 2363 2364 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2365 signal(SIGUSR2, snapshot_sig_handler); 2366 if (rec->opts.auxtrace_snapshot_mode) 2367 trigger_on(&auxtrace_snapshot_trigger); 2368 if (rec->switch_output.enabled) 2369 trigger_on(&switch_output_trigger); 2370 } else { 2371 signal(SIGUSR2, SIG_IGN); 2372 } 2373 2374 session = perf_session__new(data, tool); 2375 if (IS_ERR(session)) { 2376 pr_err("Perf session creation failed.\n"); 2377 return PTR_ERR(session); 2378 } 2379 2380 if (record__threads_enabled(rec)) { 2381 if (perf_data__is_pipe(&rec->data)) { 2382 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2383 return -1; 2384 } 2385 if (rec->opts.full_auxtrace) { 2386 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2387 return -1; 2388 } 2389 } 2390 2391 fd = perf_data__fd(data); 2392 rec->session = session; 2393 2394 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2395 pr_err("Compression initialization failed.\n"); 2396 return -1; 2397 } 2398 #ifdef HAVE_EVENTFD_SUPPORT 2399 done_fd = eventfd(0, EFD_NONBLOCK); 2400 if (done_fd < 0) { 2401 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2402 status = -1; 2403 goto out_delete_session; 2404 } 2405 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2406 if (err < 0) { 2407 pr_err("Failed to add wakeup eventfd to poll list\n"); 2408 status = err; 2409 goto out_delete_session; 2410 } 2411 #endif // HAVE_EVENTFD_SUPPORT 2412 2413 session->header.env.comp_type = PERF_COMP_ZSTD; 2414 session->header.env.comp_level = rec->opts.comp_level; 2415 2416 if (rec->opts.kcore && 2417 !record__kcore_readable(&session->machines.host)) { 2418 pr_err("ERROR: kcore is not readable.\n"); 2419 return -1; 2420 } 2421 2422 if (record__init_clock(rec)) 2423 return -1; 2424 2425 record__init_features(rec); 2426 2427 if (forks) { 2428 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2429 workload_exec_failed_signal); 2430 if (err < 0) { 2431 pr_err("Couldn't run the workload!\n"); 2432 status = err; 2433 goto out_delete_session; 2434 } 2435 } 2436 2437 /* 2438 * If we have just single event and are sending data 2439 * through pipe, we need to force the ids allocation, 2440 * because we synthesize event name through the pipe 2441 * and need the id for that. 
2442 */ 2443 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2444 rec->opts.sample_id = true; 2445 2446 record__uniquify_name(rec); 2447 2448 /* Debug message used by test scripts */ 2449 pr_debug3("perf record opening and mmapping events\n"); 2450 if (record__open(rec) != 0) { 2451 err = -1; 2452 goto out_free_threads; 2453 } 2454 /* Debug message used by test scripts */ 2455 pr_debug3("perf record done opening and mmapping events\n"); 2456 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2457 2458 if (rec->opts.kcore) { 2459 err = record__kcore_copy(&session->machines.host, data); 2460 if (err) { 2461 pr_err("ERROR: Failed to copy kcore\n"); 2462 goto out_free_threads; 2463 } 2464 } 2465 2466 /* 2467 * Normally perf_session__new would do this, but it doesn't have the 2468 * evlist. 2469 */ 2470 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2471 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2472 rec->tool.ordered_events = false; 2473 } 2474 2475 if (evlist__nr_groups(rec->evlist) == 0) 2476 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2477 2478 if (data->is_pipe) { 2479 err = perf_header__write_pipe(fd); 2480 if (err < 0) 2481 goto out_free_threads; 2482 } else { 2483 err = perf_session__write_header(session, rec->evlist, fd, false); 2484 if (err < 0) 2485 goto out_free_threads; 2486 } 2487 2488 err = -1; 2489 if (!rec->no_buildid 2490 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2491 pr_err("Couldn't generate buildids. " 2492 "Use --no-buildid to profile anyway.\n"); 2493 goto out_free_threads; 2494 } 2495 2496 err = record__setup_sb_evlist(rec); 2497 if (err) 2498 goto out_free_threads; 2499 2500 err = record__synthesize(rec, false); 2501 if (err < 0) 2502 goto out_free_threads; 2503 2504 if (rec->realtime_prio) { 2505 struct sched_param param; 2506 2507 param.sched_priority = rec->realtime_prio; 2508 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2509 pr_err("Could not set realtime priority.\n"); 2510 err = -1; 2511 goto out_free_threads; 2512 } 2513 } 2514 2515 if (record__start_threads(rec)) 2516 goto out_free_threads; 2517 2518 /* 2519 * When perf is starting the traced process, all the events 2520 * (apart from group members) have enable_on_exec=1 set, 2521 * so don't spoil it by prematurely enabling them. 2522 */ 2523 if (!target__none(&opts->target) && !opts->target.initial_delay) 2524 evlist__enable(rec->evlist); 2525 2526 /* 2527 * Let the child rip 2528 */ 2529 if (forks) { 2530 struct machine *machine = &session->machines.host; 2531 union perf_event *event; 2532 pid_t tgid; 2533 2534 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2535 if (event == NULL) { 2536 err = -ENOMEM; 2537 goto out_child; 2538 } 2539 2540 /* 2541 * Some H/W events are generated before COMM event 2542 * which is emitted during exec(), so perf script 2543 * cannot see a correct process name for those events. 2544 * Synthesize COMM event to prevent it. 2545 */ 2546 tgid = perf_event__synthesize_comm(tool, event, 2547 rec->evlist->workload.pid, 2548 process_synthesized_event, 2549 machine); 2550 free(event); 2551 2552 if (tgid == -1) 2553 goto out_child; 2554 2555 event = malloc(sizeof(event->namespaces) + 2556 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2557 machine->id_hdr_size); 2558 if (event == NULL) { 2559 err = -ENOMEM; 2560 goto out_child; 2561 } 2562 2563 /* 2564 * Synthesize NAMESPACES event for the command specified.
2565 */ 2566 perf_event__synthesize_namespaces(tool, event, 2567 rec->evlist->workload.pid, 2568 tgid, process_synthesized_event, 2569 machine); 2570 free(event); 2571 2572 evlist__start_workload(rec->evlist); 2573 } 2574 2575 if (opts->target.initial_delay) { 2576 pr_info(EVLIST_DISABLED_MSG); 2577 if (opts->target.initial_delay > 0) { 2578 usleep(opts->target.initial_delay * USEC_PER_MSEC); 2579 evlist__enable(rec->evlist); 2580 pr_info(EVLIST_ENABLED_MSG); 2581 } 2582 } 2583 2584 err = event_enable_timer__start(rec->evlist->eet); 2585 if (err) 2586 goto out_child; 2587 2588 /* Debug message used by test scripts */ 2589 pr_debug3("perf record has started\n"); 2590 fflush(stderr); 2591 2592 trigger_ready(&auxtrace_snapshot_trigger); 2593 trigger_ready(&switch_output_trigger); 2594 perf_hooks__invoke_record_start(); 2595 2596 /* 2597 * Must write FINISHED_INIT so it will be seen after all other 2598 * synthesized user events, but before any regular events. 2599 */ 2600 err = write_finished_init(rec, false); 2601 if (err < 0) 2602 goto out_child; 2603 2604 for (;;) { 2605 unsigned long long hits = thread->samples; 2606 2607 /* 2608 * rec->evlist->bkw_mmap_state is possible to be 2609 * BKW_MMAP_EMPTY here: when done == true and 2610 * hits != rec->samples in previous round. 2611 * 2612 * evlist__toggle_bkw_mmap ensure we never 2613 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2614 */ 2615 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2616 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2617 2618 if (record__mmap_read_all(rec, false) < 0) { 2619 trigger_error(&auxtrace_snapshot_trigger); 2620 trigger_error(&switch_output_trigger); 2621 err = -1; 2622 goto out_child; 2623 } 2624 2625 if (auxtrace_record__snapshot_started) { 2626 auxtrace_record__snapshot_started = 0; 2627 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2628 record__read_auxtrace_snapshot(rec, false); 2629 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2630 pr_err("AUX area tracing snapshot failed\n"); 2631 err = -1; 2632 goto out_child; 2633 } 2634 } 2635 2636 if (trigger_is_hit(&switch_output_trigger)) { 2637 /* 2638 * If switch_output_trigger is hit, the data in 2639 * overwritable ring buffer should have been collected, 2640 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2641 * 2642 * If SIGUSR2 raise after or during record__mmap_read_all(), 2643 * record__mmap_read_all() didn't collect data from 2644 * overwritable ring buffer. Read again. 2645 */ 2646 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2647 continue; 2648 trigger_ready(&switch_output_trigger); 2649 2650 /* 2651 * Reenable events in overwrite ring buffer after 2652 * record__mmap_read_all(): we should have collected 2653 * data from it. 2654 */ 2655 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2656 2657 if (!quiet) 2658 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2659 record__waking(rec)); 2660 thread->waking = 0; 2661 fd = record__switch_output(rec, false); 2662 if (fd < 0) { 2663 pr_err("Failed to switch to new file\n"); 2664 trigger_error(&switch_output_trigger); 2665 err = fd; 2666 goto out_child; 2667 } 2668 2669 /* re-arm the alarm */ 2670 if (rec->switch_output.time) 2671 alarm(rec->switch_output.time); 2672 } 2673 2674 if (hits == thread->samples) { 2675 if (done || draining) 2676 break; 2677 err = fdarray__poll(&thread->pollfd, -1); 2678 /* 2679 * Propagate error, only if there's any. Ignore positive 2680 * number of returned events and interrupt error. 
2681 */ 2682 if (err > 0 || (err < 0 && errno == EINTR)) 2683 err = 0; 2684 thread->waking++; 2685 2686 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2687 record__thread_munmap_filtered, NULL) == 0) 2688 draining = true; 2689 2690 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2691 if (err) 2692 goto out_child; 2693 } 2694 2695 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2696 switch (cmd) { 2697 case EVLIST_CTL_CMD_SNAPSHOT: 2698 hit_auxtrace_snapshot_trigger(rec); 2699 evlist__ctlfd_ack(rec->evlist); 2700 break; 2701 case EVLIST_CTL_CMD_STOP: 2702 done = 1; 2703 break; 2704 case EVLIST_CTL_CMD_ACK: 2705 case EVLIST_CTL_CMD_UNSUPPORTED: 2706 case EVLIST_CTL_CMD_ENABLE: 2707 case EVLIST_CTL_CMD_DISABLE: 2708 case EVLIST_CTL_CMD_EVLIST: 2709 case EVLIST_CTL_CMD_PING: 2710 default: 2711 break; 2712 } 2713 } 2714 2715 err = event_enable_timer__process(rec->evlist->eet); 2716 if (err < 0) 2717 goto out_child; 2718 if (err) { 2719 err = 0; 2720 done = 1; 2721 } 2722 2723 /* 2724 * When perf is starting the traced process, at the end events 2725 * die with the process and we wait for that. Thus no need to 2726 * disable events in this case. 2727 */ 2728 if (done && !disabled && !target__none(&opts->target)) { 2729 trigger_off(&auxtrace_snapshot_trigger); 2730 evlist__disable(rec->evlist); 2731 disabled = true; 2732 } 2733 } 2734 2735 trigger_off(&auxtrace_snapshot_trigger); 2736 trigger_off(&switch_output_trigger); 2737 2738 if (opts->auxtrace_snapshot_on_exit) 2739 record__auxtrace_snapshot_exit(rec); 2740 2741 if (forks && workload_exec_errno) { 2742 char msg[STRERR_BUFSIZE], strevsels[2048]; 2743 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2744 2745 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2746 2747 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2748 strevsels, argv[0], emsg); 2749 err = -1; 2750 goto out_child; 2751 } 2752 2753 if (!quiet) 2754 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2755 record__waking(rec)); 2756 2757 write_finished_init(rec, true); 2758 2759 if (target__none(&rec->opts.target)) 2760 record__synthesize_workload(rec, true); 2761 2762 out_child: 2763 record__stop_threads(rec); 2764 record__mmap_read_all(rec, true); 2765 out_free_threads: 2766 record__free_thread_data(rec); 2767 evlist__finalize_ctlfd(rec->evlist); 2768 record__aio_mmap_read_sync(rec); 2769 2770 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2771 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2772 session->header.env.comp_ratio = ratio + 0.5; 2773 } 2774 2775 if (forks) { 2776 int exit_status; 2777 2778 if (!child_finished) 2779 kill(rec->evlist->workload.pid, SIGTERM); 2780 2781 wait(&exit_status); 2782 2783 if (err < 0) 2784 status = err; 2785 else if (WIFEXITED(exit_status)) 2786 status = WEXITSTATUS(exit_status); 2787 else if (WIFSIGNALED(exit_status)) 2788 signr = WTERMSIG(exit_status); 2789 } else 2790 status = err; 2791 2792 if (rec->off_cpu) 2793 rec->bytes_written += off_cpu_write(rec->session); 2794 2795 record__read_lost_samples(rec); 2796 record__synthesize(rec, true); 2797 /* this will be recalculated during process_buildids() */ 2798 rec->samples = 0; 2799 2800 if (!err) { 2801 if (!rec->timestamp_filename) { 2802 record__finish_output(rec); 2803 } else { 2804 fd = record__switch_output(rec, true); 2805 if (fd < 0) { 2806 status = fd; 2807 goto out_delete_session; 2808 } 2809 } 2810 } 2811 
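/* Let the record-end perf hooks run before the final session summary is printed below. */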
2812 perf_hooks__invoke_record_end(); 2813 2814 if (!err && !quiet) { 2815 char samples[128]; 2816 const char *postfix = rec->timestamp_filename ? 2817 ".<timestamp>" : ""; 2818 2819 if (rec->samples && !rec->opts.full_auxtrace) 2820 scnprintf(samples, sizeof(samples), 2821 " (%" PRIu64 " samples)", rec->samples); 2822 else 2823 samples[0] = '\0'; 2824 2825 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2826 perf_data__size(data) / 1024.0 / 1024.0, 2827 data->path, postfix, samples); 2828 if (ratio) { 2829 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2830 rec->session->bytes_transferred / 1024.0 / 1024.0, 2831 ratio); 2832 } 2833 fprintf(stderr, " ]\n"); 2834 } 2835 2836 out_delete_session: 2837 #ifdef HAVE_EVENTFD_SUPPORT 2838 if (done_fd >= 0) { 2839 fd = done_fd; 2840 done_fd = -1; 2841 2842 close(fd); 2843 } 2844 #endif 2845 zstd_fini(&session->zstd_data); 2846 perf_session__delete(session); 2847 2848 if (!opts->no_bpf_event) 2849 evlist__stop_sb_thread(rec->sb_evlist); 2850 return status; 2851 } 2852 2853 static void callchain_debug(struct callchain_param *callchain) 2854 { 2855 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2856 2857 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2858 2859 if (callchain->record_mode == CALLCHAIN_DWARF) 2860 pr_debug("callchain: stack dump size %d\n", 2861 callchain->dump_size); 2862 } 2863 2864 int record_opts__parse_callchain(struct record_opts *record, 2865 struct callchain_param *callchain, 2866 const char *arg, bool unset) 2867 { 2868 int ret; 2869 callchain->enabled = !unset; 2870 2871 /* --no-call-graph */ 2872 if (unset) { 2873 callchain->record_mode = CALLCHAIN_NONE; 2874 pr_debug("callchain: disabled\n"); 2875 return 0; 2876 } 2877 2878 ret = parse_callchain_record_opt(arg, callchain); 2879 if (!ret) { 2880 /* Enable data address sampling for DWARF unwind. 
*/ 2881 if (callchain->record_mode == CALLCHAIN_DWARF) 2882 record->sample_address = true; 2883 callchain_debug(callchain); 2884 } 2885 2886 return ret; 2887 } 2888 2889 int record_parse_callchain_opt(const struct option *opt, 2890 const char *arg, 2891 int unset) 2892 { 2893 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2894 } 2895 2896 int record_callchain_opt(const struct option *opt, 2897 const char *arg __maybe_unused, 2898 int unset __maybe_unused) 2899 { 2900 struct callchain_param *callchain = opt->value; 2901 2902 callchain->enabled = true; 2903 2904 if (callchain->record_mode == CALLCHAIN_NONE) 2905 callchain->record_mode = CALLCHAIN_FP; 2906 2907 callchain_debug(callchain); 2908 return 0; 2909 } 2910 2911 static int perf_record_config(const char *var, const char *value, void *cb) 2912 { 2913 struct record *rec = cb; 2914 2915 if (!strcmp(var, "record.build-id")) { 2916 if (!strcmp(value, "cache")) 2917 rec->no_buildid_cache = false; 2918 else if (!strcmp(value, "no-cache")) 2919 rec->no_buildid_cache = true; 2920 else if (!strcmp(value, "skip")) 2921 rec->no_buildid = true; 2922 else if (!strcmp(value, "mmap")) 2923 rec->buildid_mmap = true; 2924 else 2925 return -1; 2926 return 0; 2927 } 2928 if (!strcmp(var, "record.call-graph")) { 2929 var = "call-graph.record-mode"; 2930 return perf_default_config(var, value, cb); 2931 } 2932 #ifdef HAVE_AIO_SUPPORT 2933 if (!strcmp(var, "record.aio")) { 2934 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2935 if (!rec->opts.nr_cblocks) 2936 rec->opts.nr_cblocks = nr_cblocks_default; 2937 } 2938 #endif 2939 if (!strcmp(var, "record.debuginfod")) { 2940 rec->debuginfod.urls = strdup(value); 2941 if (!rec->debuginfod.urls) 2942 return -ENOMEM; 2943 rec->debuginfod.set = true; 2944 } 2945 2946 return 0; 2947 } 2948 2949 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset) 2950 { 2951 struct record *rec = (struct record *)opt->value; 2952 2953 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset); 2954 } 2955 2956 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2957 { 2958 struct record_opts *opts = (struct record_opts *)opt->value; 2959 2960 if (unset || !str) 2961 return 0; 2962 2963 if (!strcasecmp(str, "node")) 2964 opts->affinity = PERF_AFFINITY_NODE; 2965 else if (!strcasecmp(str, "cpu")) 2966 opts->affinity = PERF_AFFINITY_CPU; 2967 2968 return 0; 2969 } 2970 2971 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2972 { 2973 mask->nbits = nr_bits; 2974 mask->bits = bitmap_zalloc(mask->nbits); 2975 if (!mask->bits) 2976 return -ENOMEM; 2977 2978 return 0; 2979 } 2980 2981 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2982 { 2983 bitmap_free(mask->bits); 2984 mask->nbits = 0; 2985 } 2986 2987 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2988 { 2989 int ret; 2990 2991 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2992 if (ret) { 2993 mask->affinity.bits = NULL; 2994 return ret; 2995 } 2996 2997 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2998 if (ret) { 2999 record__mmap_cpu_mask_free(&mask->maps); 3000 mask->maps.bits = NULL; 3001 } 3002 3003 return ret; 3004 } 3005 3006 static void record__thread_mask_free(struct thread_mask *mask) 3007 { 3008 record__mmap_cpu_mask_free(&mask->maps); 3009 record__mmap_cpu_mask_free(&mask->affinity); 3010 } 3011 3012 static int record__parse_threads(const struct 
option *opt, const char *str, int unset) 3013 { 3014 int s; 3015 struct record_opts *opts = opt->value; 3016 3017 if (unset || !str || !strlen(str)) { 3018 opts->threads_spec = THREAD_SPEC__CPU; 3019 } else { 3020 for (s = 1; s < THREAD_SPEC__MAX; s++) { 3021 if (s == THREAD_SPEC__USER) { 3022 opts->threads_user_spec = strdup(str); 3023 if (!opts->threads_user_spec) 3024 return -ENOMEM; 3025 opts->threads_spec = THREAD_SPEC__USER; 3026 break; 3027 } 3028 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 3029 opts->threads_spec = s; 3030 break; 3031 } 3032 } 3033 } 3034 3035 if (opts->threads_spec == THREAD_SPEC__USER) 3036 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 3037 else 3038 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 3039 3040 return 0; 3041 } 3042 3043 static int parse_output_max_size(const struct option *opt, 3044 const char *str, int unset) 3045 { 3046 unsigned long *s = (unsigned long *)opt->value; 3047 static struct parse_tag tags_size[] = { 3048 { .tag = 'B', .mult = 1 }, 3049 { .tag = 'K', .mult = 1 << 10 }, 3050 { .tag = 'M', .mult = 1 << 20 }, 3051 { .tag = 'G', .mult = 1 << 30 }, 3052 { .tag = 0 }, 3053 }; 3054 unsigned long val; 3055 3056 if (unset) { 3057 *s = 0; 3058 return 0; 3059 } 3060 3061 val = parse_tag_value(str, tags_size); 3062 if (val != (unsigned long) -1) { 3063 *s = val; 3064 return 0; 3065 } 3066 3067 return -1; 3068 } 3069 3070 static int record__parse_mmap_pages(const struct option *opt, 3071 const char *str, 3072 int unset __maybe_unused) 3073 { 3074 struct record_opts *opts = opt->value; 3075 char *s, *p; 3076 unsigned int mmap_pages; 3077 int ret; 3078 3079 if (!str) 3080 return -EINVAL; 3081 3082 s = strdup(str); 3083 if (!s) 3084 return -ENOMEM; 3085 3086 p = strchr(s, ','); 3087 if (p) 3088 *p = '\0'; 3089 3090 if (*s) { 3091 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 3092 if (ret) 3093 goto out_free; 3094 opts->mmap_pages = mmap_pages; 3095 } 3096 3097 if (!p) { 3098 ret = 0; 3099 goto out_free; 3100 } 3101 3102 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3103 if (ret) 3104 goto out_free; 3105 3106 opts->auxtrace_mmap_pages = mmap_pages; 3107 3108 out_free: 3109 free(s); 3110 return ret; 3111 } 3112 3113 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3114 { 3115 } 3116 3117 static int parse_control_option(const struct option *opt, 3118 const char *str, 3119 int unset __maybe_unused) 3120 { 3121 struct record_opts *opts = opt->value; 3122 3123 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3124 } 3125 3126 static void switch_output_size_warn(struct record *rec) 3127 { 3128 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3129 struct switch_output *s = &rec->switch_output; 3130 3131 wakeup_size /= 2; 3132 3133 if (s->size < wakeup_size) { 3134 char buf[100]; 3135 3136 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3137 pr_warning("WARNING: switch-output data size lower than " 3138 "wakeup kernel buffer size (%s) " 3139 "expect bigger perf.data sizes\n", buf); 3140 } 3141 } 3142 3143 static int switch_output_setup(struct record *rec) 3144 { 3145 struct switch_output *s = &rec->switch_output; 3146 static struct parse_tag tags_size[] = { 3147 { .tag = 'B', .mult = 1 }, 3148 { .tag = 'K', .mult = 1 << 10 }, 3149 { .tag = 'M', .mult = 1 << 20 }, 3150 { .tag = 'G', .mult = 1 << 30 }, 3151 { .tag = 0 }, 3152 }; 3153 static struct parse_tag tags_time[] = { 3154 { .tag = 's', 
.mult = 1 }, 3155 { .tag = 'm', .mult = 60 }, 3156 { .tag = 'h', .mult = 60*60 }, 3157 { .tag = 'd', .mult = 60*60*24 }, 3158 { .tag = 0 }, 3159 }; 3160 unsigned long val; 3161 3162 /* 3163 * If we're using --switch-output-events, then we imply its 3164 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3165 * thread to its parent. 3166 */ 3167 if (rec->switch_output_event_set) { 3168 if (record__threads_enabled(rec)) { 3169 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3170 return 0; 3171 } 3172 goto do_signal; 3173 } 3174 3175 if (!s->set) 3176 return 0; 3177 3178 if (record__threads_enabled(rec)) { 3179 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3180 return 0; 3181 } 3182 3183 if (!strcmp(s->str, "signal")) { 3184 do_signal: 3185 s->signal = true; 3186 pr_debug("switch-output with SIGUSR2 signal\n"); 3187 goto enabled; 3188 } 3189 3190 val = parse_tag_value(s->str, tags_size); 3191 if (val != (unsigned long) -1) { 3192 s->size = val; 3193 pr_debug("switch-output with %s size threshold\n", s->str); 3194 goto enabled; 3195 } 3196 3197 val = parse_tag_value(s->str, tags_time); 3198 if (val != (unsigned long) -1) { 3199 s->time = val; 3200 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3201 s->str, s->time); 3202 goto enabled; 3203 } 3204 3205 return -1; 3206 3207 enabled: 3208 rec->timestamp_filename = true; 3209 s->enabled = true; 3210 3211 if (s->size && !rec->opts.no_buffering) 3212 switch_output_size_warn(rec); 3213 3214 return 0; 3215 } 3216 3217 static const char * const __record_usage[] = { 3218 "perf record [<options>] [<command>]", 3219 "perf record [<options>] -- <command> [<options>]", 3220 NULL 3221 }; 3222 const char * const *record_usage = __record_usage; 3223 3224 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3225 struct perf_sample *sample, struct machine *machine) 3226 { 3227 /* 3228 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3229 * no need to add them twice. 3230 */ 3231 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3232 return 0; 3233 return perf_event__process_mmap(tool, event, sample, machine); 3234 } 3235 3236 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3237 struct perf_sample *sample, struct machine *machine) 3238 { 3239 /* 3240 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3241 * no need to add them twice. 
3242 */ 3243 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3244 return 0; 3245 3246 return perf_event__process_mmap2(tool, event, sample, machine); 3247 } 3248 3249 static int process_timestamp_boundary(struct perf_tool *tool, 3250 union perf_event *event __maybe_unused, 3251 struct perf_sample *sample, 3252 struct machine *machine __maybe_unused) 3253 { 3254 struct record *rec = container_of(tool, struct record, tool); 3255 3256 set_timestamp_boundary(rec, sample->time); 3257 return 0; 3258 } 3259 3260 static int parse_record_synth_option(const struct option *opt, 3261 const char *str, 3262 int unset __maybe_unused) 3263 { 3264 struct record_opts *opts = opt->value; 3265 char *p = strdup(str); 3266 3267 if (p == NULL) 3268 return -1; 3269 3270 opts->synth = parse_synth_opt(p); 3271 free(p); 3272 3273 if (opts->synth < 0) { 3274 pr_err("Invalid synth option: %s\n", str); 3275 return -1; 3276 } 3277 return 0; 3278 } 3279 3280 /* 3281 * XXX Ideally would be local to cmd_record() and passed to a record__new 3282 * because we need to have access to it in record__exit, that is called 3283 * after cmd_record() exits, but since record_options need to be accessible to 3284 * builtin-script, leave it here. 3285 * 3286 * At least we don't ouch it in all the other functions here directly. 3287 * 3288 * Just say no to tons of global variables, sigh. 3289 */ 3290 static struct record record = { 3291 .opts = { 3292 .sample_time = true, 3293 .mmap_pages = UINT_MAX, 3294 .user_freq = UINT_MAX, 3295 .user_interval = ULLONG_MAX, 3296 .freq = 4000, 3297 .target = { 3298 .uses_mmap = true, 3299 .default_per_cpu = true, 3300 }, 3301 .mmap_flush = MMAP_FLUSH_DEFAULT, 3302 .nr_threads_synthesize = 1, 3303 .ctl_fd = -1, 3304 .ctl_fd_ack = -1, 3305 .synth = PERF_SYNTH_ALL, 3306 }, 3307 .tool = { 3308 .sample = process_sample_event, 3309 .fork = perf_event__process_fork, 3310 .exit = perf_event__process_exit, 3311 .comm = perf_event__process_comm, 3312 .namespaces = perf_event__process_namespaces, 3313 .mmap = build_id__process_mmap, 3314 .mmap2 = build_id__process_mmap2, 3315 .itrace_start = process_timestamp_boundary, 3316 .aux = process_timestamp_boundary, 3317 .ordered_events = true, 3318 }, 3319 }; 3320 3321 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3322 "\n\t\t\t\tDefault: fp"; 3323 3324 static bool dry_run; 3325 3326 static struct parse_events_option_args parse_events_option_args = { 3327 .evlistp = &record.evlist, 3328 }; 3329 3330 static struct parse_events_option_args switch_output_parse_events_option_args = { 3331 .evlistp = &record.sb_evlist, 3332 }; 3333 3334 /* 3335 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3336 * with it and switch to use the library functions in perf_evlist that came 3337 * from builtin-record.c, i.e. use record_opts, 3338 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3339 * using pipes, etc. 3340 */ 3341 static struct option __record_options[] = { 3342 OPT_CALLBACK('e', "event", &parse_events_option_args, "event", 3343 "event selector. 
use 'perf list' to list available events", 3344 parse_events_option), 3345 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3346 "event filter", parse_filter), 3347 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3348 NULL, "don't record events from perf itself", 3349 exclude_perf), 3350 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3351 "record events on existing process id"), 3352 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3353 "record events on existing thread id"), 3354 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3355 "collect data with this RT SCHED_FIFO priority"), 3356 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3357 "collect data without buffering"), 3358 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3359 "collect raw sample records from all opened counters"), 3360 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3361 "system-wide collection from all CPUs"), 3362 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3363 "list of cpus to monitor"), 3364 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3365 OPT_STRING('o', "output", &record.data.path, "file", 3366 "output file name"), 3367 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3368 &record.opts.no_inherit_set, 3369 "child tasks do not inherit counters"), 3370 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3371 "synthesize non-sample events at the end of output"), 3372 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3373 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3374 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3375 "Fail if the specified frequency can't be used"), 3376 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3377 "profile at this frequency", 3378 record__parse_freq), 3379 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3380 "number of mmap data pages and AUX area tracing mmap pages", 3381 record__parse_mmap_pages), 3382 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3383 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3384 record__mmap_flush_parse), 3385 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3386 NULL, "enables call-graph recording" , 3387 &record_callchain_opt), 3388 OPT_CALLBACK(0, "call-graph", &record.opts, 3389 "record_mode[,record_size]", record_callchain_help, 3390 &record_parse_callchain_opt), 3391 OPT_INCR('v', "verbose", &verbose, 3392 "be more verbose (show counter open errors, etc)"), 3393 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), 3394 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3395 "per thread counts"), 3396 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3397 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3398 "Record the sample physical addresses"), 3399 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3400 "Record the sampled data address data page size"), 3401 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3402 "Record the sampled code address (ip) page size"), 3403 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3404 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3405 "Record the sample identifier"), 3406 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3407 &record.opts.sample_time_set, 3408 "Record the sample 
timestamps"), 3409 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3410 "Record the sample period"), 3411 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3412 "don't sample"), 3413 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3414 &record.no_buildid_cache_set, 3415 "do not update the buildid cache"), 3416 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3417 &record.no_buildid_set, 3418 "do not collect buildids in perf.data"), 3419 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3420 "monitor event in cgroup name only", 3421 parse_cgroups), 3422 OPT_CALLBACK('D', "delay", &record, "ms", 3423 "ms to wait before starting measurement after program start (-1: start with events disabled), " 3424 "or ranges of time to enable events e.g. '-D 10-20,30-40'", 3425 record__parse_event_enable_time), 3426 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3427 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3428 "user to profile"), 3429 3430 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3431 "branch any", "sample any taken branches", 3432 parse_branch_stack), 3433 3434 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3435 "branch filter mask", "branch stack filter modes", 3436 parse_branch_stack), 3437 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3438 "sample by weight (on special events only)"), 3439 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3440 "sample transaction flags (special events only)"), 3441 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3442 "use per-thread mmaps"), 3443 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3444 "sample selected machine registers on interrupt," 3445 " use '-I?' to list register names", parse_intr_regs), 3446 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3447 "sample selected machine registers on interrupt," 3448 " use '--user-regs=?' 
to list register names", parse_user_regs), 3449 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3450 "Record running/enabled time of read (:S) events"), 3451 OPT_CALLBACK('k', "clockid", &record.opts, 3452 "clockid", "clockid to use for events, see clock_gettime()", 3453 parse_clockid), 3454 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3455 "opts", "AUX area tracing Snapshot Mode", ""), 3456 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3457 "opts", "sample AUX area", ""), 3458 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3459 "per thread proc mmap processing timeout in ms"), 3460 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3461 "Record namespaces events"), 3462 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3463 "Record cgroup events"), 3464 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3465 &record.opts.record_switch_events_set, 3466 "Record context switch events"), 3467 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3468 "Configure all used events to run in kernel space.", 3469 PARSE_OPT_EXCLUSIVE), 3470 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3471 "Configure all used events to run in user space.", 3472 PARSE_OPT_EXCLUSIVE), 3473 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3474 "collect kernel callchains"), 3475 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3476 "collect user callchains"), 3477 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3478 "file", "vmlinux pathname"), 3479 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3480 "Record build-id of all DSOs regardless of hits"), 3481 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3482 "Record build-id in map events"), 3483 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3484 "append timestamp to output filename"), 3485 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3486 "Record timestamp boundary (time of first/last samples)"), 3487 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3488 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3489 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3490 "signal"), 3491 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args, 3492 &record.switch_output_event_set, "switch output event", 3493 "switch output event selector. 
use 'perf list' to list available events", 3494 parse_events_option_new_evlist), 3495 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3496 "Limit number of switch output generated files"), 3497 OPT_BOOLEAN(0, "dry-run", &dry_run, 3498 "Parse options then exit"), 3499 #ifdef HAVE_AIO_SUPPORT 3500 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3501 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3502 record__aio_parse), 3503 #endif 3504 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3505 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3506 record__parse_affinity), 3507 #ifdef HAVE_ZSTD_SUPPORT 3508 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3509 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3510 record__parse_comp_level), 3511 #endif 3512 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3513 "size", "Limit the maximum size of the output file", parse_output_max_size), 3514 OPT_UINTEGER(0, "num-thread-synthesize", 3515 &record.opts.nr_threads_synthesize, 3516 "number of threads to run for event synthesis"), 3517 #ifdef HAVE_LIBPFM 3518 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3519 "libpfm4 event selector. use 'perf list' to list available events", 3520 parse_libpfm_events_option), 3521 #endif 3522 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3523 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3524 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3525 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3526 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3527 parse_control_option), 3528 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3529 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3530 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3531 &record.debuginfod.set, "debuginfod urls", 3532 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3533 "system"), 3534 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3535 "write collected trace data into several data files using parallel threads", 3536 record__parse_threads), 3537 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3538 OPT_END() 3539 }; 3540 3541 struct option *record_options = __record_options; 3542 3543 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3544 { 3545 struct perf_cpu cpu; 3546 int idx; 3547 3548 if (cpu_map__is_dummy(cpus)) 3549 return 0; 3550 3551 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3552 if (cpu.cpu == -1) 3553 continue; 3554 /* Return ENODEV is input cpu is greater than max cpu */ 3555 if ((unsigned long)cpu.cpu > mask->nbits) 3556 return -ENODEV; 3557 __set_bit(cpu.cpu, mask->bits); 3558 } 3559 3560 return 0; 3561 } 3562 3563 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3564 { 3565 struct perf_cpu_map *cpus; 3566 3567 cpus = perf_cpu_map__new(mask_spec); 3568 if (!cpus) 3569 return -ENOMEM; 3570 3571 bitmap_zero(mask->bits, mask->nbits); 3572 if (record__mmap_cpu_mask_init(mask, cpus)) 3573 return -ENODEV; 3574 3575 perf_cpu_map__put(cpus); 3576 3577 return 0; 3578 } 
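/* Thread-mask helpers: the functions below allocate, initialize and free the per-thread maps/affinity CPU masks used by parallel trace streaming (--threads). */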
3579 3580 static void record__free_thread_masks(struct record *rec, int nr_threads) 3581 { 3582 int t; 3583 3584 if (rec->thread_masks) 3585 for (t = 0; t < nr_threads; t++) 3586 record__thread_mask_free(&rec->thread_masks[t]); 3587 3588 zfree(&rec->thread_masks); 3589 } 3590 3591 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3592 { 3593 int t, ret; 3594 3595 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3596 if (!rec->thread_masks) { 3597 pr_err("Failed to allocate thread masks\n"); 3598 return -ENOMEM; 3599 } 3600 3601 for (t = 0; t < nr_threads; t++) { 3602 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3603 if (ret) { 3604 pr_err("Failed to allocate thread masks[%d]\n", t); 3605 goto out_free; 3606 } 3607 } 3608 3609 return 0; 3610 3611 out_free: 3612 record__free_thread_masks(rec, nr_threads); 3613 3614 return ret; 3615 } 3616 3617 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3618 { 3619 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3620 3621 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3622 if (ret) 3623 return ret; 3624 3625 rec->nr_threads = nr_cpus; 3626 pr_debug("nr_threads: %d\n", rec->nr_threads); 3627 3628 for (t = 0; t < rec->nr_threads; t++) { 3629 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3630 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3631 if (verbose > 0) { 3632 pr_debug("thread_masks[%d]: ", t); 3633 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3634 pr_debug("thread_masks[%d]: ", t); 3635 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3636 } 3637 } 3638 3639 return 0; 3640 } 3641 3642 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3643 const char **maps_spec, const char **affinity_spec, 3644 u32 nr_spec) 3645 { 3646 u32 s; 3647 int ret = 0, t = 0; 3648 struct mmap_cpu_mask cpus_mask; 3649 struct thread_mask thread_mask, full_mask, *thread_masks; 3650 3651 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3652 if (ret) { 3653 pr_err("Failed to allocate CPUs mask\n"); 3654 return ret; 3655 } 3656 3657 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3658 if (ret) { 3659 pr_err("Failed to init cpu mask\n"); 3660 goto out_free_cpu_mask; 3661 } 3662 3663 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3664 if (ret) { 3665 pr_err("Failed to allocate full mask\n"); 3666 goto out_free_cpu_mask; 3667 } 3668 3669 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3670 if (ret) { 3671 pr_err("Failed to allocate thread mask\n"); 3672 goto out_free_full_and_cpu_masks; 3673 } 3674 3675 for (s = 0; s < nr_spec; s++) { 3676 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3677 if (ret) { 3678 pr_err("Failed to initialize maps thread mask\n"); 3679 goto out_free; 3680 } 3681 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3682 if (ret) { 3683 pr_err("Failed to initialize affinity thread mask\n"); 3684 goto out_free; 3685 } 3686 3687 /* ignore invalid CPUs but do not allow empty masks */ 3688 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3689 cpus_mask.bits, thread_mask.maps.nbits)) { 3690 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3691 ret = -EINVAL; 3692 goto out_free; 3693 } 3694 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3695 
cpus_mask.bits, thread_mask.affinity.nbits)) { 3696 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3697 ret = -EINVAL; 3698 goto out_free; 3699 } 3700 3701 /* do not allow intersection with other masks (full_mask) */ 3702 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3703 thread_mask.maps.nbits)) { 3704 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3705 ret = -EINVAL; 3706 goto out_free; 3707 } 3708 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3709 thread_mask.affinity.nbits)) { 3710 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3711 ret = -EINVAL; 3712 goto out_free; 3713 } 3714 3715 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3716 thread_mask.maps.bits, full_mask.maps.nbits); 3717 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3718 thread_mask.affinity.bits, full_mask.maps.nbits); 3719 3720 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3721 if (!thread_masks) { 3722 pr_err("Failed to reallocate thread masks\n"); 3723 ret = -ENOMEM; 3724 goto out_free; 3725 } 3726 rec->thread_masks = thread_masks; 3727 rec->thread_masks[t] = thread_mask; 3728 if (verbose > 0) { 3729 pr_debug("thread_masks[%d]: ", t); 3730 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3731 pr_debug("thread_masks[%d]: ", t); 3732 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3733 } 3734 t++; 3735 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3736 if (ret) { 3737 pr_err("Failed to allocate thread mask\n"); 3738 goto out_free_full_and_cpu_masks; 3739 } 3740 } 3741 rec->nr_threads = t; 3742 pr_debug("nr_threads: %d\n", rec->nr_threads); 3743 if (!rec->nr_threads) 3744 ret = -EINVAL; 3745 3746 out_free: 3747 record__thread_mask_free(&thread_mask); 3748 out_free_full_and_cpu_masks: 3749 record__thread_mask_free(&full_mask); 3750 out_free_cpu_mask: 3751 record__mmap_cpu_mask_free(&cpus_mask); 3752 3753 return ret; 3754 } 3755 3756 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3757 { 3758 int ret; 3759 struct cpu_topology *topo; 3760 3761 topo = cpu_topology__new(); 3762 if (!topo) { 3763 pr_err("Failed to allocate CPU topology\n"); 3764 return -ENOMEM; 3765 } 3766 3767 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3768 topo->core_cpus_list, topo->core_cpus_lists); 3769 cpu_topology__delete(topo); 3770 3771 return ret; 3772 } 3773 3774 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3775 { 3776 int ret; 3777 struct cpu_topology *topo; 3778 3779 topo = cpu_topology__new(); 3780 if (!topo) { 3781 pr_err("Failed to allocate CPU topology\n"); 3782 return -ENOMEM; 3783 } 3784 3785 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3786 topo->package_cpus_list, topo->package_cpus_lists); 3787 cpu_topology__delete(topo); 3788 3789 return ret; 3790 } 3791 3792 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3793 { 3794 u32 s; 3795 int ret; 3796 const char **spec; 3797 struct numa_topology *topo; 3798 3799 topo = numa_topology__new(); 3800 if (!topo) { 3801 pr_err("Failed to allocate NUMA topology\n"); 3802 return -ENOMEM; 3803 } 3804 3805 spec = zalloc(topo->nr * sizeof(char *)); 3806 if (!spec) { 3807 pr_err("Failed to allocate NUMA spec\n"); 3808 ret = -ENOMEM; 3809 goto out_delete_topo; 3810 } 3811 for (s = 0; s < topo->nr; s++) 3812 spec[s] = topo->nodes[s].cpus; 3813 
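/* Each NUMA node's CPU list serves as both the maps spec and the affinity spec. */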
3814 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3815 3816 zfree(&spec); 3817 3818 out_delete_topo: 3819 numa_topology__delete(topo); 3820 3821 return ret; 3822 } 3823 3824 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3825 { 3826 int t, ret; 3827 u32 s, nr_spec = 0; 3828 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3829 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3830 3831 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3832 spec = strtok_r(user_spec, ":", &spec_ptr); 3833 if (spec == NULL) 3834 break; 3835 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3836 mask = strtok_r(spec, "/", &mask_ptr); 3837 if (mask == NULL) 3838 break; 3839 pr_debug2(" maps mask: %s\n", mask); 3840 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3841 if (!tmp_spec) { 3842 pr_err("Failed to reallocate maps spec\n"); 3843 ret = -ENOMEM; 3844 goto out_free; 3845 } 3846 maps_spec = tmp_spec; 3847 maps_spec[nr_spec] = dup_mask = strdup(mask); 3848 if (!maps_spec[nr_spec]) { 3849 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3850 ret = -ENOMEM; 3851 goto out_free; 3852 } 3853 mask = strtok_r(NULL, "/", &mask_ptr); 3854 if (mask == NULL) { 3855 pr_err("Invalid thread maps or affinity specs\n"); 3856 ret = -EINVAL; 3857 goto out_free; 3858 } 3859 pr_debug2(" affinity mask: %s\n", mask); 3860 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3861 if (!tmp_spec) { 3862 pr_err("Failed to reallocate affinity spec\n"); 3863 ret = -ENOMEM; 3864 goto out_free; 3865 } 3866 affinity_spec = tmp_spec; 3867 affinity_spec[nr_spec] = strdup(mask); 3868 if (!affinity_spec[nr_spec]) { 3869 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3870 ret = -ENOMEM; 3871 goto out_free; 3872 } 3873 dup_mask = NULL; 3874 nr_spec++; 3875 } 3876 3877 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3878 (const char **)affinity_spec, nr_spec); 3879 3880 out_free: 3881 free(dup_mask); 3882 for (s = 0; s < nr_spec; s++) { 3883 if (maps_spec) 3884 free(maps_spec[s]); 3885 if (affinity_spec) 3886 free(affinity_spec[s]); 3887 } 3888 free(affinity_spec); 3889 free(maps_spec); 3890 3891 return ret; 3892 } 3893 3894 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3895 { 3896 int ret; 3897 3898 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3899 if (ret) 3900 return ret; 3901 3902 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3903 return -ENODEV; 3904 3905 rec->nr_threads = 1; 3906 3907 return 0; 3908 } 3909 3910 static int record__init_thread_masks(struct record *rec) 3911 { 3912 int ret = 0; 3913 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3914 3915 if (!record__threads_enabled(rec)) 3916 return record__init_thread_default_masks(rec, cpus); 3917 3918 if (evlist__per_thread(rec->evlist)) { 3919 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3920 return -EINVAL; 3921 } 3922 3923 switch (rec->opts.threads_spec) { 3924 case THREAD_SPEC__CPU: 3925 ret = record__init_thread_cpu_masks(rec, cpus); 3926 break; 3927 case THREAD_SPEC__CORE: 3928 ret = record__init_thread_core_masks(rec, cpus); 3929 break; 3930 case THREAD_SPEC__PACKAGE: 3931 ret = record__init_thread_package_masks(rec, cpus); 3932 break; 3933 case THREAD_SPEC__NUMA: 3934 ret = record__init_thread_numa_masks(rec, cpus); 3935 break; 3936 case 
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2(" maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2(" affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 * perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;
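
	/*
	 * No events were specified on the command line: default to the
	 * "cycles" event at the highest available precise level, restricted
	 * to user space when perf_event_paranoid does not permit kernel
	 * profiling.
	 */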
"cycles:P" : "cycles:Pu"); 4122 if (err) 4123 goto out; 4124 } 4125 4126 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 4127 rec->opts.no_inherit = true; 4128 4129 err = target__validate(&rec->opts.target); 4130 if (err) { 4131 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4132 ui__warning("%s\n", errbuf); 4133 } 4134 4135 err = target__parse_uid(&rec->opts.target); 4136 if (err) { 4137 int saved_errno = errno; 4138 4139 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 4140 ui__error("%s", errbuf); 4141 4142 err = -saved_errno; 4143 goto out; 4144 } 4145 4146 /* Enable ignoring missing threads when -u/-p option is defined. */ 4147 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 4148 4149 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list); 4150 4151 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 4152 arch__add_leaf_frame_record_opts(&rec->opts); 4153 4154 err = -ENOMEM; 4155 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) { 4156 if (rec->opts.target.pid != NULL) { 4157 pr_err("Couldn't create thread/CPU maps: %s\n", 4158 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf))); 4159 goto out; 4160 } 4161 else 4162 usage_with_options(record_usage, record_options); 4163 } 4164 4165 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 4166 if (err) 4167 goto out; 4168 4169 /* 4170 * We take all buildids when the file contains 4171 * AUX area tracing data because we do not decode the 4172 * trace because it would take too long. 4173 */ 4174 if (rec->opts.full_auxtrace) 4175 rec->buildid_all = true; 4176 4177 if (rec->opts.text_poke) { 4178 err = record__config_text_poke(rec->evlist); 4179 if (err) { 4180 pr_err("record__config_text_poke failed, error %d\n", err); 4181 goto out; 4182 } 4183 } 4184 4185 if (rec->off_cpu) { 4186 err = record__config_off_cpu(rec); 4187 if (err) { 4188 pr_err("record__config_off_cpu failed, error %d\n", err); 4189 goto out; 4190 } 4191 } 4192 4193 if (record_opts__config(&rec->opts)) { 4194 err = -EINVAL; 4195 goto out; 4196 } 4197 4198 err = record__init_thread_masks(rec); 4199 if (err) { 4200 pr_err("Failed to initialize parallel data streaming masks\n"); 4201 goto out; 4202 } 4203 4204 if (rec->opts.nr_cblocks > nr_cblocks_max) 4205 rec->opts.nr_cblocks = nr_cblocks_max; 4206 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 4207 4208 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 4209 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 4210 4211 if (rec->opts.comp_level > comp_level_max) 4212 rec->opts.comp_level = comp_level_max; 4213 pr_debug("comp level: %d\n", rec->opts.comp_level); 4214 4215 err = __cmd_record(&record, argc, argv); 4216 out: 4217 evlist__delete(rec->evlist); 4218 symbol__exit(); 4219 auxtrace_record__free(rec->itr); 4220 out_opts: 4221 record__free_thread_masks(rec, rec->nr_threads); 4222 rec->nr_threads = 0; 4223 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4224 return err; 4225 } 4226 4227 static void snapshot_sig_handler(int sig __maybe_unused) 4228 { 4229 struct record *rec = &record; 4230 4231 hit_auxtrace_snapshot_trigger(rec); 4232 4233 if (switch_output_signal(rec)) 4234 trigger_hit(&switch_output_trigger); 4235 } 4236 4237 static void alarm_sig_handler(int sig __maybe_unused) 4238 { 4239 struct record *rec = &record; 4240 4241 if (switch_output_time(rec)) 4242 trigger_hit(&switch_output_trigger); 
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}