1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/mmap.h" 24 #include "util/target.h" 25 #include "util/session.h" 26 #include "util/tool.h" 27 #include "util/symbol.h" 28 #include "util/record.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/perf_api_probe.h" 38 #include "util/llvm-utils.h" 39 #include "util/bpf-loader.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/clockid.h" 50 #include "util/pmu-hybrid.h" 51 #include "util/evlist-hybrid.h" 52 #include "util/off_cpu.h" 53 #include "asm/bug.h" 54 #include "perf.h" 55 #include "cputopo.h" 56 57 #include <errno.h> 58 #include <inttypes.h> 59 #include <locale.h> 60 #include <poll.h> 61 #include <pthread.h> 62 #include <unistd.h> 63 #ifndef HAVE_GETTID 64 #include <syscall.h> 65 #endif 66 #include <sched.h> 67 #include <signal.h> 68 #ifdef HAVE_EVENTFD_SUPPORT 69 #include <sys/eventfd.h> 70 #endif 71 #include <sys/mman.h> 72 #include <sys/wait.h> 73 #include <sys/types.h> 74 #include <sys/stat.h> 75 #include <fcntl.h> 76 #include <linux/err.h> 77 #include <linux/string.h> 78 #include <linux/time64.h> 79 #include <linux/zalloc.h> 80 #include <linux/bitmap.h> 81 #include <sys/time.h> 82 83 struct switch_output { 84 bool enabled; 85 bool signal; 86 unsigned long size; 87 unsigned long time; 88 const char *str; 89 bool set; 90 char **filenames; 91 int num_files; 92 int cur_file; 93 }; 94 95 struct thread_mask { 96 struct mmap_cpu_mask maps; 97 struct mmap_cpu_mask affinity; 98 }; 99 100 struct record_thread { 101 pid_t tid; 102 struct thread_mask *mask; 103 struct { 104 int msg[2]; 105 int ack[2]; 106 } pipes; 107 struct fdarray pollfd; 108 int ctlfd_pos; 109 int nr_mmaps; 110 struct mmap **maps; 111 struct mmap **overwrite_maps; 112 struct record *rec; 113 unsigned long long samples; 114 unsigned long waking; 115 u64 bytes_written; 116 u64 bytes_transferred; 117 u64 bytes_compressed; 118 }; 119 120 static __thread struct record_thread *thread; 121 122 enum thread_msg { 123 THREAD_MSG__UNDEFINED = 0, 124 THREAD_MSG__READY, 125 THREAD_MSG__MAX, 126 }; 127 128 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 129 "UNDEFINED", "READY" 130 }; 131 132 enum thread_spec { 133 THREAD_SPEC__UNDEFINED = 0, 134 THREAD_SPEC__CPU, 135 THREAD_SPEC__CORE, 136 THREAD_SPEC__PACKAGE, 137 THREAD_SPEC__NUMA, 138 THREAD_SPEC__USER, 139 THREAD_SPEC__MAX, 140 }; 141 142 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 143 "undefined", "cpu", "core", "package", "numa", "user" 144 }; 145 146 struct pollfd_index_map { 147 int evlist_pollfd_index; 
148 int thread_pollfd_index; 149 }; 150 151 struct record { 152 struct perf_tool tool; 153 struct record_opts opts; 154 u64 bytes_written; 155 struct perf_data data; 156 struct auxtrace_record *itr; 157 struct evlist *evlist; 158 struct perf_session *session; 159 struct evlist *sb_evlist; 160 pthread_t thread_id; 161 int realtime_prio; 162 bool switch_output_event_set; 163 bool no_buildid; 164 bool no_buildid_set; 165 bool no_buildid_cache; 166 bool no_buildid_cache_set; 167 bool buildid_all; 168 bool buildid_mmap; 169 bool timestamp_filename; 170 bool timestamp_boundary; 171 bool off_cpu; 172 struct switch_output switch_output; 173 unsigned long long samples; 174 unsigned long output_max_size; /* = 0: unlimited */ 175 struct perf_debuginfod debuginfod; 176 int nr_threads; 177 struct thread_mask *thread_masks; 178 struct record_thread *thread_data; 179 struct pollfd_index_map *index_map; 180 size_t index_map_sz; 181 size_t index_map_cnt; 182 }; 183 184 static volatile int done; 185 186 static volatile int auxtrace_record__snapshot_started; 187 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 188 static DEFINE_TRIGGER(switch_output_trigger); 189 190 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 191 "SYS", "NODE", "CPU" 192 }; 193 194 #ifndef HAVE_GETTID 195 static inline pid_t gettid(void) 196 { 197 return (pid_t)syscall(__NR_gettid); 198 } 199 #endif 200 201 static int record__threads_enabled(struct record *rec) 202 { 203 return rec->opts.threads_spec; 204 } 205 206 static bool switch_output_signal(struct record *rec) 207 { 208 return rec->switch_output.signal && 209 trigger_is_ready(&switch_output_trigger); 210 } 211 212 static bool switch_output_size(struct record *rec) 213 { 214 return rec->switch_output.size && 215 trigger_is_ready(&switch_output_trigger) && 216 (rec->bytes_written >= rec->switch_output.size); 217 } 218 219 static bool switch_output_time(struct record *rec) 220 { 221 return rec->switch_output.time && 222 trigger_is_ready(&switch_output_trigger); 223 } 224 225 static u64 record__bytes_written(struct record *rec) 226 { 227 int t; 228 u64 bytes_written = rec->bytes_written; 229 struct record_thread *thread_data = rec->thread_data; 230 231 for (t = 0; t < rec->nr_threads; t++) 232 bytes_written += thread_data[t].bytes_written; 233 234 return bytes_written; 235 } 236 237 static bool record__output_max_size_exceeded(struct record *rec) 238 { 239 return rec->output_max_size && 240 (record__bytes_written(rec) >= rec->output_max_size); 241 } 242 243 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 244 void *bf, size_t size) 245 { 246 struct perf_data_file *file = &rec->session->data->file; 247 248 if (map && map->file) 249 file = map->file; 250 251 if (perf_data_file__write(file, bf, size) < 0) { 252 pr_err("failed to write perf data, error: %m\n"); 253 return -1; 254 } 255 256 if (map && map->file) 257 thread->bytes_written += size; 258 else 259 rec->bytes_written += size; 260 261 if (record__output_max_size_exceeded(rec) && !done) { 262 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 263 " stopping session ]\n", 264 record__bytes_written(rec) >> 10); 265 done = 1; 266 } 267 268 if (switch_output_size(rec)) 269 trigger_hit(&switch_output_trigger); 270 271 return 0; 272 } 273 274 static int record__aio_enabled(struct record *rec); 275 static int record__comp_enabled(struct record *rec); 276 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 277 void *dst, size_t dst_size, void *src, 
size_t src_size); 278 279 #ifdef HAVE_AIO_SUPPORT 280 static int record__aio_write(struct aiocb *cblock, int trace_fd, 281 void *buf, size_t size, off_t off) 282 { 283 int rc; 284 285 cblock->aio_fildes = trace_fd; 286 cblock->aio_buf = buf; 287 cblock->aio_nbytes = size; 288 cblock->aio_offset = off; 289 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 290 291 do { 292 rc = aio_write(cblock); 293 if (rc == 0) { 294 break; 295 } else if (errno != EAGAIN) { 296 cblock->aio_fildes = -1; 297 pr_err("failed to queue perf data, error: %m\n"); 298 break; 299 } 300 } while (1); 301 302 return rc; 303 } 304 305 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 306 { 307 void *rem_buf; 308 off_t rem_off; 309 size_t rem_size; 310 int rc, aio_errno; 311 ssize_t aio_ret, written; 312 313 aio_errno = aio_error(cblock); 314 if (aio_errno == EINPROGRESS) 315 return 0; 316 317 written = aio_ret = aio_return(cblock); 318 if (aio_ret < 0) { 319 if (aio_errno != EINTR) 320 pr_err("failed to write perf data, error: %m\n"); 321 written = 0; 322 } 323 324 rem_size = cblock->aio_nbytes - written; 325 326 if (rem_size == 0) { 327 cblock->aio_fildes = -1; 328 /* 329 * md->refcount is incremented in record__aio_pushfn() for 330 * every aio write request started in record__aio_push() so 331 * decrement it because the request is now complete. 332 */ 333 perf_mmap__put(&md->core); 334 rc = 1; 335 } else { 336 /* 337 * aio write request may require restart with the 338 * reminder if the kernel didn't write whole 339 * chunk at once. 340 */ 341 rem_off = cblock->aio_offset + written; 342 rem_buf = (void *)(cblock->aio_buf + written); 343 record__aio_write(cblock, cblock->aio_fildes, 344 rem_buf, rem_size, rem_off); 345 rc = 0; 346 } 347 348 return rc; 349 } 350 351 static int record__aio_sync(struct mmap *md, bool sync_all) 352 { 353 struct aiocb **aiocb = md->aio.aiocb; 354 struct aiocb *cblocks = md->aio.cblocks; 355 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 356 int i, do_suspend; 357 358 do { 359 do_suspend = 0; 360 for (i = 0; i < md->aio.nr_cblocks; ++i) { 361 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 362 if (sync_all) 363 aiocb[i] = NULL; 364 else 365 return i; 366 } else { 367 /* 368 * Started aio write is not complete yet 369 * so it has to be waited before the 370 * next allocation. 371 */ 372 aiocb[i] = &cblocks[i]; 373 do_suspend = 1; 374 } 375 } 376 if (!do_suspend) 377 return -1; 378 379 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 380 if (!(errno == EAGAIN || errno == EINTR)) 381 pr_err("failed to sync perf data, error: %m\n"); 382 } 383 } while (1); 384 } 385 386 struct record_aio { 387 struct record *rec; 388 void *data; 389 size_t size; 390 }; 391 392 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 393 { 394 struct record_aio *aio = to; 395 396 /* 397 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 398 * to release space in the kernel buffer as fast as possible, calling 399 * perf_mmap__consume() from perf_mmap__push() function. 400 * 401 * That lets the kernel to proceed with storing more profiling data into 402 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 403 * 404 * Coping can be done in two steps in case the chunk of profiling data 405 * crosses the upper bound of the kernel buffer. 
In this case we first move 406 * part of data from map->start till the upper bound and then the reminder 407 * from the beginning of the kernel buffer till the end of the data chunk. 408 */ 409 410 if (record__comp_enabled(aio->rec)) { 411 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 412 mmap__mmap_len(map) - aio->size, 413 buf, size); 414 } else { 415 memcpy(aio->data + aio->size, buf, size); 416 } 417 418 if (!aio->size) { 419 /* 420 * Increment map->refcount to guard map->aio.data[] buffer 421 * from premature deallocation because map object can be 422 * released earlier than aio write request started on 423 * map->aio.data[] buffer is complete. 424 * 425 * perf_mmap__put() is done at record__aio_complete() 426 * after started aio request completion or at record__aio_push() 427 * if the request failed to start. 428 */ 429 perf_mmap__get(&map->core); 430 } 431 432 aio->size += size; 433 434 return size; 435 } 436 437 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 438 { 439 int ret, idx; 440 int trace_fd = rec->session->data->file.fd; 441 struct record_aio aio = { .rec = rec, .size = 0 }; 442 443 /* 444 * Call record__aio_sync() to wait till map->aio.data[] buffer 445 * becomes available after previous aio write operation. 446 */ 447 448 idx = record__aio_sync(map, false); 449 aio.data = map->aio.data[idx]; 450 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 451 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 452 return ret; 453 454 rec->samples++; 455 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 456 if (!ret) { 457 *off += aio.size; 458 rec->bytes_written += aio.size; 459 if (switch_output_size(rec)) 460 trigger_hit(&switch_output_trigger); 461 } else { 462 /* 463 * Decrement map->refcount incremented in record__aio_pushfn() 464 * back if record__aio_write() operation failed to start, otherwise 465 * map->refcount is decremented in record__aio_complete() after 466 * aio write operation finishes successfully. 
467 */ 468 perf_mmap__put(&map->core); 469 } 470 471 return ret; 472 } 473 474 static off_t record__aio_get_pos(int trace_fd) 475 { 476 return lseek(trace_fd, 0, SEEK_CUR); 477 } 478 479 static void record__aio_set_pos(int trace_fd, off_t pos) 480 { 481 lseek(trace_fd, pos, SEEK_SET); 482 } 483 484 static void record__aio_mmap_read_sync(struct record *rec) 485 { 486 int i; 487 struct evlist *evlist = rec->evlist; 488 struct mmap *maps = evlist->mmap; 489 490 if (!record__aio_enabled(rec)) 491 return; 492 493 for (i = 0; i < evlist->core.nr_mmaps; i++) { 494 struct mmap *map = &maps[i]; 495 496 if (map->core.base) 497 record__aio_sync(map, true); 498 } 499 } 500 501 static int nr_cblocks_default = 1; 502 static int nr_cblocks_max = 4; 503 504 static int record__aio_parse(const struct option *opt, 505 const char *str, 506 int unset) 507 { 508 struct record_opts *opts = (struct record_opts *)opt->value; 509 510 if (unset) { 511 opts->nr_cblocks = 0; 512 } else { 513 if (str) 514 opts->nr_cblocks = strtol(str, NULL, 0); 515 if (!opts->nr_cblocks) 516 opts->nr_cblocks = nr_cblocks_default; 517 } 518 519 return 0; 520 } 521 #else /* HAVE_AIO_SUPPORT */ 522 static int nr_cblocks_max = 0; 523 524 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 525 off_t *off __maybe_unused) 526 { 527 return -1; 528 } 529 530 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 531 { 532 return -1; 533 } 534 535 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 536 { 537 } 538 539 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 540 { 541 } 542 #endif 543 544 static int record__aio_enabled(struct record *rec) 545 { 546 return rec->opts.nr_cblocks > 0; 547 } 548 549 #define MMAP_FLUSH_DEFAULT 1 550 static int record__mmap_flush_parse(const struct option *opt, 551 const char *str, 552 int unset) 553 { 554 int flush_max; 555 struct record_opts *opts = (struct record_opts *)opt->value; 556 static struct parse_tag tags[] = { 557 { .tag = 'B', .mult = 1 }, 558 { .tag = 'K', .mult = 1 << 10 }, 559 { .tag = 'M', .mult = 1 << 20 }, 560 { .tag = 'G', .mult = 1 << 30 }, 561 { .tag = 0 }, 562 }; 563 564 if (unset) 565 return 0; 566 567 if (str) { 568 opts->mmap_flush = parse_tag_value(str, tags); 569 if (opts->mmap_flush == (int)-1) 570 opts->mmap_flush = strtol(str, NULL, 0); 571 } 572 573 if (!opts->mmap_flush) 574 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 575 576 flush_max = evlist__mmap_size(opts->mmap_pages); 577 flush_max /= 4; 578 if (opts->mmap_flush > flush_max) 579 opts->mmap_flush = flush_max; 580 581 return 0; 582 } 583 584 #ifdef HAVE_ZSTD_SUPPORT 585 static unsigned int comp_level_default = 1; 586 587 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 588 { 589 struct record_opts *opts = opt->value; 590 591 if (unset) { 592 opts->comp_level = 0; 593 } else { 594 if (str) 595 opts->comp_level = strtol(str, NULL, 0); 596 if (!opts->comp_level) 597 opts->comp_level = comp_level_default; 598 } 599 600 return 0; 601 } 602 #endif 603 static unsigned int comp_level_max = 22; 604 605 static int record__comp_enabled(struct record *rec) 606 { 607 return rec->opts.comp_level > 0; 608 } 609 610 static int process_synthesized_event(struct perf_tool *tool, 611 union perf_event *event, 612 struct perf_sample *sample __maybe_unused, 613 struct machine *machine __maybe_unused) 614 { 615 struct record *rec = container_of(tool, struct record, tool); 616 return record__write(rec, 
NULL, event, event->header.size); 617 } 618 619 static int process_locked_synthesized_event(struct perf_tool *tool, 620 union perf_event *event, 621 struct perf_sample *sample __maybe_unused, 622 struct machine *machine __maybe_unused) 623 { 624 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; 625 int ret; 626 627 pthread_mutex_lock(&synth_lock); 628 ret = process_synthesized_event(tool, event, sample, machine); 629 pthread_mutex_unlock(&synth_lock); 630 return ret; 631 } 632 633 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 634 { 635 struct record *rec = to; 636 637 if (record__comp_enabled(rec)) { 638 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 639 bf = map->data; 640 } 641 642 thread->samples++; 643 return record__write(rec, map, bf, size); 644 } 645 646 static volatile int signr = -1; 647 static volatile int child_finished; 648 #ifdef HAVE_EVENTFD_SUPPORT 649 static int done_fd = -1; 650 #endif 651 652 static void sig_handler(int sig) 653 { 654 if (sig == SIGCHLD) 655 child_finished = 1; 656 else 657 signr = sig; 658 659 done = 1; 660 #ifdef HAVE_EVENTFD_SUPPORT 661 { 662 u64 tmp = 1; 663 /* 664 * It is possible for this signal handler to run after done is checked 665 * in the main loop, but before the perf counter fds are polled. If this 666 * happens, the poll() will continue to wait even though done is set, 667 * and will only break out if either another signal is received, or the 668 * counters are ready for read. To ensure the poll() doesn't sleep when 669 * done is set, use an eventfd (done_fd) to wake up the poll(). 670 */ 671 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 672 pr_err("failed to signal wakeup fd, error: %m\n"); 673 } 674 #endif // HAVE_EVENTFD_SUPPORT 675 } 676 677 static void sigsegv_handler(int sig) 678 { 679 perf_hooks__recover(); 680 sighandler_dump_stack(sig); 681 } 682 683 static void record__sig_exit(void) 684 { 685 if (signr == -1) 686 return; 687 688 signal(signr, SIG_DFL); 689 raise(signr); 690 } 691 692 #ifdef HAVE_AUXTRACE_SUPPORT 693 694 static int record__process_auxtrace(struct perf_tool *tool, 695 struct mmap *map, 696 union perf_event *event, void *data1, 697 size_t len1, void *data2, size_t len2) 698 { 699 struct record *rec = container_of(tool, struct record, tool); 700 struct perf_data *data = &rec->data; 701 size_t padding; 702 u8 pad[8] = {0}; 703 704 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 705 off_t file_offset; 706 int fd = perf_data__fd(data); 707 int err; 708 709 file_offset = lseek(fd, 0, SEEK_CUR); 710 if (file_offset == -1) 711 return -1; 712 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 713 event, file_offset); 714 if (err) 715 return err; 716 } 717 718 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 719 padding = (len1 + len2) & 7; 720 if (padding) 721 padding = 8 - padding; 722 723 record__write(rec, map, event, event->header.size); 724 record__write(rec, map, data1, len1); 725 if (len2) 726 record__write(rec, map, data2, len2); 727 record__write(rec, map, &pad, padding); 728 729 return 0; 730 } 731 732 static int record__auxtrace_mmap_read(struct record *rec, 733 struct mmap *map) 734 { 735 int ret; 736 737 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 738 record__process_auxtrace); 739 if (ret < 0) 740 return ret; 741 742 if (ret) 743 rec->samples++; 744 745 return 0; 746 } 747 748 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 749 struct mmap *map) 
750 { 751 int ret; 752 753 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 754 record__process_auxtrace, 755 rec->opts.auxtrace_snapshot_size); 756 if (ret < 0) 757 return ret; 758 759 if (ret) 760 rec->samples++; 761 762 return 0; 763 } 764 765 static int record__auxtrace_read_snapshot_all(struct record *rec) 766 { 767 int i; 768 int rc = 0; 769 770 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 771 struct mmap *map = &rec->evlist->mmap[i]; 772 773 if (!map->auxtrace_mmap.base) 774 continue; 775 776 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 777 rc = -1; 778 goto out; 779 } 780 } 781 out: 782 return rc; 783 } 784 785 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 786 { 787 pr_debug("Recording AUX area tracing snapshot\n"); 788 if (record__auxtrace_read_snapshot_all(rec) < 0) { 789 trigger_error(&auxtrace_snapshot_trigger); 790 } else { 791 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 792 trigger_error(&auxtrace_snapshot_trigger); 793 else 794 trigger_ready(&auxtrace_snapshot_trigger); 795 } 796 } 797 798 static int record__auxtrace_snapshot_exit(struct record *rec) 799 { 800 if (trigger_is_error(&auxtrace_snapshot_trigger)) 801 return 0; 802 803 if (!auxtrace_record__snapshot_started && 804 auxtrace_record__snapshot_start(rec->itr)) 805 return -1; 806 807 record__read_auxtrace_snapshot(rec, true); 808 if (trigger_is_error(&auxtrace_snapshot_trigger)) 809 return -1; 810 811 return 0; 812 } 813 814 static int record__auxtrace_init(struct record *rec) 815 { 816 int err; 817 818 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 819 && record__threads_enabled(rec)) { 820 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 821 return -EINVAL; 822 } 823 824 if (!rec->itr) { 825 rec->itr = auxtrace_record__init(rec->evlist, &err); 826 if (err) 827 return err; 828 } 829 830 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 831 rec->opts.auxtrace_snapshot_opts); 832 if (err) 833 return err; 834 835 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 836 rec->opts.auxtrace_sample_opts); 837 if (err) 838 return err; 839 840 auxtrace_regroup_aux_output(rec->evlist); 841 842 return auxtrace_parse_filters(rec->evlist); 843 } 844 845 #else 846 847 static inline 848 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 849 struct mmap *map __maybe_unused) 850 { 851 return 0; 852 } 853 854 static inline 855 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 856 bool on_exit __maybe_unused) 857 { 858 } 859 860 static inline 861 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 862 { 863 return 0; 864 } 865 866 static inline 867 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 868 { 869 return 0; 870 } 871 872 static int record__auxtrace_init(struct record *rec __maybe_unused) 873 { 874 return 0; 875 } 876 877 #endif 878 879 static int record__config_text_poke(struct evlist *evlist) 880 { 881 struct evsel *evsel; 882 883 /* Nothing to do if text poke is already configured */ 884 evlist__for_each_entry(evlist, evsel) { 885 if (evsel->core.attr.text_poke) 886 return 0; 887 } 888 889 evsel = evlist__add_dummy_on_all_cpus(evlist); 890 if (!evsel) 891 return -ENOMEM; 892 893 evsel->core.attr.text_poke = 1; 894 evsel->core.attr.ksymbol = 1; 895 evsel->immediate = true; 896 evsel__set_sample_bit(evsel, TIME); 897 898 return 0; 899 } 900 901 static int 
record__config_off_cpu(struct record *rec) 902 { 903 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); 904 } 905 906 static bool record__kcore_readable(struct machine *machine) 907 { 908 char kcore[PATH_MAX]; 909 int fd; 910 911 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 912 913 fd = open(kcore, O_RDONLY); 914 if (fd < 0) 915 return false; 916 917 close(fd); 918 919 return true; 920 } 921 922 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 923 { 924 char from_dir[PATH_MAX]; 925 char kcore_dir[PATH_MAX]; 926 int ret; 927 928 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 929 930 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 931 if (ret) 932 return ret; 933 934 return kcore_copy(from_dir, kcore_dir); 935 } 936 937 static void record__thread_data_init_pipes(struct record_thread *thread_data) 938 { 939 thread_data->pipes.msg[0] = -1; 940 thread_data->pipes.msg[1] = -1; 941 thread_data->pipes.ack[0] = -1; 942 thread_data->pipes.ack[1] = -1; 943 } 944 945 static int record__thread_data_open_pipes(struct record_thread *thread_data) 946 { 947 if (pipe(thread_data->pipes.msg)) 948 return -EINVAL; 949 950 if (pipe(thread_data->pipes.ack)) { 951 close(thread_data->pipes.msg[0]); 952 thread_data->pipes.msg[0] = -1; 953 close(thread_data->pipes.msg[1]); 954 thread_data->pipes.msg[1] = -1; 955 return -EINVAL; 956 } 957 958 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 959 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 960 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 961 962 return 0; 963 } 964 965 static void record__thread_data_close_pipes(struct record_thread *thread_data) 966 { 967 if (thread_data->pipes.msg[0] != -1) { 968 close(thread_data->pipes.msg[0]); 969 thread_data->pipes.msg[0] = -1; 970 } 971 if (thread_data->pipes.msg[1] != -1) { 972 close(thread_data->pipes.msg[1]); 973 thread_data->pipes.msg[1] = -1; 974 } 975 if (thread_data->pipes.ack[0] != -1) { 976 close(thread_data->pipes.ack[0]); 977 thread_data->pipes.ack[0] = -1; 978 } 979 if (thread_data->pipes.ack[1] != -1) { 980 close(thread_data->pipes.ack[1]); 981 thread_data->pipes.ack[1] = -1; 982 } 983 } 984 985 static bool evlist__per_thread(struct evlist *evlist) 986 { 987 return cpu_map__is_dummy(evlist->core.user_requested_cpus); 988 } 989 990 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 991 { 992 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 993 struct mmap *mmap = evlist->mmap; 994 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 995 struct perf_cpu_map *cpus = evlist->core.all_cpus; 996 bool per_thread = evlist__per_thread(evlist); 997 998 if (per_thread) 999 thread_data->nr_mmaps = nr_mmaps; 1000 else 1001 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 1002 thread_data->mask->maps.nbits); 1003 if (mmap) { 1004 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1005 if (!thread_data->maps) 1006 return -ENOMEM; 1007 } 1008 if (overwrite_mmap) { 1009 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1010 if (!thread_data->overwrite_maps) { 1011 zfree(&thread_data->maps); 1012 return -ENOMEM; 1013 } 1014 } 1015 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1016 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1017 1018 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 
1019 if (per_thread || 1020 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { 1021 if (thread_data->maps) { 1022 thread_data->maps[tm] = &mmap[m]; 1023 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1024 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1025 } 1026 if (thread_data->overwrite_maps) { 1027 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1028 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1029 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m); 1030 } 1031 tm++; 1032 } 1033 } 1034 1035 return 0; 1036 } 1037 1038 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1039 { 1040 int f, tm, pos; 1041 struct mmap *map, *overwrite_map; 1042 1043 fdarray__init(&thread_data->pollfd, 64); 1044 1045 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1046 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1047 overwrite_map = thread_data->overwrite_maps ? 1048 thread_data->overwrite_maps[tm] : NULL; 1049 1050 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1051 void *ptr = evlist->core.pollfd.priv[f].ptr; 1052 1053 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1054 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1055 &evlist->core.pollfd); 1056 if (pos < 0) 1057 return pos; 1058 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1059 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1060 } 1061 } 1062 } 1063 1064 return 0; 1065 } 1066 1067 static void record__free_thread_data(struct record *rec) 1068 { 1069 int t; 1070 struct record_thread *thread_data = rec->thread_data; 1071 1072 if (thread_data == NULL) 1073 return; 1074 1075 for (t = 0; t < rec->nr_threads; t++) { 1076 record__thread_data_close_pipes(&thread_data[t]); 1077 zfree(&thread_data[t].maps); 1078 zfree(&thread_data[t].overwrite_maps); 1079 fdarray__exit(&thread_data[t].pollfd); 1080 } 1081 1082 zfree(&rec->thread_data); 1083 } 1084 1085 static int record__map_thread_evlist_pollfd_indexes(struct record *rec, 1086 int evlist_pollfd_index, 1087 int thread_pollfd_index) 1088 { 1089 size_t x = rec->index_map_cnt; 1090 1091 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL)) 1092 return -ENOMEM; 1093 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index; 1094 rec->index_map[x].thread_pollfd_index = thread_pollfd_index; 1095 rec->index_map_cnt += 1; 1096 return 0; 1097 } 1098 1099 static int record__update_evlist_pollfd_from_thread(struct record *rec, 1100 struct evlist *evlist, 1101 struct record_thread *thread_data) 1102 { 1103 struct pollfd *e_entries = evlist->core.pollfd.entries; 1104 struct pollfd *t_entries = thread_data->pollfd.entries; 1105 int err = 0; 1106 size_t i; 1107 1108 for (i = 0; i < rec->index_map_cnt; i++) { 1109 int e_pos = rec->index_map[i].evlist_pollfd_index; 1110 int t_pos = rec->index_map[i].thread_pollfd_index; 1111 1112 if (e_entries[e_pos].fd != t_entries[t_pos].fd || 1113 e_entries[e_pos].events != t_entries[t_pos].events) { 1114 pr_err("Thread and evlist pollfd index mismatch\n"); 1115 err = -EINVAL; 1116 continue; 1117 } 1118 e_entries[e_pos].revents = t_entries[t_pos].revents; 1119 } 1120 return err; 1121 } 1122 1123 static int record__dup_non_perf_events(struct record *rec, 1124 struct evlist *evlist, 1125 struct record_thread *thread_data) 1126 { 1127 struct fdarray *fda = &evlist->core.pollfd; 1128 int i, ret; 1129 1130 for (i = 0; i < fda->nr; i++) { 1131 if (!(fda->priv[i].flags & 
fdarray_flag__non_perf_event)) 1132 continue; 1133 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda); 1134 if (ret < 0) { 1135 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1136 return ret; 1137 } 1138 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n", 1139 thread_data, ret, fda->entries[i].fd); 1140 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret); 1141 if (ret < 0) { 1142 pr_err("Failed to map thread and evlist pollfd indexes\n"); 1143 return ret; 1144 } 1145 } 1146 return 0; 1147 } 1148 1149 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1150 { 1151 int t, ret; 1152 struct record_thread *thread_data; 1153 1154 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1155 if (!rec->thread_data) { 1156 pr_err("Failed to allocate thread data\n"); 1157 return -ENOMEM; 1158 } 1159 thread_data = rec->thread_data; 1160 1161 for (t = 0; t < rec->nr_threads; t++) 1162 record__thread_data_init_pipes(&thread_data[t]); 1163 1164 for (t = 0; t < rec->nr_threads; t++) { 1165 thread_data[t].rec = rec; 1166 thread_data[t].mask = &rec->thread_masks[t]; 1167 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1168 if (ret) { 1169 pr_err("Failed to initialize thread[%d] maps\n", t); 1170 goto out_free; 1171 } 1172 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1173 if (ret) { 1174 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1175 goto out_free; 1176 } 1177 if (t) { 1178 thread_data[t].tid = -1; 1179 ret = record__thread_data_open_pipes(&thread_data[t]); 1180 if (ret) { 1181 pr_err("Failed to open thread[%d] communication pipes\n", t); 1182 goto out_free; 1183 } 1184 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1185 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1186 if (ret < 0) { 1187 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1188 goto out_free; 1189 } 1190 thread_data[t].ctlfd_pos = ret; 1191 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1192 thread_data, thread_data[t].ctlfd_pos, 1193 thread_data[t].pipes.msg[0]); 1194 } else { 1195 thread_data[t].tid = gettid(); 1196 1197 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]); 1198 if (ret < 0) 1199 goto out_free; 1200 1201 if (evlist->ctl_fd.pos == -1) 1202 continue; 1203 ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos, 1204 &evlist->core.pollfd); 1205 if (ret < 0) { 1206 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1207 goto out_free; 1208 } 1209 thread_data[t].ctlfd_pos = ret; 1210 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1211 thread_data, thread_data[t].ctlfd_pos, 1212 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd); 1213 } 1214 } 1215 1216 return 0; 1217 1218 out_free: 1219 record__free_thread_data(rec); 1220 1221 return ret; 1222 } 1223 1224 static int record__mmap_evlist(struct record *rec, 1225 struct evlist *evlist) 1226 { 1227 int i, ret; 1228 struct record_opts *opts = &rec->opts; 1229 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1230 opts->auxtrace_sample_mode; 1231 char msg[512]; 1232 1233 if (opts->affinity != PERF_AFFINITY_SYS) 1234 cpu__setup_cpunode_map(); 1235 1236 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1237 opts->auxtrace_mmap_pages, 1238 auxtrace_overwrite, 1239 opts->nr_cblocks, opts->affinity, 1240 opts->mmap_flush, opts->comp_level) < 0) { 1241 if (errno == EPERM) { 1242 pr_err("Permission error mapping pages.\n" 1243 "Consider 
increasing " 1244 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1245 "or try again with a smaller value of -m/--mmap_pages.\n" 1246 "(current value: %u,%u)\n", 1247 opts->mmap_pages, opts->auxtrace_mmap_pages); 1248 return -errno; 1249 } else { 1250 pr_err("failed to mmap with %d (%s)\n", errno, 1251 str_error_r(errno, msg, sizeof(msg))); 1252 if (errno) 1253 return -errno; 1254 else 1255 return -EINVAL; 1256 } 1257 } 1258 1259 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1260 return -1; 1261 1262 ret = record__alloc_thread_data(rec, evlist); 1263 if (ret) 1264 return ret; 1265 1266 if (record__threads_enabled(rec)) { 1267 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1268 if (ret) { 1269 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1270 return ret; 1271 } 1272 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1273 if (evlist->mmap) 1274 evlist->mmap[i].file = &rec->data.dir.files[i]; 1275 if (evlist->overwrite_mmap) 1276 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1277 } 1278 } 1279 1280 return 0; 1281 } 1282 1283 static int record__mmap(struct record *rec) 1284 { 1285 return record__mmap_evlist(rec, rec->evlist); 1286 } 1287 1288 static int record__open(struct record *rec) 1289 { 1290 char msg[BUFSIZ]; 1291 struct evsel *pos; 1292 struct evlist *evlist = rec->evlist; 1293 struct perf_session *session = rec->session; 1294 struct record_opts *opts = &rec->opts; 1295 int rc = 0; 1296 1297 /* 1298 * For initial_delay, system wide or a hybrid system, we need to add a 1299 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1300 * of waiting or event synthesis. 1301 */ 1302 if (opts->initial_delay || target__has_cpu(&opts->target) || 1303 perf_pmu__has_hybrid()) { 1304 pos = evlist__get_tracking_event(evlist); 1305 if (!evsel__is_dummy_event(pos)) { 1306 /* Set up dummy event. */ 1307 if (evlist__add_dummy(evlist)) 1308 return -ENOMEM; 1309 pos = evlist__last(evlist); 1310 evlist__set_tracking_event(evlist, pos); 1311 } 1312 1313 /* 1314 * Enable the dummy event when the process is forked for 1315 * initial_delay, immediately for system wide. 
1316 */ 1317 if (opts->initial_delay && !pos->immediate && 1318 !target__has_cpu(&opts->target)) 1319 pos->core.attr.enable_on_exec = 1; 1320 else 1321 pos->immediate = 1; 1322 } 1323 1324 evlist__config(evlist, opts, &callchain_param); 1325 1326 evlist__for_each_entry(evlist, pos) { 1327 try_again: 1328 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1329 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1330 if (verbose > 0) 1331 ui__warning("%s\n", msg); 1332 goto try_again; 1333 } 1334 if ((errno == EINVAL || errno == EBADF) && 1335 pos->core.leader != &pos->core && 1336 pos->weak_group) { 1337 pos = evlist__reset_weak_group(evlist, pos, true); 1338 goto try_again; 1339 } 1340 rc = -errno; 1341 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1342 ui__error("%s\n", msg); 1343 goto out; 1344 } 1345 1346 pos->supported = true; 1347 } 1348 1349 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1350 pr_warning( 1351 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1352 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1353 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1354 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1355 "Samples in kernel modules won't be resolved at all.\n\n" 1356 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1357 "even with a suitable vmlinux or kallsyms file.\n\n"); 1358 } 1359 1360 if (evlist__apply_filters(evlist, &pos)) { 1361 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1362 pos->filter, evsel__name(pos), errno, 1363 str_error_r(errno, msg, sizeof(msg))); 1364 rc = -1; 1365 goto out; 1366 } 1367 1368 rc = record__mmap(rec); 1369 if (rc) 1370 goto out; 1371 1372 session->evlist = evlist; 1373 perf_session__set_id_hdr_size(session); 1374 out: 1375 return rc; 1376 } 1377 1378 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1379 { 1380 if (rec->evlist->first_sample_time == 0) 1381 rec->evlist->first_sample_time = sample_time; 1382 1383 if (sample_time) 1384 rec->evlist->last_sample_time = sample_time; 1385 } 1386 1387 static int process_sample_event(struct perf_tool *tool, 1388 union perf_event *event, 1389 struct perf_sample *sample, 1390 struct evsel *evsel, 1391 struct machine *machine) 1392 { 1393 struct record *rec = container_of(tool, struct record, tool); 1394 1395 set_timestamp_boundary(rec, sample->time); 1396 1397 if (rec->buildid_all) 1398 return 0; 1399 1400 rec->samples++; 1401 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1402 } 1403 1404 static int process_buildids(struct record *rec) 1405 { 1406 struct perf_session *session = rec->session; 1407 1408 if (perf_data__size(&rec->data) == 0) 1409 return 0; 1410 1411 /* 1412 * During this process, it'll load kernel map and replace the 1413 * dso->long_name to a real pathname it found. In this case 1414 * we prefer the vmlinux path like 1415 * /lib/modules/3.16.4/build/vmlinux 1416 * 1417 * rather than build-id path (in debug directory). 1418 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1419 */ 1420 symbol_conf.ignore_vmlinux_buildid = true; 1421 1422 /* 1423 * If --buildid-all is given, it marks all DSO regardless of hits, 1424 * so no need to process samples. But if timestamp_boundary is enabled, 1425 * it still needs to walk on all samples to get the timestamps of 1426 * first/last samples. 
1427 */ 1428 if (rec->buildid_all && !rec->timestamp_boundary) 1429 rec->tool.sample = NULL; 1430 1431 return perf_session__process_events(session); 1432 } 1433 1434 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1435 { 1436 int err; 1437 struct perf_tool *tool = data; 1438 /* 1439 *As for guest kernel when processing subcommand record&report, 1440 *we arrange module mmap prior to guest kernel mmap and trigger 1441 *a preload dso because default guest module symbols are loaded 1442 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1443 *method is used to avoid symbol missing when the first addr is 1444 *in module instead of in guest kernel. 1445 */ 1446 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1447 machine); 1448 if (err < 0) 1449 pr_err("Couldn't record guest kernel [%d]'s reference" 1450 " relocation symbol.\n", machine->pid); 1451 1452 /* 1453 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1454 * have no _text sometimes. 1455 */ 1456 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1457 machine); 1458 if (err < 0) 1459 pr_err("Couldn't record guest kernel [%d]'s reference" 1460 " relocation symbol.\n", machine->pid); 1461 } 1462 1463 static struct perf_event_header finished_round_event = { 1464 .size = sizeof(struct perf_event_header), 1465 .type = PERF_RECORD_FINISHED_ROUND, 1466 }; 1467 1468 static struct perf_event_header finished_init_event = { 1469 .size = sizeof(struct perf_event_header), 1470 .type = PERF_RECORD_FINISHED_INIT, 1471 }; 1472 1473 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1474 { 1475 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1476 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1477 thread->mask->affinity.nbits)) { 1478 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1479 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1480 map->affinity_mask.bits, thread->mask->affinity.nbits); 1481 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1482 (cpu_set_t *)thread->mask->affinity.bits); 1483 if (verbose == 2) { 1484 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1485 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1486 } 1487 } 1488 } 1489 1490 static size_t process_comp_header(void *record, size_t increment) 1491 { 1492 struct perf_record_compressed *event = record; 1493 size_t size = sizeof(*event); 1494 1495 if (increment) { 1496 event->header.size += increment; 1497 return increment; 1498 } 1499 1500 event->header.type = PERF_RECORD_COMPRESSED; 1501 event->header.size = size; 1502 1503 return size; 1504 } 1505 1506 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1507 void *dst, size_t dst_size, void *src, size_t src_size) 1508 { 1509 size_t compressed; 1510 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1511 struct zstd_data *zstd_data = &session->zstd_data; 1512 1513 if (map && map->file) 1514 zstd_data = &map->zstd_data; 1515 1516 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1517 max_record_size, process_comp_header); 1518 1519 if (map && map->file) { 1520 thread->bytes_transferred += src_size; 1521 thread->bytes_compressed += compressed; 1522 } else { 1523 session->bytes_transferred += src_size; 1524 session->bytes_compressed += compressed; 1525 } 1526 1527 return compressed; 1528 
} 1529 1530 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1531 bool overwrite, bool synch) 1532 { 1533 u64 bytes_written = rec->bytes_written; 1534 int i; 1535 int rc = 0; 1536 int nr_mmaps; 1537 struct mmap **maps; 1538 int trace_fd = rec->data.file.fd; 1539 off_t off = 0; 1540 1541 if (!evlist) 1542 return 0; 1543 1544 nr_mmaps = thread->nr_mmaps; 1545 maps = overwrite ? thread->overwrite_maps : thread->maps; 1546 1547 if (!maps) 1548 return 0; 1549 1550 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1551 return 0; 1552 1553 if (record__aio_enabled(rec)) 1554 off = record__aio_get_pos(trace_fd); 1555 1556 for (i = 0; i < nr_mmaps; i++) { 1557 u64 flush = 0; 1558 struct mmap *map = maps[i]; 1559 1560 if (map->core.base) { 1561 record__adjust_affinity(rec, map); 1562 if (synch) { 1563 flush = map->core.flush; 1564 map->core.flush = 1; 1565 } 1566 if (!record__aio_enabled(rec)) { 1567 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1568 if (synch) 1569 map->core.flush = flush; 1570 rc = -1; 1571 goto out; 1572 } 1573 } else { 1574 if (record__aio_push(rec, map, &off) < 0) { 1575 record__aio_set_pos(trace_fd, off); 1576 if (synch) 1577 map->core.flush = flush; 1578 rc = -1; 1579 goto out; 1580 } 1581 } 1582 if (synch) 1583 map->core.flush = flush; 1584 } 1585 1586 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1587 !rec->opts.auxtrace_sample_mode && 1588 record__auxtrace_mmap_read(rec, map) != 0) { 1589 rc = -1; 1590 goto out; 1591 } 1592 } 1593 1594 if (record__aio_enabled(rec)) 1595 record__aio_set_pos(trace_fd, off); 1596 1597 /* 1598 * Mark the round finished in case we wrote 1599 * at least one event. 1600 * 1601 * No need for round events in directory mode, 1602 * because per-cpu maps and files have data 1603 * sorted by kernel. 1604 */ 1605 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1606 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1607 1608 if (overwrite) 1609 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1610 out: 1611 return rc; 1612 } 1613 1614 static int record__mmap_read_all(struct record *rec, bool synch) 1615 { 1616 int err; 1617 1618 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1619 if (err) 1620 return err; 1621 1622 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1623 } 1624 1625 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1626 void *arg __maybe_unused) 1627 { 1628 struct perf_mmap *map = fda->priv[fd].ptr; 1629 1630 if (map) 1631 perf_mmap__put(map); 1632 } 1633 1634 static void *record__thread(void *arg) 1635 { 1636 enum thread_msg msg = THREAD_MSG__READY; 1637 bool terminate = false; 1638 struct fdarray *pollfd; 1639 int err, ctlfd_pos; 1640 1641 thread = arg; 1642 thread->tid = gettid(); 1643 1644 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1645 if (err == -1) 1646 pr_warning("threads[%d]: failed to notify on start: %s\n", 1647 thread->tid, strerror(errno)); 1648 1649 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1650 1651 pollfd = &thread->pollfd; 1652 ctlfd_pos = thread->ctlfd_pos; 1653 1654 for (;;) { 1655 unsigned long long hits = thread->samples; 1656 1657 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1658 break; 1659 1660 if (hits == thread->samples) { 1661 1662 err = fdarray__poll(pollfd, -1); 1663 /* 1664 * Propagate error, only if there's any. 
Ignore positive 1665 * number of returned events and interrupt error. 1666 */ 1667 if (err > 0 || (err < 0 && errno == EINTR)) 1668 err = 0; 1669 thread->waking++; 1670 1671 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1672 record__thread_munmap_filtered, NULL) == 0) 1673 break; 1674 } 1675 1676 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1677 terminate = true; 1678 close(thread->pipes.msg[0]); 1679 thread->pipes.msg[0] = -1; 1680 pollfd->entries[ctlfd_pos].fd = -1; 1681 pollfd->entries[ctlfd_pos].events = 0; 1682 } 1683 1684 pollfd->entries[ctlfd_pos].revents = 0; 1685 } 1686 record__mmap_read_all(thread->rec, true); 1687 1688 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1689 if (err == -1) 1690 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1691 thread->tid, strerror(errno)); 1692 1693 return NULL; 1694 } 1695 1696 static void record__init_features(struct record *rec) 1697 { 1698 struct perf_session *session = rec->session; 1699 int feat; 1700 1701 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1702 perf_header__set_feat(&session->header, feat); 1703 1704 if (rec->no_buildid) 1705 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1706 1707 if (!have_tracepoints(&rec->evlist->core.entries)) 1708 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1709 1710 if (!rec->opts.branch_stack) 1711 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1712 1713 if (!rec->opts.full_auxtrace) 1714 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1715 1716 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1717 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1718 1719 if (!rec->opts.use_clockid) 1720 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1721 1722 if (!record__threads_enabled(rec)) 1723 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1724 1725 if (!record__comp_enabled(rec)) 1726 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1727 1728 perf_header__clear_feat(&session->header, HEADER_STAT); 1729 } 1730 1731 static void 1732 record__finish_output(struct record *rec) 1733 { 1734 int i; 1735 struct perf_data *data = &rec->data; 1736 int fd = perf_data__fd(data); 1737 1738 if (data->is_pipe) 1739 return; 1740 1741 rec->session->header.data_size += rec->bytes_written; 1742 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1743 if (record__threads_enabled(rec)) { 1744 for (i = 0; i < data->dir.nr; i++) 1745 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1746 } 1747 1748 if (!rec->no_buildid) { 1749 process_buildids(rec); 1750 1751 if (rec->buildid_all) 1752 dsos__hit_all(rec->session); 1753 } 1754 perf_session__write_header(rec->session, rec->evlist, fd, true); 1755 1756 return; 1757 } 1758 1759 static int record__synthesize_workload(struct record *rec, bool tail) 1760 { 1761 int err; 1762 struct perf_thread_map *thread_map; 1763 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1764 1765 if (rec->opts.tail_synthesize != tail) 1766 return 0; 1767 1768 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1769 if (thread_map == NULL) 1770 return -1; 1771 1772 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1773 process_synthesized_event, 1774 &rec->session->machines.host, 1775 needs_mmap, 1776 rec->opts.sample_address); 1777 perf_thread_map__put(thread_map); 1778 return err; 1779 } 1780 1781 static int write_finished_init(struct record *rec, bool tail) 1782 { 1783 if 
(rec->opts.tail_synthesize != tail) 1784 return 0; 1785 1786 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); 1787 } 1788 1789 static int record__synthesize(struct record *rec, bool tail); 1790 1791 static int 1792 record__switch_output(struct record *rec, bool at_exit) 1793 { 1794 struct perf_data *data = &rec->data; 1795 int fd, err; 1796 char *new_filename; 1797 1798 /* Same Size: "2015122520103046"*/ 1799 char timestamp[] = "InvalidTimestamp"; 1800 1801 record__aio_mmap_read_sync(rec); 1802 1803 write_finished_init(rec, true); 1804 1805 record__synthesize(rec, true); 1806 if (target__none(&rec->opts.target)) 1807 record__synthesize_workload(rec, true); 1808 1809 rec->samples = 0; 1810 record__finish_output(rec); 1811 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1812 if (err) { 1813 pr_err("Failed to get current timestamp\n"); 1814 return -EINVAL; 1815 } 1816 1817 fd = perf_data__switch(data, timestamp, 1818 rec->session->header.data_offset, 1819 at_exit, &new_filename); 1820 if (fd >= 0 && !at_exit) { 1821 rec->bytes_written = 0; 1822 rec->session->header.data_size = 0; 1823 } 1824 1825 if (!quiet) 1826 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1827 data->path, timestamp); 1828 1829 if (rec->switch_output.num_files) { 1830 int n = rec->switch_output.cur_file + 1; 1831 1832 if (n >= rec->switch_output.num_files) 1833 n = 0; 1834 rec->switch_output.cur_file = n; 1835 if (rec->switch_output.filenames[n]) { 1836 remove(rec->switch_output.filenames[n]); 1837 zfree(&rec->switch_output.filenames[n]); 1838 } 1839 rec->switch_output.filenames[n] = new_filename; 1840 } else { 1841 free(new_filename); 1842 } 1843 1844 /* Output tracking events */ 1845 if (!at_exit) { 1846 record__synthesize(rec, false); 1847 1848 /* 1849 * In 'perf record --switch-output' without -a, 1850 * record__synthesize() in record__switch_output() won't 1851 * generate tracking events because there's no thread_map 1852 * in evlist. Which causes newly created perf.data doesn't 1853 * contain map and comm information. 1854 * Create a fake thread_map and directly call 1855 * perf_event__synthesize_thread_map() for those events. 1856 */ 1857 if (target__none(&rec->opts.target)) 1858 record__synthesize_workload(rec, false); 1859 write_finished_init(rec, false); 1860 } 1861 return fd; 1862 } 1863 1864 static volatile int workload_exec_errno; 1865 1866 /* 1867 * evlist__prepare_workload will send a SIGUSR1 1868 * if the fork fails, since we asked by setting its 1869 * want_signal to true. 
1870 */ 1871 static void workload_exec_failed_signal(int signo __maybe_unused, 1872 siginfo_t *info, 1873 void *ucontext __maybe_unused) 1874 { 1875 workload_exec_errno = info->si_value.sival_int; 1876 done = 1; 1877 child_finished = 1; 1878 } 1879 1880 static void snapshot_sig_handler(int sig); 1881 static void alarm_sig_handler(int sig); 1882 1883 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1884 { 1885 if (evlist) { 1886 if (evlist->mmap && evlist->mmap[0].core.base) 1887 return evlist->mmap[0].core.base; 1888 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1889 return evlist->overwrite_mmap[0].core.base; 1890 } 1891 return NULL; 1892 } 1893 1894 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1895 { 1896 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1897 if (pc) 1898 return pc; 1899 return NULL; 1900 } 1901 1902 static int record__synthesize(struct record *rec, bool tail) 1903 { 1904 struct perf_session *session = rec->session; 1905 struct machine *machine = &session->machines.host; 1906 struct perf_data *data = &rec->data; 1907 struct record_opts *opts = &rec->opts; 1908 struct perf_tool *tool = &rec->tool; 1909 int err = 0; 1910 event_op f = process_synthesized_event; 1911 1912 if (rec->opts.tail_synthesize != tail) 1913 return 0; 1914 1915 if (data->is_pipe) { 1916 err = perf_event__synthesize_for_pipe(tool, session, data, 1917 process_synthesized_event); 1918 if (err < 0) 1919 goto out; 1920 1921 rec->bytes_written += err; 1922 } 1923 1924 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1925 process_synthesized_event, machine); 1926 if (err) 1927 goto out; 1928 1929 /* Synthesize id_index before auxtrace_info */ 1930 err = perf_event__synthesize_id_index(tool, 1931 process_synthesized_event, 1932 session->evlist, machine); 1933 if (err) 1934 goto out; 1935 1936 if (rec->opts.full_auxtrace) { 1937 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1938 session, process_synthesized_event); 1939 if (err) 1940 goto out; 1941 } 1942 1943 if (!evlist__exclude_kernel(rec->evlist)) { 1944 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1945 machine); 1946 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1947 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1948 "Check /proc/kallsyms permission or run as root.\n"); 1949 1950 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1951 machine); 1952 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1953 "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" 1954 "Check /proc/modules permission or run as root.\n"); 1955 } 1956 1957 if (perf_guest) { 1958 machines__process_guests(&session->machines, 1959 perf_event__synthesize_guest_os, tool); 1960 } 1961 1962 err = perf_event__synthesize_extra_attr(&rec->tool, 1963 rec->evlist, 1964 process_synthesized_event, 1965 data->is_pipe); 1966 if (err) 1967 goto out; 1968 1969 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1970 process_synthesized_event, 1971 NULL); 1972 if (err < 0) { 1973 pr_err("Couldn't synthesize thread map.\n"); 1974 return err; 1975 } 1976 1977 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, 1978 process_synthesized_event, NULL); 1979 if (err < 0) { 1980 pr_err("Couldn't synthesize cpu map.\n"); 1981 return err; 1982 } 1983 1984 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1985 machine, opts); 1986 if (err < 0) { 1987 pr_warning("Couldn't synthesize bpf events.\n"); 1988 err = 0; 1989 } 1990 1991 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 1992 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 1993 machine); 1994 if (err < 0) { 1995 pr_warning("Couldn't synthesize cgroup events.\n"); 1996 err = 0; 1997 } 1998 } 1999 2000 if (rec->opts.nr_threads_synthesize > 1) { 2001 perf_set_multithreaded(); 2002 f = process_locked_synthesized_event; 2003 } 2004 2005 if (rec->opts.synth & PERF_SYNTH_TASK) { 2006 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 2007 2008 err = __machine__synthesize_threads(machine, tool, &opts->target, 2009 rec->evlist->core.threads, 2010 f, needs_mmap, opts->sample_address, 2011 rec->opts.nr_threads_synthesize); 2012 } 2013 2014 if (rec->opts.nr_threads_synthesize > 1) 2015 perf_set_singlethreaded(); 2016 2017 out: 2018 return err; 2019 } 2020 2021 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 2022 { 2023 struct record *rec = data; 2024 pthread_kill(rec->thread_id, SIGUSR2); 2025 return 0; 2026 } 2027 2028 static int record__setup_sb_evlist(struct record *rec) 2029 { 2030 struct record_opts *opts = &rec->opts; 2031 2032 if (rec->sb_evlist != NULL) { 2033 /* 2034 * We get here if --switch-output-event populated the 2035 * sb_evlist, so associate a callback that will send a SIGUSR2 2036 * to the main thread. 
2037 */ 2038 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 2039 rec->thread_id = pthread_self(); 2040 } 2041 #ifdef HAVE_LIBBPF_SUPPORT 2042 if (!opts->no_bpf_event) { 2043 if (rec->sb_evlist == NULL) { 2044 rec->sb_evlist = evlist__new(); 2045 2046 if (rec->sb_evlist == NULL) { 2047 pr_err("Couldn't create side band evlist.\n"); 2048 return -1; 2049 } 2050 } 2051 2052 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 2053 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n"); 2054 return -1; 2055 } 2056 } 2057 #endif 2058 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 2059 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 2060 opts->no_bpf_event = true; 2061 } 2062 2063 return 0; 2064 } 2065 2066 static int record__init_clock(struct record *rec) 2067 { 2068 struct perf_session *session = rec->session; 2069 struct timespec ref_clockid; 2070 struct timeval ref_tod; 2071 u64 ref; 2072 2073 if (!rec->opts.use_clockid) 2074 return 0; 2075 2076 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 2077 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 2078 2079 session->header.env.clock.clockid = rec->opts.clockid; 2080 2081 if (gettimeofday(&ref_tod, NULL) != 0) { 2082 pr_err("gettimeofday failed, cannot set reference time.\n"); 2083 return -1; 2084 } 2085 2086 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 2087 pr_err("clock_gettime failed, cannot set reference time.\n"); 2088 return -1; 2089 } 2090 2091 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 2092 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 2093 2094 session->header.env.clock.tod_ns = ref; 2095 2096 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2097 (u64) ref_clockid.tv_nsec; 2098 2099 session->header.env.clock.clockid_ns = ref; 2100 return 0; 2101 } 2102 2103 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2104 { 2105 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2106 trigger_hit(&auxtrace_snapshot_trigger); 2107 auxtrace_record__snapshot_started = 1; 2108 if (auxtrace_record__snapshot_start(rec->itr)) 2109 trigger_error(&auxtrace_snapshot_trigger); 2110 } 2111 } 2112 2113 static void record__uniquify_name(struct record *rec) 2114 { 2115 struct evsel *pos; 2116 struct evlist *evlist = rec->evlist; 2117 char *new_name; 2118 int ret; 2119 2120 if (!perf_pmu__has_hybrid()) 2121 return; 2122 2123 evlist__for_each_entry(evlist, pos) { 2124 if (!evsel__is_hybrid(pos)) 2125 continue; 2126 2127 if (strchr(pos->name, '/')) 2128 continue; 2129 2130 ret = asprintf(&new_name, "%s/%s/", 2131 pos->pmu_name, pos->name); 2132 if (ret >= 0) { 2133 free(pos->name); 2134 pos->name = new_name; 2135 } 2136 } 2137 } 2138 2139 static int record__terminate_thread(struct record_thread *thread_data) 2140 { 2141 int err; 2142 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2143 pid_t tid = thread_data->tid; 2144 2145 close(thread_data->pipes.msg[1]); 2146 thread_data->pipes.msg[1] = -1; 2147 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2148 if (err > 0) 2149 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2150 else 2151 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2152 thread->tid, tid); 2153 2154 return 0; 2155 } 2156 2157 static int record__start_threads(struct record *rec) 2158 { 2159 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2160 struct record_thread *thread_data = rec->thread_data; 2161 sigset_t full,
mask; 2162 pthread_t handle; 2163 pthread_attr_t attrs; 2164 2165 thread = &thread_data[0]; 2166 2167 if (!record__threads_enabled(rec)) 2168 return 0; 2169 2170 sigfillset(&full); 2171 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2172 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2173 return -1; 2174 } 2175 2176 pthread_attr_init(&attrs); 2177 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2178 2179 for (t = 1; t < nr_threads; t++) { 2180 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2181 2182 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2183 pthread_attr_setaffinity_np(&attrs, 2184 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2185 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2186 #endif 2187 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2188 for (tt = 1; tt < t; tt++) 2189 record__terminate_thread(&thread_data[tt]); 2190 pr_err("Failed to start threads: %s\n", strerror(errno)); 2191 ret = -1; 2192 goto out_err; 2193 } 2194 2195 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2196 if (err > 0) 2197 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2198 thread_msg_tags[msg]); 2199 else 2200 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2201 thread->tid, rec->thread_data[t].tid); 2202 } 2203 2204 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2205 (cpu_set_t *)thread->mask->affinity.bits); 2206 2207 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2208 2209 out_err: 2210 pthread_attr_destroy(&attrs); 2211 2212 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2213 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2214 ret = -1; 2215 } 2216 2217 return ret; 2218 } 2219 2220 static int record__stop_threads(struct record *rec) 2221 { 2222 int t; 2223 struct record_thread *thread_data = rec->thread_data; 2224 2225 for (t = 1; t < rec->nr_threads; t++) 2226 record__terminate_thread(&thread_data[t]); 2227 2228 for (t = 0; t < rec->nr_threads; t++) { 2229 rec->samples += thread_data[t].samples; 2230 if (!record__threads_enabled(rec)) 2231 continue; 2232 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2233 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2234 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2235 thread_data[t].samples, thread_data[t].waking); 2236 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2237 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2238 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2239 else 2240 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2241 } 2242 2243 return 0; 2244 } 2245 2246 static unsigned long record__waking(struct record *rec) 2247 { 2248 int t; 2249 unsigned long waking = 0; 2250 struct record_thread *thread_data = rec->thread_data; 2251 2252 for (t = 0; t < rec->nr_threads; t++) 2253 waking += thread_data[t].waking; 2254 2255 return waking; 2256 } 2257 2258 static int __cmd_record(struct record *rec, int argc, const char **argv) 2259 { 2260 int err; 2261 int status = 0; 2262 const bool forks = argc > 0; 2263 struct perf_tool *tool = &rec->tool; 2264 struct record_opts *opts = &rec->opts; 2265 struct perf_data *data = &rec->data; 2266 struct perf_session *session; 2267 bool disabled = false, draining = false; 2268 int fd; 2269 float ratio = 0; 2270 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2271 2272
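	/*
	 * Exit and signal handlers are installed up front, before the
	 * session and the output file below are created.
	 */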
atexit(record__sig_exit); 2273 signal(SIGCHLD, sig_handler); 2274 signal(SIGINT, sig_handler); 2275 signal(SIGTERM, sig_handler); 2276 signal(SIGSEGV, sigsegv_handler); 2277 2278 if (rec->opts.record_namespaces) 2279 tool->namespace_events = true; 2280 2281 if (rec->opts.record_cgroup) { 2282 #ifdef HAVE_FILE_HANDLE 2283 tool->cgroup_events = true; 2284 #else 2285 pr_err("cgroup tracking is not supported\n"); 2286 return -1; 2287 #endif 2288 } 2289 2290 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2291 signal(SIGUSR2, snapshot_sig_handler); 2292 if (rec->opts.auxtrace_snapshot_mode) 2293 trigger_on(&auxtrace_snapshot_trigger); 2294 if (rec->switch_output.enabled) 2295 trigger_on(&switch_output_trigger); 2296 } else { 2297 signal(SIGUSR2, SIG_IGN); 2298 } 2299 2300 session = perf_session__new(data, tool); 2301 if (IS_ERR(session)) { 2302 pr_err("Perf session creation failed.\n"); 2303 return PTR_ERR(session); 2304 } 2305 2306 if (record__threads_enabled(rec)) { 2307 if (perf_data__is_pipe(&rec->data)) { 2308 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2309 return -1; 2310 } 2311 if (rec->opts.full_auxtrace) { 2312 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2313 return -1; 2314 } 2315 } 2316 2317 fd = perf_data__fd(data); 2318 rec->session = session; 2319 2320 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2321 pr_err("Compression initialization failed.\n"); 2322 return -1; 2323 } 2324 #ifdef HAVE_EVENTFD_SUPPORT 2325 done_fd = eventfd(0, EFD_NONBLOCK); 2326 if (done_fd < 0) { 2327 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2328 status = -1; 2329 goto out_delete_session; 2330 } 2331 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2332 if (err < 0) { 2333 pr_err("Failed to add wakeup eventfd to poll list\n"); 2334 status = err; 2335 goto out_delete_session; 2336 } 2337 #endif // HAVE_EVENTFD_SUPPORT 2338 2339 session->header.env.comp_type = PERF_COMP_ZSTD; 2340 session->header.env.comp_level = rec->opts.comp_level; 2341 2342 if (rec->opts.kcore && 2343 !record__kcore_readable(&session->machines.host)) { 2344 pr_err("ERROR: kcore is not readable.\n"); 2345 return -1; 2346 } 2347 2348 if (record__init_clock(rec)) 2349 return -1; 2350 2351 record__init_features(rec); 2352 2353 if (forks) { 2354 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2355 workload_exec_failed_signal); 2356 if (err < 0) { 2357 pr_err("Couldn't run the workload!\n"); 2358 status = err; 2359 goto out_delete_session; 2360 } 2361 } 2362 2363 /* 2364 * If we have just single event and are sending data 2365 * through pipe, we need to force the ids allocation, 2366 * because we synthesize event name through the pipe 2367 * and need the id for that. 
2368 */ 2369 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2370 rec->opts.sample_id = true; 2371 2372 record__uniquify_name(rec); 2373 2374 if (record__open(rec) != 0) { 2375 err = -1; 2376 goto out_free_threads; 2377 } 2378 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2379 2380 if (rec->opts.kcore) { 2381 err = record__kcore_copy(&session->machines.host, data); 2382 if (err) { 2383 pr_err("ERROR: Failed to copy kcore\n"); 2384 goto out_free_threads; 2385 } 2386 } 2387 2388 err = bpf__apply_obj_config(); 2389 if (err) { 2390 char errbuf[BUFSIZ]; 2391 2392 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2393 pr_err("ERROR: Apply config to BPF failed: %s\n", 2394 errbuf); 2395 goto out_free_threads; 2396 } 2397 2398 /* 2399 * Normally perf_session__new would do this, but it doesn't have the 2400 * evlist. 2401 */ 2402 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2403 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2404 rec->tool.ordered_events = false; 2405 } 2406 2407 if (!rec->evlist->core.nr_groups) 2408 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2409 2410 if (data->is_pipe) { 2411 err = perf_header__write_pipe(fd); 2412 if (err < 0) 2413 goto out_free_threads; 2414 } else { 2415 err = perf_session__write_header(session, rec->evlist, fd, false); 2416 if (err < 0) 2417 goto out_free_threads; 2418 } 2419 2420 err = -1; 2421 if (!rec->no_buildid 2422 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2423 pr_err("Couldn't generate buildids. " 2424 "Use --no-buildid to profile anyway.\n"); 2425 goto out_free_threads; 2426 } 2427 2428 err = record__setup_sb_evlist(rec); 2429 if (err) 2430 goto out_free_threads; 2431 2432 err = record__synthesize(rec, false); 2433 if (err < 0) 2434 goto out_free_threads; 2435 2436 if (rec->realtime_prio) { 2437 struct sched_param param; 2438 2439 param.sched_priority = rec->realtime_prio; 2440 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2441 pr_err("Could not set realtime priority.\n"); 2442 err = -1; 2443 goto out_free_threads; 2444 } 2445 } 2446 2447 if (record__start_threads(rec)) 2448 goto out_free_threads; 2449 2450 /* 2451 * When perf is starting the traced process, all the events 2452 * (apart from group members) have enable_on_exec=1 set, 2453 * so don't spoil it by prematurely enabling them. 2454 */ 2455 if (!target__none(&opts->target) && !opts->initial_delay) 2456 evlist__enable(rec->evlist); 2457 2458 /* 2459 * Let the child rip 2460 */ 2461 if (forks) { 2462 struct machine *machine = &session->machines.host; 2463 union perf_event *event; 2464 pid_t tgid; 2465 2466 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2467 if (event == NULL) { 2468 err = -ENOMEM; 2469 goto out_child; 2470 } 2471 2472 /* 2473 * Some H/W events are generated before COMM event 2474 * which is emitted during exec(), so perf script 2475 * cannot see a correct process name for those events. 2476 * Synthesize COMM event to prevent it. 2477 */ 2478 tgid = perf_event__synthesize_comm(tool, event, 2479 rec->evlist->workload.pid, 2480 process_synthesized_event, 2481 machine); 2482 free(event); 2483 2484 if (tgid == -1) 2485 goto out_child; 2486 2487 event = malloc(sizeof(event->namespaces) + 2488 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2489 machine->id_hdr_size); 2490 if (event == NULL) { 2491 err = -ENOMEM; 2492 goto out_child; 2493 } 2494 2495 /* 2496 * Synthesize NAMESPACES event for the command specified.
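 * The event buffer malloc'ed above is sized for the fixed namespaces
 * header plus NR_NAMESPACES link_info entries and the machine's id
 * header.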
2497 */ 2498 perf_event__synthesize_namespaces(tool, event, 2499 rec->evlist->workload.pid, 2500 tgid, process_synthesized_event, 2501 machine); 2502 free(event); 2503 2504 evlist__start_workload(rec->evlist); 2505 } 2506 2507 if (opts->initial_delay) { 2508 pr_info(EVLIST_DISABLED_MSG); 2509 if (opts->initial_delay > 0) { 2510 usleep(opts->initial_delay * USEC_PER_MSEC); 2511 evlist__enable(rec->evlist); 2512 pr_info(EVLIST_ENABLED_MSG); 2513 } 2514 } 2515 2516 trigger_ready(&auxtrace_snapshot_trigger); 2517 trigger_ready(&switch_output_trigger); 2518 perf_hooks__invoke_record_start(); 2519 2520 /* 2521 * Must write FINISHED_INIT so it will be seen after all other 2522 * synthesized user events, but before any regular events. 2523 */ 2524 err = write_finished_init(rec, false); 2525 if (err < 0) 2526 goto out_child; 2527 2528 for (;;) { 2529 unsigned long long hits = thread->samples; 2530 2531 /* 2532 * rec->evlist->bkw_mmap_state is possible to be 2533 * BKW_MMAP_EMPTY here: when done == true and 2534 * hits != rec->samples in previous round. 2535 * 2536 * evlist__toggle_bkw_mmap ensure we never 2537 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2538 */ 2539 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2540 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2541 2542 if (record__mmap_read_all(rec, false) < 0) { 2543 trigger_error(&auxtrace_snapshot_trigger); 2544 trigger_error(&switch_output_trigger); 2545 err = -1; 2546 goto out_child; 2547 } 2548 2549 if (auxtrace_record__snapshot_started) { 2550 auxtrace_record__snapshot_started = 0; 2551 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2552 record__read_auxtrace_snapshot(rec, false); 2553 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2554 pr_err("AUX area tracing snapshot failed\n"); 2555 err = -1; 2556 goto out_child; 2557 } 2558 } 2559 2560 if (trigger_is_hit(&switch_output_trigger)) { 2561 /* 2562 * If switch_output_trigger is hit, the data in 2563 * overwritable ring buffer should have been collected, 2564 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2565 * 2566 * If SIGUSR2 raise after or during record__mmap_read_all(), 2567 * record__mmap_read_all() didn't collect data from 2568 * overwritable ring buffer. Read again. 2569 */ 2570 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2571 continue; 2572 trigger_ready(&switch_output_trigger); 2573 2574 /* 2575 * Reenable events in overwrite ring buffer after 2576 * record__mmap_read_all(): we should have collected 2577 * data from it. 2578 */ 2579 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2580 2581 if (!quiet) 2582 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2583 record__waking(rec)); 2584 thread->waking = 0; 2585 fd = record__switch_output(rec, false); 2586 if (fd < 0) { 2587 pr_err("Failed to switch to new file\n"); 2588 trigger_error(&switch_output_trigger); 2589 err = fd; 2590 goto out_child; 2591 } 2592 2593 /* re-arm the alarm */ 2594 if (rec->switch_output.time) 2595 alarm(rec->switch_output.time); 2596 } 2597 2598 if (hits == thread->samples) { 2599 if (done || draining) 2600 break; 2601 err = fdarray__poll(&thread->pollfd, -1); 2602 /* 2603 * Propagate error, only if there's any. Ignore positive 2604 * number of returned events and interrupt error. 
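 * A poll() interrupted by one of the signals handled above also ends
 * up here with EINTR; treat it as a normal wakeup so the loop can
 * re-check 'done' and the triggers.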
2605 */ 2606 if (err > 0 || (err < 0 && errno == EINTR)) 2607 err = 0; 2608 thread->waking++; 2609 2610 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2611 record__thread_munmap_filtered, NULL) == 0) 2612 draining = true; 2613 2614 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread); 2615 if (err) 2616 goto out_child; 2617 evlist__ctlfd_update(rec->evlist, 2618 &thread->pollfd.entries[thread->ctlfd_pos]); 2619 } 2620 2621 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2622 switch (cmd) { 2623 case EVLIST_CTL_CMD_SNAPSHOT: 2624 hit_auxtrace_snapshot_trigger(rec); 2625 evlist__ctlfd_ack(rec->evlist); 2626 break; 2627 case EVLIST_CTL_CMD_STOP: 2628 done = 1; 2629 break; 2630 case EVLIST_CTL_CMD_ACK: 2631 case EVLIST_CTL_CMD_UNSUPPORTED: 2632 case EVLIST_CTL_CMD_ENABLE: 2633 case EVLIST_CTL_CMD_DISABLE: 2634 case EVLIST_CTL_CMD_EVLIST: 2635 case EVLIST_CTL_CMD_PING: 2636 default: 2637 break; 2638 } 2639 } 2640 2641 /* 2642 * When perf is starting the traced process, at the end events 2643 * die with the process and we wait for that. Thus no need to 2644 * disable events in this case. 2645 */ 2646 if (done && !disabled && !target__none(&opts->target)) { 2647 trigger_off(&auxtrace_snapshot_trigger); 2648 evlist__disable(rec->evlist); 2649 disabled = true; 2650 } 2651 } 2652 2653 trigger_off(&auxtrace_snapshot_trigger); 2654 trigger_off(&switch_output_trigger); 2655 2656 if (opts->auxtrace_snapshot_on_exit) 2657 record__auxtrace_snapshot_exit(rec); 2658 2659 if (forks && workload_exec_errno) { 2660 char msg[STRERR_BUFSIZE], strevsels[2048]; 2661 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2662 2663 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2664 2665 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2666 strevsels, argv[0], emsg); 2667 err = -1; 2668 goto out_child; 2669 } 2670 2671 if (!quiet) 2672 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2673 record__waking(rec)); 2674 2675 write_finished_init(rec, true); 2676 2677 if (target__none(&rec->opts.target)) 2678 record__synthesize_workload(rec, true); 2679 2680 out_child: 2681 record__stop_threads(rec); 2682 record__mmap_read_all(rec, true); 2683 out_free_threads: 2684 record__free_thread_data(rec); 2685 evlist__finalize_ctlfd(rec->evlist); 2686 record__aio_mmap_read_sync(rec); 2687 2688 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2689 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2690 session->header.env.comp_ratio = ratio + 0.5; 2691 } 2692 2693 if (forks) { 2694 int exit_status; 2695 2696 if (!child_finished) 2697 kill(rec->evlist->workload.pid, SIGTERM); 2698 2699 wait(&exit_status); 2700 2701 if (err < 0) 2702 status = err; 2703 else if (WIFEXITED(exit_status)) 2704 status = WEXITSTATUS(exit_status); 2705 else if (WIFSIGNALED(exit_status)) 2706 signr = WTERMSIG(exit_status); 2707 } else 2708 status = err; 2709 2710 if (rec->off_cpu) 2711 rec->bytes_written += off_cpu_write(rec->session); 2712 2713 record__synthesize(rec, true); 2714 /* this will be recalculated during process_buildids() */ 2715 rec->samples = 0; 2716 2717 if (!err) { 2718 if (!rec->timestamp_filename) { 2719 record__finish_output(rec); 2720 } else { 2721 fd = record__switch_output(rec, true); 2722 if (fd < 0) { 2723 status = fd; 2724 goto out_delete_session; 2725 } 2726 } 2727 } 2728 2729 perf_hooks__invoke_record_end(); 2730 2731 if (!err && !quiet) { 2732 char samples[128]; 2733 
const char *postfix = rec->timestamp_filename ? 2734 ".<timestamp>" : ""; 2735 2736 if (rec->samples && !rec->opts.full_auxtrace) 2737 scnprintf(samples, sizeof(samples), 2738 " (%" PRIu64 " samples)", rec->samples); 2739 else 2740 samples[0] = '\0'; 2741 2742 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2743 perf_data__size(data) / 1024.0 / 1024.0, 2744 data->path, postfix, samples); 2745 if (ratio) { 2746 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2747 rec->session->bytes_transferred / 1024.0 / 1024.0, 2748 ratio); 2749 } 2750 fprintf(stderr, " ]\n"); 2751 } 2752 2753 out_delete_session: 2754 #ifdef HAVE_EVENTFD_SUPPORT 2755 if (done_fd >= 0) 2756 close(done_fd); 2757 #endif 2758 zstd_fini(&session->zstd_data); 2759 perf_session__delete(session); 2760 2761 if (!opts->no_bpf_event) 2762 evlist__stop_sb_thread(rec->sb_evlist); 2763 return status; 2764 } 2765 2766 static void callchain_debug(struct callchain_param *callchain) 2767 { 2768 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2769 2770 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2771 2772 if (callchain->record_mode == CALLCHAIN_DWARF) 2773 pr_debug("callchain: stack dump size %d\n", 2774 callchain->dump_size); 2775 } 2776 2777 int record_opts__parse_callchain(struct record_opts *record, 2778 struct callchain_param *callchain, 2779 const char *arg, bool unset) 2780 { 2781 int ret; 2782 callchain->enabled = !unset; 2783 2784 /* --no-call-graph */ 2785 if (unset) { 2786 callchain->record_mode = CALLCHAIN_NONE; 2787 pr_debug("callchain: disabled\n"); 2788 return 0; 2789 } 2790 2791 ret = parse_callchain_record_opt(arg, callchain); 2792 if (!ret) { 2793 /* Enable data address sampling for DWARF unwind. */ 2794 if (callchain->record_mode == CALLCHAIN_DWARF) 2795 record->sample_address = true; 2796 callchain_debug(callchain); 2797 } 2798 2799 return ret; 2800 } 2801 2802 int record_parse_callchain_opt(const struct option *opt, 2803 const char *arg, 2804 int unset) 2805 { 2806 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2807 } 2808 2809 int record_callchain_opt(const struct option *opt, 2810 const char *arg __maybe_unused, 2811 int unset __maybe_unused) 2812 { 2813 struct callchain_param *callchain = opt->value; 2814 2815 callchain->enabled = true; 2816 2817 if (callchain->record_mode == CALLCHAIN_NONE) 2818 callchain->record_mode = CALLCHAIN_FP; 2819 2820 callchain_debug(callchain); 2821 return 0; 2822 } 2823 2824 static int perf_record_config(const char *var, const char *value, void *cb) 2825 { 2826 struct record *rec = cb; 2827 2828 if (!strcmp(var, "record.build-id")) { 2829 if (!strcmp(value, "cache")) 2830 rec->no_buildid_cache = false; 2831 else if (!strcmp(value, "no-cache")) 2832 rec->no_buildid_cache = true; 2833 else if (!strcmp(value, "skip")) 2834 rec->no_buildid = true; 2835 else if (!strcmp(value, "mmap")) 2836 rec->buildid_mmap = true; 2837 else 2838 return -1; 2839 return 0; 2840 } 2841 if (!strcmp(var, "record.call-graph")) { 2842 var = "call-graph.record-mode"; 2843 return perf_default_config(var, value, cb); 2844 } 2845 #ifdef HAVE_AIO_SUPPORT 2846 if (!strcmp(var, "record.aio")) { 2847 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2848 if (!rec->opts.nr_cblocks) 2849 rec->opts.nr_cblocks = nr_cblocks_default; 2850 } 2851 #endif 2852 if (!strcmp(var, "record.debuginfod")) { 2853 rec->debuginfod.urls = strdup(value); 2854 if (!rec->debuginfod.urls) 2855 return -ENOMEM; 2856 rec->debuginfod.set = 
true; 2857 } 2858 2859 return 0; 2860 } 2861 2862 2863 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2864 { 2865 struct record_opts *opts = (struct record_opts *)opt->value; 2866 2867 if (unset || !str) 2868 return 0; 2869 2870 if (!strcasecmp(str, "node")) 2871 opts->affinity = PERF_AFFINITY_NODE; 2872 else if (!strcasecmp(str, "cpu")) 2873 opts->affinity = PERF_AFFINITY_CPU; 2874 2875 return 0; 2876 } 2877 2878 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2879 { 2880 mask->nbits = nr_bits; 2881 mask->bits = bitmap_zalloc(mask->nbits); 2882 if (!mask->bits) 2883 return -ENOMEM; 2884 2885 return 0; 2886 } 2887 2888 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2889 { 2890 bitmap_free(mask->bits); 2891 mask->nbits = 0; 2892 } 2893 2894 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2895 { 2896 int ret; 2897 2898 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2899 if (ret) { 2900 mask->affinity.bits = NULL; 2901 return ret; 2902 } 2903 2904 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2905 if (ret) { 2906 record__mmap_cpu_mask_free(&mask->maps); 2907 mask->maps.bits = NULL; 2908 } 2909 2910 return ret; 2911 } 2912 2913 static void record__thread_mask_free(struct thread_mask *mask) 2914 { 2915 record__mmap_cpu_mask_free(&mask->maps); 2916 record__mmap_cpu_mask_free(&mask->affinity); 2917 } 2918 2919 static int record__parse_threads(const struct option *opt, const char *str, int unset) 2920 { 2921 int s; 2922 struct record_opts *opts = opt->value; 2923 2924 if (unset || !str || !strlen(str)) { 2925 opts->threads_spec = THREAD_SPEC__CPU; 2926 } else { 2927 for (s = 1; s < THREAD_SPEC__MAX; s++) { 2928 if (s == THREAD_SPEC__USER) { 2929 opts->threads_user_spec = strdup(str); 2930 if (!opts->threads_user_spec) 2931 return -ENOMEM; 2932 opts->threads_spec = THREAD_SPEC__USER; 2933 break; 2934 } 2935 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 2936 opts->threads_spec = s; 2937 break; 2938 } 2939 } 2940 } 2941 2942 if (opts->threads_spec == THREAD_SPEC__USER) 2943 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 2944 else 2945 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 2946 2947 return 0; 2948 } 2949 2950 static int parse_output_max_size(const struct option *opt, 2951 const char *str, int unset) 2952 { 2953 unsigned long *s = (unsigned long *)opt->value; 2954 static struct parse_tag tags_size[] = { 2955 { .tag = 'B', .mult = 1 }, 2956 { .tag = 'K', .mult = 1 << 10 }, 2957 { .tag = 'M', .mult = 1 << 20 }, 2958 { .tag = 'G', .mult = 1 << 30 }, 2959 { .tag = 0 }, 2960 }; 2961 unsigned long val; 2962 2963 if (unset) { 2964 *s = 0; 2965 return 0; 2966 } 2967 2968 val = parse_tag_value(str, tags_size); 2969 if (val != (unsigned long) -1) { 2970 *s = val; 2971 return 0; 2972 } 2973 2974 return -1; 2975 } 2976 2977 static int record__parse_mmap_pages(const struct option *opt, 2978 const char *str, 2979 int unset __maybe_unused) 2980 { 2981 struct record_opts *opts = opt->value; 2982 char *s, *p; 2983 unsigned int mmap_pages; 2984 int ret; 2985 2986 if (!str) 2987 return -EINVAL; 2988 2989 s = strdup(str); 2990 if (!s) 2991 return -ENOMEM; 2992 2993 p = strchr(s, ','); 2994 if (p) 2995 *p = '\0'; 2996 2997 if (*s) { 2998 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2999 if (ret) 3000 goto out_free; 3001 opts->mmap_pages = mmap_pages; 3002 } 3003 3004 if (!p) { 3005 ret = 0; 3006 goto 
out_free; 3007 } 3008 3009 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 3010 if (ret) 3011 goto out_free; 3012 3013 opts->auxtrace_mmap_pages = mmap_pages; 3014 3015 out_free: 3016 free(s); 3017 return ret; 3018 } 3019 3020 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 3021 { 3022 } 3023 3024 static int parse_control_option(const struct option *opt, 3025 const char *str, 3026 int unset __maybe_unused) 3027 { 3028 struct record_opts *opts = opt->value; 3029 3030 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 3031 } 3032 3033 static void switch_output_size_warn(struct record *rec) 3034 { 3035 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 3036 struct switch_output *s = &rec->switch_output; 3037 3038 wakeup_size /= 2; 3039 3040 if (s->size < wakeup_size) { 3041 char buf[100]; 3042 3043 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 3044 pr_warning("WARNING: switch-output data size lower than " 3045 "wakeup kernel buffer size (%s) " 3046 "expect bigger perf.data sizes\n", buf); 3047 } 3048 } 3049 3050 static int switch_output_setup(struct record *rec) 3051 { 3052 struct switch_output *s = &rec->switch_output; 3053 static struct parse_tag tags_size[] = { 3054 { .tag = 'B', .mult = 1 }, 3055 { .tag = 'K', .mult = 1 << 10 }, 3056 { .tag = 'M', .mult = 1 << 20 }, 3057 { .tag = 'G', .mult = 1 << 30 }, 3058 { .tag = 0 }, 3059 }; 3060 static struct parse_tag tags_time[] = { 3061 { .tag = 's', .mult = 1 }, 3062 { .tag = 'm', .mult = 60 }, 3063 { .tag = 'h', .mult = 60*60 }, 3064 { .tag = 'd', .mult = 60*60*24 }, 3065 { .tag = 0 }, 3066 }; 3067 unsigned long val; 3068 3069 /* 3070 * If we're using --switch-output-events, then we imply its 3071 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 3072 * thread to its parent. 
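 * The accepted values mirror the option help below: "signal", a size
 * with a B/K/M/G suffix (e.g. "100M"), or a time with an s/m/h/d
 * suffix (e.g. "30s"), parsed via the tag tables above.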
3073 */ 3074 if (rec->switch_output_event_set) { 3075 if (record__threads_enabled(rec)) { 3076 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 3077 return 0; 3078 } 3079 goto do_signal; 3080 } 3081 3082 if (!s->set) 3083 return 0; 3084 3085 if (record__threads_enabled(rec)) { 3086 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 3087 return 0; 3088 } 3089 3090 if (!strcmp(s->str, "signal")) { 3091 do_signal: 3092 s->signal = true; 3093 pr_debug("switch-output with SIGUSR2 signal\n"); 3094 goto enabled; 3095 } 3096 3097 val = parse_tag_value(s->str, tags_size); 3098 if (val != (unsigned long) -1) { 3099 s->size = val; 3100 pr_debug("switch-output with %s size threshold\n", s->str); 3101 goto enabled; 3102 } 3103 3104 val = parse_tag_value(s->str, tags_time); 3105 if (val != (unsigned long) -1) { 3106 s->time = val; 3107 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 3108 s->str, s->time); 3109 goto enabled; 3110 } 3111 3112 return -1; 3113 3114 enabled: 3115 rec->timestamp_filename = true; 3116 s->enabled = true; 3117 3118 if (s->size && !rec->opts.no_buffering) 3119 switch_output_size_warn(rec); 3120 3121 return 0; 3122 } 3123 3124 static const char * const __record_usage[] = { 3125 "perf record [<options>] [<command>]", 3126 "perf record [<options>] -- <command> [<options>]", 3127 NULL 3128 }; 3129 const char * const *record_usage = __record_usage; 3130 3131 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3132 struct perf_sample *sample, struct machine *machine) 3133 { 3134 /* 3135 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3136 * no need to add them twice. 3137 */ 3138 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3139 return 0; 3140 return perf_event__process_mmap(tool, event, sample, machine); 3141 } 3142 3143 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3144 struct perf_sample *sample, struct machine *machine) 3145 { 3146 /* 3147 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3148 * no need to add them twice. 3149 */ 3150 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3151 return 0; 3152 3153 return perf_event__process_mmap2(tool, event, sample, machine); 3154 } 3155 3156 static int process_timestamp_boundary(struct perf_tool *tool, 3157 union perf_event *event __maybe_unused, 3158 struct perf_sample *sample, 3159 struct machine *machine __maybe_unused) 3160 { 3161 struct record *rec = container_of(tool, struct record, tool); 3162 3163 set_timestamp_boundary(rec, sample->time); 3164 return 0; 3165 } 3166 3167 static int parse_record_synth_option(const struct option *opt, 3168 const char *str, 3169 int unset __maybe_unused) 3170 { 3171 struct record_opts *opts = opt->value; 3172 char *p = strdup(str); 3173 3174 if (p == NULL) 3175 return -1; 3176 3177 opts->synth = parse_synth_opt(p); 3178 free(p); 3179 3180 if (opts->synth < 0) { 3181 pr_err("Invalid synth option: %s\n", str); 3182 return -1; 3183 } 3184 return 0; 3185 } 3186 3187 /* 3188 * XXX Ideally would be local to cmd_record() and passed to a record__new 3189 * because we need to have access to it in record__exit, that is called 3190 * after cmd_record() exits, but since record_options need to be accessible to 3191 * builtin-script, leave it here. 3192 * 3193 * At least we don't ouch it in all the other functions here directly. 
3194 * 3195 * Just say no to tons of global variables, sigh. 3196 */ 3197 static struct record record = { 3198 .opts = { 3199 .sample_time = true, 3200 .mmap_pages = UINT_MAX, 3201 .user_freq = UINT_MAX, 3202 .user_interval = ULLONG_MAX, 3203 .freq = 4000, 3204 .target = { 3205 .uses_mmap = true, 3206 .default_per_cpu = true, 3207 }, 3208 .mmap_flush = MMAP_FLUSH_DEFAULT, 3209 .nr_threads_synthesize = 1, 3210 .ctl_fd = -1, 3211 .ctl_fd_ack = -1, 3212 .synth = PERF_SYNTH_ALL, 3213 }, 3214 .tool = { 3215 .sample = process_sample_event, 3216 .fork = perf_event__process_fork, 3217 .exit = perf_event__process_exit, 3218 .comm = perf_event__process_comm, 3219 .namespaces = perf_event__process_namespaces, 3220 .mmap = build_id__process_mmap, 3221 .mmap2 = build_id__process_mmap2, 3222 .itrace_start = process_timestamp_boundary, 3223 .aux = process_timestamp_boundary, 3224 .ordered_events = true, 3225 }, 3226 }; 3227 3228 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3229 "\n\t\t\t\tDefault: fp"; 3230 3231 static bool dry_run; 3232 3233 /* 3234 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3235 * with it and switch to use the library functions in perf_evlist that came 3236 * from builtin-record.c, i.e. use record_opts, 3237 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3238 * using pipes, etc. 3239 */ 3240 static struct option __record_options[] = { 3241 OPT_CALLBACK('e', "event", &record.evlist, "event", 3242 "event selector. use 'perf list' to list available events", 3243 parse_events_option), 3244 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3245 "event filter", parse_filter), 3246 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3247 NULL, "don't record events from perf itself", 3248 exclude_perf), 3249 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3250 "record events on existing process id"), 3251 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3252 "record events on existing thread id"), 3253 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3254 "collect data with this RT SCHED_FIFO priority"), 3255 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3256 "collect data without buffering"), 3257 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3258 "collect raw sample records from all opened counters"), 3259 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3260 "system-wide collection from all CPUs"), 3261 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3262 "list of cpus to monitor"), 3263 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3264 OPT_STRING('o', "output", &record.data.path, "file", 3265 "output file name"), 3266 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3267 &record.opts.no_inherit_set, 3268 "child tasks do not inherit counters"), 3269 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3270 "synthesize non-sample events at the end of output"), 3271 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3272 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3273 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3274 "Fail if the specified frequency can't be used"), 3275 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3276 "profile at this frequency", 3277 record__parse_freq), 3278 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3279 "number of mmap data pages and AUX area tracing mmap pages", 3280 
record__parse_mmap_pages), 3281 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3282 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3283 record__mmap_flush_parse), 3284 OPT_BOOLEAN(0, "group", &record.opts.group, 3285 "put the counters into a counter group"), 3286 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3287 NULL, "enables call-graph recording" , 3288 &record_callchain_opt), 3289 OPT_CALLBACK(0, "call-graph", &record.opts, 3290 "record_mode[,record_size]", record_callchain_help, 3291 &record_parse_callchain_opt), 3292 OPT_INCR('v', "verbose", &verbose, 3293 "be more verbose (show counter open errors, etc)"), 3294 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3295 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3296 "per thread counts"), 3297 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3298 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3299 "Record the sample physical addresses"), 3300 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3301 "Record the sampled data address data page size"), 3302 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3303 "Record the sampled code address (ip) page size"), 3304 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3305 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, 3306 "Record the sample identifier"), 3307 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3308 &record.opts.sample_time_set, 3309 "Record the sample timestamps"), 3310 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3311 "Record the sample period"), 3312 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3313 "don't sample"), 3314 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3315 &record.no_buildid_cache_set, 3316 "do not update the buildid cache"), 3317 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3318 &record.no_buildid_set, 3319 "do not collect buildids in perf.data"), 3320 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3321 "monitor event in cgroup name only", 3322 parse_cgroups), 3323 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 3324 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 3325 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3326 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3327 "user to profile"), 3328 3329 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3330 "branch any", "sample any taken branches", 3331 parse_branch_stack), 3332 3333 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3334 "branch filter mask", "branch stack filter modes", 3335 parse_branch_stack), 3336 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3337 "sample by weight (on special events only)"), 3338 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3339 "sample transaction flags (special events only)"), 3340 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3341 "use per-thread mmaps"), 3342 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3343 "sample selected machine registers on interrupt," 3344 " use '-I?' to list register names", parse_intr_regs), 3345 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3346 "sample selected machine registers on interrupt," 3347 " use '--user-regs=?' 
to list register names", parse_user_regs), 3348 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3349 "Record running/enabled time of read (:S) events"), 3350 OPT_CALLBACK('k', "clockid", &record.opts, 3351 "clockid", "clockid to use for events, see clock_gettime()", 3352 parse_clockid), 3353 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3354 "opts", "AUX area tracing Snapshot Mode", ""), 3355 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3356 "opts", "sample AUX area", ""), 3357 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3358 "per thread proc mmap processing timeout in ms"), 3359 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3360 "Record namespaces events"), 3361 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3362 "Record cgroup events"), 3363 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3364 &record.opts.record_switch_events_set, 3365 "Record context switch events"), 3366 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3367 "Configure all used events to run in kernel space.", 3368 PARSE_OPT_EXCLUSIVE), 3369 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3370 "Configure all used events to run in user space.", 3371 PARSE_OPT_EXCLUSIVE), 3372 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3373 "collect kernel callchains"), 3374 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3375 "collect user callchains"), 3376 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3377 "clang binary to use for compiling BPF scriptlets"), 3378 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3379 "options passed to clang when compiling BPF scriptlets"), 3380 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3381 "file", "vmlinux pathname"), 3382 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3383 "Record build-id of all DSOs regardless of hits"), 3384 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3385 "Record build-id in map events"), 3386 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3387 "append timestamp to output filename"), 3388 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3389 "Record timestamp boundary (time of first/last samples)"), 3390 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3391 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3392 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3393 "signal"), 3394 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3395 "switch output event selector. 
use 'perf list' to list available events", 3396 parse_events_option_new_evlist), 3397 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3398 "Limit number of switch output generated files"), 3399 OPT_BOOLEAN(0, "dry-run", &dry_run, 3400 "Parse options then exit"), 3401 #ifdef HAVE_AIO_SUPPORT 3402 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3403 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3404 record__aio_parse), 3405 #endif 3406 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3407 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3408 record__parse_affinity), 3409 #ifdef HAVE_ZSTD_SUPPORT 3410 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3411 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3412 record__parse_comp_level), 3413 #endif 3414 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3415 "size", "Limit the maximum size of the output file", parse_output_max_size), 3416 OPT_UINTEGER(0, "num-thread-synthesize", 3417 &record.opts.nr_threads_synthesize, 3418 "number of threads to run for event synthesis"), 3419 #ifdef HAVE_LIBPFM 3420 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3421 "libpfm4 event selector. use 'perf list' to list available events", 3422 parse_libpfm_events_option), 3423 #endif 3424 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3425 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3426 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3427 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3428 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3429 parse_control_option), 3430 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3431 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3432 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3433 &record.debuginfod.set, "debuginfod urls", 3434 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3435 "system"), 3436 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3437 "write collected trace data into several data files using parallel threads", 3438 record__parse_threads), 3439 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), 3440 OPT_END() 3441 }; 3442 3443 struct option *record_options = __record_options; 3444 3445 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3446 { 3447 struct perf_cpu cpu; 3448 int idx; 3449 3450 if (cpu_map__is_dummy(cpus)) 3451 return 0; 3452 3453 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 3454 if (cpu.cpu == -1) 3455 continue; 3456 /* Return ENODEV if input cpu is greater than max cpu */ 3457 if ((unsigned long)cpu.cpu > mask->nbits) 3458 return -ENODEV; 3459 set_bit(cpu.cpu, mask->bits); 3460 } 3461 3462 return 0; 3463 } 3464 3465 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3466 { 3467 struct perf_cpu_map *cpus; 3468 3469 cpus = perf_cpu_map__new(mask_spec); 3470 if (!cpus) 3471 return -ENOMEM; 3472 3473 bitmap_zero(mask->bits, mask->nbits); 3474 if (record__mmap_cpu_mask_init(mask, cpus)) { 3475 perf_cpu_map__put(cpus); return -ENODEV; } 3476 3477 perf_cpu_map__put(cpus); 3478 3479 return 0; 3480 }
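/*
 * Illustrative sketch, kept out of the build: how a CPU list spec such as
 * "0-2" is turned into an mmap cpu mask with the helpers above. The
 * function name and the "0-2" spec are made up for the example; the real
 * caller is record__init_thread_masks_spec() below.
 */
#if 0
static int example__mask_from_spec(void)
{
	struct mmap_cpu_mask mask;
	int ret;

	ret = record__mmap_cpu_mask_alloc(&mask, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	/* Sets bits 0..2, or fails with -ENODEV if a CPU is out of range. */
	ret = record__mmap_cpu_mask_init_spec(&mask, "0-2");

	record__mmap_cpu_mask_free(&mask);
	return ret;
}
#endif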
3481 3482 static void record__free_thread_masks(struct record *rec, int nr_threads) 3483 { 3484 int t; 3485 3486 if (rec->thread_masks) 3487 for (t = 0; t < nr_threads; t++) 3488 record__thread_mask_free(&rec->thread_masks[t]); 3489 3490 zfree(&rec->thread_masks); 3491 } 3492 3493 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3494 { 3495 int t, ret; 3496 3497 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3498 if (!rec->thread_masks) { 3499 pr_err("Failed to allocate thread masks\n"); 3500 return -ENOMEM; 3501 } 3502 3503 for (t = 0; t < nr_threads; t++) { 3504 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3505 if (ret) { 3506 pr_err("Failed to allocate thread masks[%d]\n", t); 3507 goto out_free; 3508 } 3509 } 3510 3511 return 0; 3512 3513 out_free: 3514 record__free_thread_masks(rec, nr_threads); 3515 3516 return ret; 3517 } 3518 3519 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3520 { 3521 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3522 3523 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3524 if (ret) 3525 return ret; 3526 3527 rec->nr_threads = nr_cpus; 3528 pr_debug("nr_threads: %d\n", rec->nr_threads); 3529 3530 for (t = 0; t < rec->nr_threads; t++) { 3531 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3532 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3533 if (verbose) { 3534 pr_debug("thread_masks[%d]: ", t); 3535 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3536 pr_debug("thread_masks[%d]: ", t); 3537 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3538 } 3539 } 3540 3541 return 0; 3542 } 3543 3544 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3545 const char **maps_spec, const char **affinity_spec, 3546 u32 nr_spec) 3547 { 3548 u32 s; 3549 int ret = 0, t = 0; 3550 struct mmap_cpu_mask cpus_mask; 3551 struct thread_mask thread_mask, full_mask, *thread_masks; 3552 3553 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3554 if (ret) { 3555 pr_err("Failed to allocate CPUs mask\n"); 3556 return ret; 3557 } 3558 3559 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus); 3560 if (ret) { 3561 pr_err("Failed to init cpu mask\n"); 3562 goto out_free_cpu_mask; 3563 } 3564 3565 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3566 if (ret) { 3567 pr_err("Failed to allocate full mask\n"); 3568 goto out_free_cpu_mask; 3569 } 3570 3571 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3572 if (ret) { 3573 pr_err("Failed to allocate thread mask\n"); 3574 goto out_free_full_and_cpu_masks; 3575 } 3576 3577 for (s = 0; s < nr_spec; s++) { 3578 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3579 if (ret) { 3580 pr_err("Failed to initialize maps thread mask\n"); 3581 goto out_free; 3582 } 3583 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3584 if (ret) { 3585 pr_err("Failed to initialize affinity thread mask\n"); 3586 goto out_free; 3587 } 3588 3589 /* ignore invalid CPUs but do not allow empty masks */ 3590 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3591 cpus_mask.bits, thread_mask.maps.nbits)) { 3592 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3593 ret = -EINVAL; 3594 goto out_free; 3595 } 3596 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3597 cpus_mask.bits, 
thread_mask.affinity.nbits)) { 3598 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3599 ret = -EINVAL; 3600 goto out_free; 3601 } 3602 3603 /* do not allow intersection with other masks (full_mask) */ 3604 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3605 thread_mask.maps.nbits)) { 3606 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3607 ret = -EINVAL; 3608 goto out_free; 3609 } 3610 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3611 thread_mask.affinity.nbits)) { 3612 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3613 ret = -EINVAL; 3614 goto out_free; 3615 } 3616 3617 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3618 thread_mask.maps.bits, full_mask.maps.nbits); 3619 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3620 thread_mask.affinity.bits, full_mask.maps.nbits); 3621 3622 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3623 if (!thread_masks) { 3624 pr_err("Failed to reallocate thread masks\n"); 3625 ret = -ENOMEM; 3626 goto out_free; 3627 } 3628 rec->thread_masks = thread_masks; 3629 rec->thread_masks[t] = thread_mask; 3630 if (verbose) { 3631 pr_debug("thread_masks[%d]: ", t); 3632 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3633 pr_debug("thread_masks[%d]: ", t); 3634 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3635 } 3636 t++; 3637 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3638 if (ret) { 3639 pr_err("Failed to allocate thread mask\n"); 3640 goto out_free_full_and_cpu_masks; 3641 } 3642 } 3643 rec->nr_threads = t; 3644 pr_debug("nr_threads: %d\n", rec->nr_threads); 3645 if (!rec->nr_threads) 3646 ret = -EINVAL; 3647 3648 out_free: 3649 record__thread_mask_free(&thread_mask); 3650 out_free_full_and_cpu_masks: 3651 record__thread_mask_free(&full_mask); 3652 out_free_cpu_mask: 3653 record__mmap_cpu_mask_free(&cpus_mask); 3654 3655 return ret; 3656 } 3657 3658 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3659 { 3660 int ret; 3661 struct cpu_topology *topo; 3662 3663 topo = cpu_topology__new(); 3664 if (!topo) { 3665 pr_err("Failed to allocate CPU topology\n"); 3666 return -ENOMEM; 3667 } 3668 3669 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3670 topo->core_cpus_list, topo->core_cpus_lists); 3671 cpu_topology__delete(topo); 3672 3673 return ret; 3674 } 3675 3676 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3677 { 3678 int ret; 3679 struct cpu_topology *topo; 3680 3681 topo = cpu_topology__new(); 3682 if (!topo) { 3683 pr_err("Failed to allocate CPU topology\n"); 3684 return -ENOMEM; 3685 } 3686 3687 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3688 topo->package_cpus_list, topo->package_cpus_lists); 3689 cpu_topology__delete(topo); 3690 3691 return ret; 3692 } 3693 3694 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3695 { 3696 u32 s; 3697 int ret; 3698 const char **spec; 3699 struct numa_topology *topo; 3700 3701 topo = numa_topology__new(); 3702 if (!topo) { 3703 pr_err("Failed to allocate NUMA topology\n"); 3704 return -ENOMEM; 3705 } 3706 3707 spec = zalloc(topo->nr * sizeof(char *)); 3708 if (!spec) { 3709 pr_err("Failed to allocate NUMA spec\n"); 3710 ret = -ENOMEM; 3711 goto out_delete_topo; 3712 } 3713 for (s = 0; s < topo->nr; s++) 3714 spec[s] = topo->nodes[s].cpus; 3715 3716 ret = 
record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3717 3718 zfree(&spec); 3719 3720 out_delete_topo: 3721 numa_topology__delete(topo); 3722 3723 return ret; 3724 } 3725 3726 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3727 { 3728 int t, ret; 3729 u32 s, nr_spec = 0; 3730 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3731 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3732 3733 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3734 spec = strtok_r(user_spec, ":", &spec_ptr); 3735 if (spec == NULL) 3736 break; 3737 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3738 mask = strtok_r(spec, "/", &mask_ptr); 3739 if (mask == NULL) 3740 break; 3741 pr_debug2(" maps mask: %s\n", mask); 3742 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3743 if (!tmp_spec) { 3744 pr_err("Failed to reallocate maps spec\n"); 3745 ret = -ENOMEM; 3746 goto out_free; 3747 } 3748 maps_spec = tmp_spec; 3749 maps_spec[nr_spec] = dup_mask = strdup(mask); 3750 if (!maps_spec[nr_spec]) { 3751 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3752 ret = -ENOMEM; 3753 goto out_free; 3754 } 3755 mask = strtok_r(NULL, "/", &mask_ptr); 3756 if (mask == NULL) { 3757 pr_err("Invalid thread maps or affinity specs\n"); 3758 ret = -EINVAL; 3759 goto out_free; 3760 } 3761 pr_debug2(" affinity mask: %s\n", mask); 3762 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3763 if (!tmp_spec) { 3764 pr_err("Failed to reallocate affinity spec\n"); 3765 ret = -ENOMEM; 3766 goto out_free; 3767 } 3768 affinity_spec = tmp_spec; 3769 affinity_spec[nr_spec] = strdup(mask); 3770 if (!affinity_spec[nr_spec]) { 3771 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3772 ret = -ENOMEM; 3773 goto out_free; 3774 } 3775 dup_mask = NULL; 3776 nr_spec++; 3777 } 3778 3779 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3780 (const char **)affinity_spec, nr_spec); 3781 3782 out_free: 3783 free(dup_mask); 3784 for (s = 0; s < nr_spec; s++) { 3785 if (maps_spec) 3786 free(maps_spec[s]); 3787 if (affinity_spec) 3788 free(affinity_spec[s]); 3789 } 3790 free(affinity_spec); 3791 free(maps_spec); 3792 3793 return ret; 3794 } 3795 3796 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3797 { 3798 int ret; 3799 3800 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3801 if (ret) 3802 return ret; 3803 3804 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus)) 3805 return -ENODEV; 3806 3807 rec->nr_threads = 1; 3808 3809 return 0; 3810 } 3811 3812 static int record__init_thread_masks(struct record *rec) 3813 { 3814 int ret = 0; 3815 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; 3816 3817 if (!record__threads_enabled(rec)) 3818 return record__init_thread_default_masks(rec, cpus); 3819 3820 if (evlist__per_thread(rec->evlist)) { 3821 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3822 return -EINVAL; 3823 } 3824 3825 switch (rec->opts.threads_spec) { 3826 case THREAD_SPEC__CPU: 3827 ret = record__init_thread_cpu_masks(rec, cpus); 3828 break; 3829 case THREAD_SPEC__CORE: 3830 ret = record__init_thread_core_masks(rec, cpus); 3831 break; 3832 case THREAD_SPEC__PACKAGE: 3833 ret = record__init_thread_package_masks(rec, cpus); 3834 break; 3835 case THREAD_SPEC__NUMA: 3836 ret = record__init_thread_numa_masks(rec, cpus); 3837 break; 3838 case THREAD_SPEC__USER: 
3839 ret = record__init_thread_user_masks(rec, cpus); 3840 break; 3841 default: 3842 break; 3843 } 3844 3845 return ret; 3846 } 3847 3848 int cmd_record(int argc, const char **argv) 3849 { 3850 int err; 3851 struct record *rec = &record; 3852 char errbuf[BUFSIZ]; 3853 3854 setlocale(LC_ALL, ""); 3855 3856 #ifndef HAVE_LIBBPF_SUPPORT 3857 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 3858 set_nobuild('\0', "clang-path", true); 3859 set_nobuild('\0', "clang-opt", true); 3860 # undef set_nobuild 3861 #endif 3862 3863 #ifndef HAVE_BPF_PROLOGUE 3864 # if !defined (HAVE_DWARF_SUPPORT) 3865 # define REASON "NO_DWARF=1" 3866 # elif !defined (HAVE_LIBBPF_SUPPORT) 3867 # define REASON "NO_LIBBPF=1" 3868 # else 3869 # define REASON "this architecture doesn't support BPF prologue" 3870 # endif 3871 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 3872 set_nobuild('\0', "vmlinux", true); 3873 # undef set_nobuild 3874 # undef REASON 3875 #endif 3876 3877 #ifndef HAVE_BPF_SKEL 3878 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) 3879 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); 3880 # undef set_nobuild 3881 #endif 3882 3883 rec->opts.affinity = PERF_AFFINITY_SYS; 3884 3885 rec->evlist = evlist__new(); 3886 if (rec->evlist == NULL) 3887 return -ENOMEM; 3888 3889 err = perf_config(perf_record_config, rec); 3890 if (err) 3891 return err; 3892 3893 argc = parse_options(argc, argv, record_options, record_usage, 3894 PARSE_OPT_STOP_AT_NON_OPTION); 3895 if (quiet) 3896 perf_quiet_option(); 3897 3898 err = symbol__validate_sym_arguments(); 3899 if (err) 3900 return err; 3901 3902 perf_debuginfod_setup(&record.debuginfod); 3903 3904 /* Make system wide (-a) the default target. */ 3905 if (!argc && target__none(&rec->opts.target)) 3906 rec->opts.target.system_wide = true; 3907 3908 if (nr_cgroups && !rec->opts.target.system_wide) { 3909 usage_with_options_msg(record_usage, record_options, 3910 "cgroup monitoring only available in system-wide mode"); 3911 3912 } 3913 3914 if (rec->buildid_mmap) { 3915 if (!perf_can_record_build_id()) { 3916 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 3917 err = -EINVAL; 3918 goto out_opts; 3919 } 3920 pr_debug("Enabling build id in mmap2 events.\n"); 3921 /* Enable mmap build id synthesizing. */ 3922 symbol_conf.buildid_mmap2 = true; 3923 /* Enable perf_event_attr::build_id bit. */ 3924 rec->opts.build_id = true; 3925 /* Disable build id cache. 
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		err = -EINVAL;
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		}
		else
			usage_with_options(record_usage, record_options);
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
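	/*
	 * buildid_all is taken here to mean "collect build ids for every DSO
	 * seen in the file" (as with the --buildid-all option), not only for
	 * DSOs with sample hits, so the AUX trace does not have to be decoded.
	 */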
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}