// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "util/build-id.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/mmap.h"
#include "util/target.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/record.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/synthetic-events.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#ifndef HAVE_GETTID
#include <syscall.h>
#endif
#include <sched.h>
#include <signal.h>
#ifdef HAVE_EVENTFD_SUPPORT
#include <sys/eventfd.h>
#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
#include <sys/time.h>

struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};

struct thread_mask {
	struct mmap_cpu_mask maps;
	struct mmap_cpu_mask affinity;
};

struct record_thread {
	pid_t tid;
	struct thread_mask *mask;
	struct {
		int msg[2];
		int ack[2];
	} pipes;
	struct fdarray pollfd;
	int ctlfd_pos;
	int nr_mmaps;
	struct mmap **maps;
	struct mmap **overwrite_maps;
	struct record *rec;
	unsigned long long samples;
	unsigned long waking;
	u64 bytes_written;
	u64 bytes_transferred;
	u64 bytes_compressed;
};

static __thread struct record_thread *thread;

enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};

enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};

struct record {
	struct perf_tool tool;
	struct record_opts opts;
	u64
		bytes_written;
	struct perf_data data;
	struct auxtrace_record *itr;
	struct evlist *evlist;
	struct perf_session *session;
	struct evlist *sb_evlist;
	pthread_t thread_id;
	int realtime_prio;
	bool switch_output_event_set;
	bool no_buildid;
	bool no_buildid_set;
	bool no_buildid_cache;
	bool no_buildid_cache_set;
	bool buildid_all;
	bool buildid_mmap;
	bool timestamp_filename;
	bool timestamp_boundary;
	struct switch_output switch_output;
	unsigned long long samples;
	unsigned long output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod debuginfod;
	int nr_threads;
	struct thread_mask *thread_masks;
	struct record_thread *thread_data;
};

static volatile int done;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};

#ifndef HAVE_GETTID
static inline pid_t gettid(void)
{
	return (pid_t)syscall(__NR_gettid);
}
#endif

static int record__threads_enabled(struct record *rec)
{
	return rec->opts.threads_spec;
}

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static u64 record__bytes_written(struct record *rec)
{
	int t;
	u64 bytes_written = rec->bytes_written;
	struct record_thread *thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		bytes_written += thread_data[t].bytes_written;

	return bytes_written;
}

static bool record__output_max_size_exceeded(struct record *rec)
{
	return rec->output_max_size &&
	       (record__bytes_written(rec) >= rec->output_max_size);
}

static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (map && map->file)
		file = map->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	if (map && map->file)
		thread->bytes_written += size;
	else
		rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				record__bytes_written(rec) >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
			     void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf =
		buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	do {
		rc = aio_write(cblock);
		if (rc == 0) {
			break;
		} else if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	} while (1);

	return rc;
}

static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
{
	void *rem_buf;
	off_t rem_off;
	size_t rem_size;
	int rc, aio_errno;
	ssize_t aio_ret, written;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	written = aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		written = 0;
	}

	rem_size = cblock->aio_nbytes - written;

	if (rem_size == 0) {
		cblock->aio_fildes = -1;
		/*
		 * md->refcount is incremented in record__aio_pushfn() for
		 * every aio write request started in record__aio_push() so
		 * decrement it because the request is now complete.
		 */
		perf_mmap__put(&md->core);
		rc = 1;
	} else {
		/*
		 * The aio write request may need to be restarted with the
		 * remainder if the kernel didn't write the whole chunk
		 * at once.
		 */
		rem_off = cblock->aio_offset + written;
		rem_buf = (void *)(cblock->aio_buf + written);
		record__aio_write(cblock, cblock->aio_fildes,
				  rem_buf, rem_size, rem_off);
		rc = 0;
	}

	return rc;
}

static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to finish before the next
				 * allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}

struct record_aio {
	struct record *rec;
	void *data;
	size_t size;
};

static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as
	 * fast as possible, calling perf_mmap__consume() from the
	 * perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first
	 * move part of the data from map->start till the upper bound and then
	 * the remainder from the beginning of the kernel buffer till the end
	 * of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer has completed.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after
		 * the started aio request completes, or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}

static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till the map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount, incremented in record__aio_pushfn(),
		 * if the record__aio_write() operation failed to start;
		 * otherwise map->refcount is decremented in record__aio_complete()
		 * after the aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
	return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
	int i;
	struct evlist *evlist = rec->evlist;
	struct mmap *maps = evlist->mmap;

	if (!record__aio_enabled(rec))
		return;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		struct mmap *map = &maps[i];

		if (map->core.base)
			record__aio_sync(map, true);
	}
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
	return rec->opts.nr_cblocks > 0;
}

#define MMAP_FLUSH_DEFAULT 1
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}

#ifdef HAVE_ZSTD_SUPPORT
static unsigned int comp_level_default = 1;

static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
	} else {
		if (str)
			opts->comp_level = strtol(str, NULL, 0);
		if (!opts->comp_level)
			opts->comp_level = comp_level_default;
	}

	return 0;
}
#endif
static unsigned int comp_level_max = 22;

static int record__comp_enabled(struct record *rec)
{
	return rec->opts.comp_level > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec,
			     NULL, event, event->header.size);
}

static int process_locked_synthesized_event(struct perf_tool *tool,
					    union perf_event *event,
					    struct perf_sample *sample __maybe_unused,
					    struct machine *machine __maybe_unused)
{
	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
	int ret;

	pthread_mutex_lock(&synth_lock);
	ret = process_synthesized_event(tool, event, sample, machine);
	pthread_mutex_unlock(&synth_lock);
	return ret;
}

static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
	struct record *rec = to;

	if (record__comp_enabled(rec)) {
		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
		bf = map->data;
	}

	thread->samples++;
	return record__write(rec, map, bf, size);
}

static volatile int signr = -1;
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
static int done_fd = -1;
#endif

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
#ifdef HAVE_EVENTFD_SUPPORT
{
	u64 tmp = 1;
	/*
	 * It is possible for this signal handler to run after done is checked
	 * in the main loop, but before the perf counter fds are polled. If this
	 * happens, the poll() will continue to wait even though done is set,
	 * and will only break out if either another signal is received, or the
	 * counters are ready for read. To ensure the poll() doesn't sleep when
	 * done is set, use an eventfd (done_fd) to wake up the poll().
	 */
	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
		pr_err("failed to signal wakeup fd, error: %m\n");
}
#endif // HAVE_EVENTFD_SUPPORT
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct mmap *map)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
		struct mmap *map = &rec->evlist->mmap[i];

		if (!map->auxtrace_mmap.base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_snapshot_exit(struct record *rec)
{
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return 0;

	if (!auxtrace_record__snapshot_started &&
	    auxtrace_record__snapshot_start(rec->itr))
		return -1;

	record__read_auxtrace_snapshot(rec, true);
	if (trigger_is_error(&auxtrace_snapshot_trigger))
		return -1;

	return 0;
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
	    && record__threads_enabled(rec)) {
		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
		return -EINVAL;
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	auxtrace_regroup_aux_output(rec->evlist);

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__config_text_poke(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/* Nothing to do if text poke is already configured */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.text_poke)
			return 0;
	}

	err = parse_events(evlist, "dummy:u", NULL);
	if (err)
		return err;

	evsel = evlist__last(evlist);

	evsel->core.attr.freq = 0;
	evsel->core.attr.sample_period = 1;
	evsel->core.attr.text_poke = 1;
	evsel->core.attr.ksymbol = 1;

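	/* Collect the dummy event system wide, without its own AUX samples, and enable it immediately at open. */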
	evsel->core.system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	/* Text poke must be collected on all CPUs */
	perf_cpu_map__put(evsel->core.own_cpus);
	evsel->core.own_cpus = perf_cpu_map__new(NULL);
	perf_cpu_map__put(evsel->core.cpus);
	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);

	evsel__set_sample_bit(evsel, TIME);

	return 0;
}

static bool record__kcore_readable(struct machine *machine)
{
	char kcore[PATH_MAX];
	int fd;

	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);

	fd = open(kcore, O_RDONLY);
	if (fd < 0)
		return false;

	close(fd);

	return true;
}

static int record__kcore_copy(struct machine *machine, struct perf_data *data)
{
	char from_dir[PATH_MAX];
	char kcore_dir[PATH_MAX];
	int ret;

	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);

	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
	if (ret)
		return ret;

	return kcore_copy(from_dir, kcore_dir);
}

static void record__thread_data_init_pipes(struct record_thread *thread_data)
{
	thread_data->pipes.msg[0] = -1;
	thread_data->pipes.msg[1] = -1;
	thread_data->pipes.ack[0] = -1;
	thread_data->pipes.ack[1] = -1;
}

static int record__thread_data_open_pipes(struct record_thread *thread_data)
{
	if (pipe(thread_data->pipes.msg))
		return -EINVAL;

	if (pipe(thread_data->pipes.ack)) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
		return -EINVAL;
	}

	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
		  thread_data->pipes.msg[0], thread_data->pipes.msg[1],
		  thread_data->pipes.ack[0], thread_data->pipes.ack[1]);

	return 0;
}

static void record__thread_data_close_pipes(struct record_thread *thread_data)
{
	if (thread_data->pipes.msg[0] != -1) {
		close(thread_data->pipes.msg[0]);
		thread_data->pipes.msg[0] = -1;
	}
	if (thread_data->pipes.msg[1] != -1) {
		close(thread_data->pipes.msg[1]);
		thread_data->pipes.msg[1] = -1;
	}
	if (thread_data->pipes.ack[0] != -1) {
		close(thread_data->pipes.ack[0]);
		thread_data->pipes.ack[0] = -1;
	}
	if (thread_data->pipes.ack[1] != -1) {
		close(thread_data->pipes.ack[1]);
		thread_data->pipes.ack[1] = -1;
	}
}

static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
	struct mmap *mmap = evlist->mmap;
	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;

	if (cpu_map__is_dummy(cpus))
		thread_data->nr_mmaps = nr_mmaps;
	else
		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
						      thread_data->mask->maps.nbits);
	if (mmap) {
		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->maps)
			return -ENOMEM;
	}
	if (overwrite_mmap) {
		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
		if (!thread_data->overwrite_maps) {
			zfree(&thread_data->maps);
			return -ENOMEM;
		}
	}
	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
		  thread_data->nr_mmaps,
		  thread_data->maps, thread_data->overwrite_maps);

	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
		if (cpu_map__is_dummy(cpus) ||
		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
			if (thread_data->maps) {
				thread_data->maps[tm] = &mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			if (thread_data->overwrite_maps) {
				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
			}
			tm++;
		}
	}

	return 0;
}

static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
{
	int f, tm, pos;
	struct mmap *map, *overwrite_map;

	fdarray__init(&thread_data->pollfd, 64);

	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
		map = thread_data->maps ? thread_data->maps[tm] : NULL;
		overwrite_map = thread_data->overwrite_maps ?
				thread_data->overwrite_maps[tm] : NULL;

		for (f = 0; f < evlist->core.pollfd.nr; f++) {
			void *ptr = evlist->core.pollfd.priv[f].ptr;

			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
							      &evlist->core.pollfd);
				if (pos < 0)
					return pos;
				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
					  thread_data, pos, evlist->core.pollfd.entries[f].fd);
			}
		}
	}

	return 0;
}

static void record__free_thread_data(struct record *rec)
{
	int t;
	struct record_thread *thread_data = rec->thread_data;

	if (thread_data == NULL)
		return;

	for (t = 0; t < rec->nr_threads; t++) {
		record__thread_data_close_pipes(&thread_data[t]);
		zfree(&thread_data[t].maps);
		zfree(&thread_data[t].overwrite_maps);
		fdarray__exit(&thread_data[t].pollfd);
	}

	zfree(&rec->thread_data);
}

static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
{
	int t, ret;
	struct record_thread *thread_data;

	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
	if (!rec->thread_data) {
		pr_err("Failed to allocate thread data\n");
		return -ENOMEM;
	}
	thread_data = rec->thread_data;

	for (t = 0; t < rec->nr_threads; t++)
		record__thread_data_init_pipes(&thread_data[t]);

	for (t = 0; t < rec->nr_threads; t++) {
		thread_data[t].rec = rec;
		thread_data[t].mask = &rec->thread_masks[t];
		ret = record__thread_data_init_maps(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] maps\n", t);
			goto out_free;
		}
		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
		if (ret) {
			pr_err("Failed to initialize thread[%d] pollfd\n", t);
			goto out_free;
		}
		if (t) {
			thread_data[t].tid = -1;
			ret = record__thread_data_open_pipes(&thread_data[t]);
			if (ret) {
				pr_err("Failed to open thread[%d] communication pipes\n", t);
				goto out_free;
			}
			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
			if (ret < 0) {
				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
				goto out_free;
			}
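			/* Remember where the message pipe sits in this thread's pollfd so POLLHUP can be detected. */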
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  thread_data[t].pipes.msg[0]);
		} else {
			thread_data[t].tid = gettid();
			if (evlist->ctl_fd.pos == -1)
				continue;
			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
						      &evlist->core.pollfd);
			if (ret < 0) {
				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
				goto out_free;
			}
			thread_data[t].ctlfd_pos = ret;
			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
				  thread_data, thread_data[t].ctlfd_pos,
				  evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
		}
	}

	return 0;

out_free:
	record__free_thread_data(rec);

	return ret;
}

static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	int i, ret;
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
			    opts->auxtrace_mmap_pages,
			    auxtrace_overwrite,
			    opts->nr_cblocks, opts->affinity,
			    opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}

	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
		return -1;

	ret = record__alloc_thread_data(rec, evlist);
	if (ret)
		return ret;

	if (record__threads_enabled(rec)) {
		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
		if (ret) {
			pr_err("Failed to create data directory: %s\n", strerror(-ret));
			return ret;
		}
		for (i = 0; i < evlist->core.nr_mmaps; i++) {
			if (evlist->mmap)
				evlist->mmap[i].file = &rec->data.dir.files[i];
			if (evlist->overwrite_mmap)
				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
		}
	}

	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add a
	 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
	 * of waiting or event synthesis.
	 */
	if (opts->initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmu__has_hybrid()) {
		pos = evlist__get_tracking_event(evlist);
		if (!evsel__is_dummy_event(pos)) {
			/* Set up dummy event.
			 */
			if (evlist__add_dummy(evlist))
				return -ENOMEM;
			pos = evlist__last(evlist);
			evlist__set_tracking_event(evlist, pos);
		}

		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->initial_delay && !pos->immediate &&
		    !target__has_cpu(&opts->target))
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static void set_timestamp_boundary(struct record *rec, u64 sample_time)
{
	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample_time;

	if (sample_time)
		rec->evlist->last_sample_time = sample_time;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	set_timestamp_boundary(rec, sample->time);

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange module mmaps prior to the guest kernel mmap
	 * and trigger a preload of the dso, because guest module symbols are
	 * loaded from guest kallsyms by default instead of from
	 * /lib/modules/XXX/XXX. This method is used to avoid missing symbols
	 * when the first address is in a module instead of in the guest
	 * kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
			  thread->mask->affinity.nbits)) {
		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
			  map->affinity_mask.bits, thread->mask->affinity.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
				  (cpu_set_t *)thread->mask->affinity.bits);
		if (verbose == 2) {
			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
		}
	}
}

static size_t process_comp_header(void *record, size_t increment)
{
	struct perf_record_compressed *event = record;
	size_t size = sizeof(*event);

	if (increment) {
		event->header.size += increment;
		return increment;
	}

	event->header.type = PERF_RECORD_COMPRESSED;
	event->header.size = size;

	return size;
}

static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size)
{
	size_t compressed;
	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
	struct zstd_data *zstd_data = &session->zstd_data;

	if (map && map->file)
		zstd_data = &map->zstd_data;

	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
						     max_record_size, process_comp_header);

	if (map && map->file) {
		thread->bytes_transferred += src_size;
		thread->bytes_compressed += compressed;
	} else {
		session->bytes_transferred += src_size;
		session->bytes_compressed += compressed;
	}

	return compressed;
}

static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	int nr_mmaps;
	struct mmap **maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	nr_mmaps = thread->nr_mmaps;
	maps = overwrite ? thread->overwrite_maps : thread->maps;

	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have data
	 * sorted by kernel.
	 */
	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec, bool synch)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
}

static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
					   void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	record__mmap_read_all(thread->rec, true);

	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	if (!rec->opts.use_clockid)
		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);

	if (!record__threads_enabled(rec))
		perf_header__clear_feat(&session->header,
					HEADER_DIR_FORMAT);

	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	int i;
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
	if (record__threads_enabled(rec)) {
		for (i = 0; i < data->dir.nr; i++)
			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
	}

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct perf_thread_map *thread_map;
	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						process_synthesized_event,
						&rec->session->machines.host,
						needs_mmap,
						rec->opts.sample_address);
	perf_thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
			       rec->session->header.data_offset,
			       at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, so the newly created perf.data won't contain
		 * map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].core.base)
			return evlist->mmap[0].core.base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
			return evlist->overwrite_mmap[0].core.base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int err = 0;
	event_op f = process_synthesized_event;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		err = perf_event__synthesize_for_pipe(tool, session, data,
						      process_synthesized_event);
		if (err < 0)
			goto out;

		rec->bytes_written += err;
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	/* Synthesize id_index before auxtrace_info */
	if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
		err = perf_event__synthesize_id_index(tool,
						      process_synthesized_event,
						      session->evlist, machine);
		if (err)
			goto out;
	}

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
							   session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			  "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			  "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			  "Symbol resolution may be skewed if relocation was used (e.g. "
			  "kexec).\n"
			  "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
						 process_synthesized_event,
						 NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
						machine, opts);
	if (err < 0)
		pr_warning("Couldn't synthesize bpf events.\n");

	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
						     machine);
		if (err < 0)
			pr_warning("Couldn't synthesize cgroup events.\n");
	}

	if (rec->opts.nr_threads_synthesize > 1) {
		perf_set_multithreaded();
		f = process_locked_synthesized_event;
	}

	if (rec->opts.synth & PERF_SYNTH_TASK) {
		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;

		err = __machine__synthesize_threads(machine, tool, &opts->target,
						    rec->evlist->core.threads,
						    f, needs_mmap, opts->sample_address,
						    rec->opts.nr_threads_synthesize);
	}

	if (rec->opts.nr_threads_synthesize > 1)
		perf_set_singlethreaded();

out:
	return err;
}

static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
{
	struct record *rec = data;
	pthread_kill(rec->thread_id, SIGUSR2);
	return 0;
}

static int record__setup_sb_evlist(struct record *rec)
{
	struct record_opts *opts = &rec->opts;

	if (rec->sb_evlist != NULL) {
		/*
		 * We get here if --switch-output-event populated the
		 * sb_evlist, so associate a callback that will send a SIGUSR2
		 * to the main thread.
		 */
		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
		rec->thread_id = pthread_self();
	}
#ifdef HAVE_LIBBPF_SUPPORT
	if (!opts->no_bpf_event) {
		if (rec->sb_evlist == NULL) {
			rec->sb_evlist = evlist__new();

			if (rec->sb_evlist == NULL) {
				pr_err("Couldn't create side band evlist.\n");
				return -1;
			}
		}

		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
			return -1;
		}
	}
#endif
	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	return 0;
}

static int record__init_clock(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct timespec ref_clockid;
	struct timeval ref_tod;
	u64 ref;

	if (!rec->opts.use_clockid)
		return 0;

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;

	session->header.env.clock.clockid = rec->opts.clockid;

	if (gettimeofday(&ref_tod, NULL) != 0) {
		pr_err("gettimeofday failed, cannot set reference time.\n");
		return -1;
	}

	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
		pr_err("clock_gettime failed, cannot set reference time.\n");
		return -1;
	}

	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;

	session->header.env.clock.tod_ns = ref;

	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
	      (u64) ref_clockid.tv_nsec;

	session->header.env.clock.clockid_ns = ref;
	return 0;
}

static void hit_auxtrace_snapshot_trigger(struct record *rec)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}
}

static void record__uniquify_name(struct record *rec)
{
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	char *new_name;
	int ret;

	if (!perf_pmu__has_hybrid())
		return;

	evlist__for_each_entry(evlist, pos) {
		if (!evsel__is_hybrid(pos))
			continue;

		if (strchr(pos->name, '/'))
			continue;

		ret = asprintf(&new_name, "%s/%s/",
			       pos->pmu_name, pos->name);
		if (ret) {
			free(pos->name);
			pos->name = new_name;
		}
	}
}

static int record__terminate_thread(struct record_thread *thread_data)
{
	int err;
	enum thread_msg ack = THREAD_MSG__UNDEFINED;
	pid_t tid = thread_data->tid;

	close(thread_data->pipes.msg[1]);
	thread_data->pipes.msg[1] = -1;
	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
	if (err > 0)
		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
	else
		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
			   thread->tid, tid);

	return 0;
}

static int record__start_threads(struct record *rec)
{
	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
	struct record_thread *thread_data = rec->thread_data;
	sigset_t full,
mask; 2069 pthread_t handle; 2070 pthread_attr_t attrs; 2071 2072 thread = &thread_data[0]; 2073 2074 if (!record__threads_enabled(rec)) 2075 return 0; 2076 2077 sigfillset(&full); 2078 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2079 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2080 return -1; 2081 } 2082 2083 pthread_attr_init(&attrs); 2084 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2085 2086 for (t = 1; t < nr_threads; t++) { 2087 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2088 2089 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2090 pthread_attr_setaffinity_np(&attrs, 2091 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2092 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2093 #endif 2094 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2095 for (tt = 1; tt < t; tt++) 2096 record__terminate_thread(&thread_data[tt]); 2097 pr_err("Failed to start threads: %s\n", strerror(errno)); 2098 ret = -1; 2099 goto out_err; 2100 } 2101 2102 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2103 if (err > 0) 2104 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2105 thread_msg_tags[msg]); 2106 else 2107 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2108 thread->tid, rec->thread_data[t].tid); 2109 } 2110 2111 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2112 (cpu_set_t *)thread->mask->affinity.bits); 2113 2114 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2115 2116 out_err: 2117 pthread_attr_destroy(&attrs); 2118 2119 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2120 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2121 ret = -1; 2122 } 2123 2124 return ret; 2125 } 2126 2127 static int record__stop_threads(struct record *rec) 2128 { 2129 int t; 2130 struct record_thread *thread_data = rec->thread_data; 2131 2132 for (t = 1; t < rec->nr_threads; t++) 2133 record__terminate_thread(&thread_data[t]); 2134 2135 for (t = 0; t < rec->nr_threads; t++) { 2136 rec->samples += thread_data[t].samples; 2137 if (!record__threads_enabled(rec)) 2138 continue; 2139 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2140 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2141 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2142 thread_data[t].samples, thread_data[t].waking); 2143 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2144 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2145 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2146 else 2147 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2148 } 2149 2150 return 0; 2151 } 2152 2153 static unsigned long record__waking(struct record *rec) 2154 { 2155 int t; 2156 unsigned long waking = 0; 2157 struct record_thread *thread_data = rec->thread_data; 2158 2159 for (t = 0; t < rec->nr_threads; t++) 2160 waking += thread_data[t].waking; 2161 2162 return waking; 2163 } 2164 2165 static int __cmd_record(struct record *rec, int argc, const char **argv) 2166 { 2167 int err; 2168 int status = 0; 2169 const bool forks = argc > 0; 2170 struct perf_tool *tool = &rec->tool; 2171 struct record_opts *opts = &rec->opts; 2172 struct perf_data *data = &rec->data; 2173 struct perf_session *session; 2174 bool disabled = false, draining = false; 2175 int fd; 2176 float ratio = 0; 2177 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2178 2179
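/* Register the atexit cleanup hook and route SIGCHLD/SIGINT/SIGTERM/SIGSEGV through the record signal handlers before any session state is created. */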
atexit(record__sig_exit); 2180 signal(SIGCHLD, sig_handler); 2181 signal(SIGINT, sig_handler); 2182 signal(SIGTERM, sig_handler); 2183 signal(SIGSEGV, sigsegv_handler); 2184 2185 if (rec->opts.record_namespaces) 2186 tool->namespace_events = true; 2187 2188 if (rec->opts.record_cgroup) { 2189 #ifdef HAVE_FILE_HANDLE 2190 tool->cgroup_events = true; 2191 #else 2192 pr_err("cgroup tracking is not supported\n"); 2193 return -1; 2194 #endif 2195 } 2196 2197 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2198 signal(SIGUSR2, snapshot_sig_handler); 2199 if (rec->opts.auxtrace_snapshot_mode) 2200 trigger_on(&auxtrace_snapshot_trigger); 2201 if (rec->switch_output.enabled) 2202 trigger_on(&switch_output_trigger); 2203 } else { 2204 signal(SIGUSR2, SIG_IGN); 2205 } 2206 2207 session = perf_session__new(data, tool); 2208 if (IS_ERR(session)) { 2209 pr_err("Perf session creation failed.\n"); 2210 return PTR_ERR(session); 2211 } 2212 2213 if (record__threads_enabled(rec)) { 2214 if (perf_data__is_pipe(&rec->data)) { 2215 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2216 return -1; 2217 } 2218 if (rec->opts.full_auxtrace) { 2219 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2220 return -1; 2221 } 2222 } 2223 2224 fd = perf_data__fd(data); 2225 rec->session = session; 2226 2227 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2228 pr_err("Compression initialization failed.\n"); 2229 return -1; 2230 } 2231 #ifdef HAVE_EVENTFD_SUPPORT 2232 done_fd = eventfd(0, EFD_NONBLOCK); 2233 if (done_fd < 0) { 2234 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2235 status = -1; 2236 goto out_delete_session; 2237 } 2238 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2239 if (err < 0) { 2240 pr_err("Failed to add wakeup eventfd to poll list\n"); 2241 status = err; 2242 goto out_delete_session; 2243 } 2244 #endif // HAVE_EVENTFD_SUPPORT 2245 2246 session->header.env.comp_type = PERF_COMP_ZSTD; 2247 session->header.env.comp_level = rec->opts.comp_level; 2248 2249 if (rec->opts.kcore && 2250 !record__kcore_readable(&session->machines.host)) { 2251 pr_err("ERROR: kcore is not readable.\n"); 2252 return -1; 2253 } 2254 2255 if (record__init_clock(rec)) 2256 return -1; 2257 2258 record__init_features(rec); 2259 2260 if (forks) { 2261 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2262 workload_exec_failed_signal); 2263 if (err < 0) { 2264 pr_err("Couldn't run the workload!\n"); 2265 status = err; 2266 goto out_delete_session; 2267 } 2268 } 2269 2270 /* 2271 * If we have just single event and are sending data 2272 * through pipe, we need to force the ids allocation, 2273 * because we synthesize event name through the pipe 2274 * and need the id for that. 
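 * (likely because the synthesized name travels in an id-keyed event update record)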
2275 */ 2276 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2277 rec->opts.sample_id = true; 2278 2279 record__uniquify_name(rec); 2280 2281 if (record__open(rec) != 0) { 2282 err = -1; 2283 goto out_free_threads; 2284 } 2285 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2286 2287 if (rec->opts.kcore) { 2288 err = record__kcore_copy(&session->machines.host, data); 2289 if (err) { 2290 pr_err("ERROR: Failed to copy kcore\n"); 2291 goto out_free_threads; 2292 } 2293 } 2294 2295 err = bpf__apply_obj_config(); 2296 if (err) { 2297 char errbuf[BUFSIZ]; 2298 2299 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2300 pr_err("ERROR: Apply config to BPF failed: %s\n", 2301 errbuf); 2302 goto out_free_threads; 2303 } 2304 2305 /* 2306 * Normally perf_session__new would do this, but it doesn't have the 2307 * evlist. 2308 */ 2309 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2310 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2311 rec->tool.ordered_events = false; 2312 } 2313 2314 if (!rec->evlist->core.nr_groups) 2315 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2316 2317 if (data->is_pipe) { 2318 err = perf_header__write_pipe(fd); 2319 if (err < 0) 2320 goto out_free_threads; 2321 } else { 2322 err = perf_session__write_header(session, rec->evlist, fd, false); 2323 if (err < 0) 2324 goto out_free_threads; 2325 } 2326 2327 err = -1; 2328 if (!rec->no_buildid 2329 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2330 pr_err("Couldn't generate buildids. " 2331 "Use --no-buildid to profile anyway.\n"); 2332 goto out_free_threads; 2333 } 2334 2335 err = record__setup_sb_evlist(rec); 2336 if (err) 2337 goto out_free_threads; 2338 2339 err = record__synthesize(rec, false); 2340 if (err < 0) 2341 goto out_free_threads; 2342 2343 if (rec->realtime_prio) { 2344 struct sched_param param; 2345 2346 param.sched_priority = rec->realtime_prio; 2347 if (sched_setscheduler(0, SCHED_FIFO, &param)) { 2348 pr_err("Could not set realtime priority.\n"); 2349 err = -1; 2350 goto out_free_threads; 2351 } 2352 } 2353 2354 if (record__start_threads(rec)) 2355 goto out_free_threads; 2356 2357 /* 2358 * When perf is starting the traced process, all the events 2359 * (apart from group members) have enable_on_exec=1 set, 2360 * so don't spoil it by prematurely enabling them. 2361 */ 2362 if (!target__none(&opts->target) && !opts->initial_delay) 2363 evlist__enable(rec->evlist); 2364 2365 /* 2366 * Let the child rip 2367 */ 2368 if (forks) { 2369 struct machine *machine = &session->machines.host; 2370 union perf_event *event; 2371 pid_t tgid; 2372 2373 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2374 if (event == NULL) { 2375 err = -ENOMEM; 2376 goto out_child; 2377 } 2378 2379 /* 2380 * Some H/W events are generated before COMM event 2381 * which is emitted during exec(), so perf script 2382 * cannot see a correct process name for those events. 2383 * Synthesize COMM event to prevent it. 2384 */ 2385 tgid = perf_event__synthesize_comm(tool, event, 2386 rec->evlist->workload.pid, 2387 process_synthesized_event, 2388 machine); 2389 free(event); 2390 2391 if (tgid == -1) 2392 goto out_child; 2393 2394 event = malloc(sizeof(event->namespaces) + 2395 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2396 machine->id_hdr_size); 2397 if (event == NULL) { 2398 err = -ENOMEM; 2399 goto out_child; 2400 } 2401 2402 /* 2403 * Synthesize NAMESPACES event for the command specified.
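 * This parallels the COMM synthesis above: record the workload's namespace info up front so its samples can be attributed correctly.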
2404 */ 2405 perf_event__synthesize_namespaces(tool, event, 2406 rec->evlist->workload.pid, 2407 tgid, process_synthesized_event, 2408 machine); 2409 free(event); 2410 2411 evlist__start_workload(rec->evlist); 2412 } 2413 2414 if (opts->initial_delay) { 2415 pr_info(EVLIST_DISABLED_MSG); 2416 if (opts->initial_delay > 0) { 2417 usleep(opts->initial_delay * USEC_PER_MSEC); 2418 evlist__enable(rec->evlist); 2419 pr_info(EVLIST_ENABLED_MSG); 2420 } 2421 } 2422 2423 trigger_ready(&auxtrace_snapshot_trigger); 2424 trigger_ready(&switch_output_trigger); 2425 perf_hooks__invoke_record_start(); 2426 for (;;) { 2427 unsigned long long hits = thread->samples; 2428 2429 /* 2430 * rec->evlist->bkw_mmap_state can be 2431 * BKW_MMAP_EMPTY here: when done == true and 2432 * hits != rec->samples in the previous round. 2433 * 2434 * evlist__toggle_bkw_mmap ensures we never 2435 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2436 */ 2437 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2438 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2439 2440 if (record__mmap_read_all(rec, false) < 0) { 2441 trigger_error(&auxtrace_snapshot_trigger); 2442 trigger_error(&switch_output_trigger); 2443 err = -1; 2444 goto out_child; 2445 } 2446 2447 if (auxtrace_record__snapshot_started) { 2448 auxtrace_record__snapshot_started = 0; 2449 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2450 record__read_auxtrace_snapshot(rec, false); 2451 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2452 pr_err("AUX area tracing snapshot failed\n"); 2453 err = -1; 2454 goto out_child; 2455 } 2456 } 2457 2458 if (trigger_is_hit(&switch_output_trigger)) { 2459 /* 2460 * If switch_output_trigger is hit, the data in the 2461 * overwritable ring buffer should have been collected, 2462 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2463 * 2464 * If SIGUSR2 is raised after or during record__mmap_read_all(), 2465 * record__mmap_read_all() didn't collect data from the 2466 * overwritable ring buffer, so read again. 2467 */ 2468 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2469 continue; 2470 trigger_ready(&switch_output_trigger); 2471 2472 /* 2473 * Reenable events in overwrite ring buffer after 2474 * record__mmap_read_all(): we should have collected 2475 * data from it. 2476 */ 2477 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2478 2479 if (!quiet) 2480 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2481 record__waking(rec)); 2482 thread->waking = 0; 2483 fd = record__switch_output(rec, false); 2484 if (fd < 0) { 2485 pr_err("Failed to switch to new file\n"); 2486 trigger_error(&switch_output_trigger); 2487 err = fd; 2488 goto out_child; 2489 } 2490 2491 /* re-arm the alarm */ 2492 if (rec->switch_output.time) 2493 alarm(rec->switch_output.time); 2494 } 2495 2496 if (hits == thread->samples) { 2497 if (done || draining) 2498 break; 2499 err = fdarray__poll(&thread->pollfd, -1); 2500 /* 2501 * Propagate the error only if there is one. Ignore a positive 2502 * number of returned events and interrupt (EINTR) errors.
2503 */ 2504 if (err > 0 || (err < 0 && errno == EINTR)) 2505 err = 0; 2506 thread->waking++; 2507 2508 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2509 record__thread_munmap_filtered, NULL) == 0) 2510 draining = true; 2511 2512 evlist__ctlfd_update(rec->evlist, 2513 &thread->pollfd.entries[thread->ctlfd_pos]); 2514 } 2515 2516 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2517 switch (cmd) { 2518 case EVLIST_CTL_CMD_SNAPSHOT: 2519 hit_auxtrace_snapshot_trigger(rec); 2520 evlist__ctlfd_ack(rec->evlist); 2521 break; 2522 case EVLIST_CTL_CMD_STOP: 2523 done = 1; 2524 break; 2525 case EVLIST_CTL_CMD_ACK: 2526 case EVLIST_CTL_CMD_UNSUPPORTED: 2527 case EVLIST_CTL_CMD_ENABLE: 2528 case EVLIST_CTL_CMD_DISABLE: 2529 case EVLIST_CTL_CMD_EVLIST: 2530 case EVLIST_CTL_CMD_PING: 2531 default: 2532 break; 2533 } 2534 } 2535 2536 /* 2537 * When perf is starting the traced process, at the end events 2538 * die with the process and we wait for that. Thus no need to 2539 * disable events in this case. 2540 */ 2541 if (done && !disabled && !target__none(&opts->target)) { 2542 trigger_off(&auxtrace_snapshot_trigger); 2543 evlist__disable(rec->evlist); 2544 disabled = true; 2545 } 2546 } 2547 2548 trigger_off(&auxtrace_snapshot_trigger); 2549 trigger_off(&switch_output_trigger); 2550 2551 if (opts->auxtrace_snapshot_on_exit) 2552 record__auxtrace_snapshot_exit(rec); 2553 2554 if (forks && workload_exec_errno) { 2555 char msg[STRERR_BUFSIZE], strevsels[2048]; 2556 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2557 2558 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2559 2560 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2561 strevsels, argv[0], emsg); 2562 err = -1; 2563 goto out_child; 2564 } 2565 2566 if (!quiet) 2567 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2568 record__waking(rec)); 2569 2570 if (target__none(&rec->opts.target)) 2571 record__synthesize_workload(rec, true); 2572 2573 out_child: 2574 record__stop_threads(rec); 2575 record__mmap_read_all(rec, true); 2576 out_free_threads: 2577 record__free_thread_data(rec); 2578 evlist__finalize_ctlfd(rec->evlist); 2579 record__aio_mmap_read_sync(rec); 2580 2581 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2582 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2583 session->header.env.comp_ratio = ratio + 0.5; 2584 } 2585 2586 if (forks) { 2587 int exit_status; 2588 2589 if (!child_finished) 2590 kill(rec->evlist->workload.pid, SIGTERM); 2591 2592 wait(&exit_status); 2593 2594 if (err < 0) 2595 status = err; 2596 else if (WIFEXITED(exit_status)) 2597 status = WEXITSTATUS(exit_status); 2598 else if (WIFSIGNALED(exit_status)) 2599 signr = WTERMSIG(exit_status); 2600 } else 2601 status = err; 2602 2603 record__synthesize(rec, true); 2604 /* this will be recalculated during process_buildids() */ 2605 rec->samples = 0; 2606 2607 if (!err) { 2608 if (!rec->timestamp_filename) { 2609 record__finish_output(rec); 2610 } else { 2611 fd = record__switch_output(rec, true); 2612 if (fd < 0) { 2613 status = fd; 2614 goto out_delete_session; 2615 } 2616 } 2617 } 2618 2619 perf_hooks__invoke_record_end(); 2620 2621 if (!err && !quiet) { 2622 char samples[128]; 2623 const char *postfix = rec->timestamp_filename ? 
2624 ".<timestamp>" : ""; 2625 2626 if (rec->samples && !rec->opts.full_auxtrace) 2627 scnprintf(samples, sizeof(samples), 2628 " (%" PRIu64 " samples)", rec->samples); 2629 else 2630 samples[0] = '\0'; 2631 2632 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2633 perf_data__size(data) / 1024.0 / 1024.0, 2634 data->path, postfix, samples); 2635 if (ratio) { 2636 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2637 rec->session->bytes_transferred / 1024.0 / 1024.0, 2638 ratio); 2639 } 2640 fprintf(stderr, " ]\n"); 2641 } 2642 2643 out_delete_session: 2644 #ifdef HAVE_EVENTFD_SUPPORT 2645 if (done_fd >= 0) 2646 close(done_fd); 2647 #endif 2648 zstd_fini(&session->zstd_data); 2649 perf_session__delete(session); 2650 2651 if (!opts->no_bpf_event) 2652 evlist__stop_sb_thread(rec->sb_evlist); 2653 return status; 2654 } 2655 2656 static void callchain_debug(struct callchain_param *callchain) 2657 { 2658 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2659 2660 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2661 2662 if (callchain->record_mode == CALLCHAIN_DWARF) 2663 pr_debug("callchain: stack dump size %d\n", 2664 callchain->dump_size); 2665 } 2666 2667 int record_opts__parse_callchain(struct record_opts *record, 2668 struct callchain_param *callchain, 2669 const char *arg, bool unset) 2670 { 2671 int ret; 2672 callchain->enabled = !unset; 2673 2674 /* --no-call-graph */ 2675 if (unset) { 2676 callchain->record_mode = CALLCHAIN_NONE; 2677 pr_debug("callchain: disabled\n"); 2678 return 0; 2679 } 2680 2681 ret = parse_callchain_record_opt(arg, callchain); 2682 if (!ret) { 2683 /* Enable data address sampling for DWARF unwind. */ 2684 if (callchain->record_mode == CALLCHAIN_DWARF) 2685 record->sample_address = true; 2686 callchain_debug(callchain); 2687 } 2688 2689 return ret; 2690 } 2691 2692 int record_parse_callchain_opt(const struct option *opt, 2693 const char *arg, 2694 int unset) 2695 { 2696 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2697 } 2698 2699 int record_callchain_opt(const struct option *opt, 2700 const char *arg __maybe_unused, 2701 int unset __maybe_unused) 2702 { 2703 struct callchain_param *callchain = opt->value; 2704 2705 callchain->enabled = true; 2706 2707 if (callchain->record_mode == CALLCHAIN_NONE) 2708 callchain->record_mode = CALLCHAIN_FP; 2709 2710 callchain_debug(callchain); 2711 return 0; 2712 } 2713 2714 static int perf_record_config(const char *var, const char *value, void *cb) 2715 { 2716 struct record *rec = cb; 2717 2718 if (!strcmp(var, "record.build-id")) { 2719 if (!strcmp(value, "cache")) 2720 rec->no_buildid_cache = false; 2721 else if (!strcmp(value, "no-cache")) 2722 rec->no_buildid_cache = true; 2723 else if (!strcmp(value, "skip")) 2724 rec->no_buildid = true; 2725 else if (!strcmp(value, "mmap")) 2726 rec->buildid_mmap = true; 2727 else 2728 return -1; 2729 return 0; 2730 } 2731 if (!strcmp(var, "record.call-graph")) { 2732 var = "call-graph.record-mode"; 2733 return perf_default_config(var, value, cb); 2734 } 2735 #ifdef HAVE_AIO_SUPPORT 2736 if (!strcmp(var, "record.aio")) { 2737 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2738 if (!rec->opts.nr_cblocks) 2739 rec->opts.nr_cblocks = nr_cblocks_default; 2740 } 2741 #endif 2742 if (!strcmp(var, "record.debuginfod")) { 2743 rec->debuginfod.urls = strdup(value); 2744 if (!rec->debuginfod.urls) 2745 return -ENOMEM; 2746 rec->debuginfod.set = true; 2747 } 2748 2749 return 0; 2750 } 2751 
2752 2753 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2754 { 2755 struct record_opts *opts = (struct record_opts *)opt->value; 2756 2757 if (unset || !str) 2758 return 0; 2759 2760 if (!strcasecmp(str, "node")) 2761 opts->affinity = PERF_AFFINITY_NODE; 2762 else if (!strcasecmp(str, "cpu")) 2763 opts->affinity = PERF_AFFINITY_CPU; 2764 2765 return 0; 2766 } 2767 2768 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2769 { 2770 mask->nbits = nr_bits; 2771 mask->bits = bitmap_zalloc(mask->nbits); 2772 if (!mask->bits) 2773 return -ENOMEM; 2774 2775 return 0; 2776 } 2777 2778 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2779 { 2780 bitmap_free(mask->bits); 2781 mask->nbits = 0; 2782 } 2783 2784 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2785 { 2786 int ret; 2787 2788 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2789 if (ret) { 2790 mask->affinity.bits = NULL; 2791 return ret; 2792 } 2793 2794 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2795 if (ret) { 2796 record__mmap_cpu_mask_free(&mask->maps); 2797 mask->maps.bits = NULL; 2798 } 2799 2800 return ret; 2801 } 2802 2803 static void record__thread_mask_free(struct thread_mask *mask) 2804 { 2805 record__mmap_cpu_mask_free(&mask->maps); 2806 record__mmap_cpu_mask_free(&mask->affinity); 2807 } 2808 2809 static int record__parse_threads(const struct option *opt, const char *str, int unset) 2810 { 2811 int s; 2812 struct record_opts *opts = opt->value; 2813 2814 if (unset || !str || !strlen(str)) { 2815 opts->threads_spec = THREAD_SPEC__CPU; 2816 } else { 2817 for (s = 1; s < THREAD_SPEC__MAX; s++) { 2818 if (s == THREAD_SPEC__USER) { 2819 opts->threads_user_spec = strdup(str); 2820 if (!opts->threads_user_spec) 2821 return -ENOMEM; 2822 opts->threads_spec = THREAD_SPEC__USER; 2823 break; 2824 } 2825 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 2826 opts->threads_spec = s; 2827 break; 2828 } 2829 } 2830 } 2831 2832 if (opts->threads_spec == THREAD_SPEC__USER) 2833 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 2834 else 2835 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 2836 2837 return 0; 2838 } 2839 2840 static int parse_output_max_size(const struct option *opt, 2841 const char *str, int unset) 2842 { 2843 unsigned long *s = (unsigned long *)opt->value; 2844 static struct parse_tag tags_size[] = { 2845 { .tag = 'B', .mult = 1 }, 2846 { .tag = 'K', .mult = 1 << 10 }, 2847 { .tag = 'M', .mult = 1 << 20 }, 2848 { .tag = 'G', .mult = 1 << 30 }, 2849 { .tag = 0 }, 2850 }; 2851 unsigned long val; 2852 2853 if (unset) { 2854 *s = 0; 2855 return 0; 2856 } 2857 2858 val = parse_tag_value(str, tags_size); 2859 if (val != (unsigned long) -1) { 2860 *s = val; 2861 return 0; 2862 } 2863 2864 return -1; 2865 } 2866 2867 static int record__parse_mmap_pages(const struct option *opt, 2868 const char *str, 2869 int unset __maybe_unused) 2870 { 2871 struct record_opts *opts = opt->value; 2872 char *s, *p; 2873 unsigned int mmap_pages; 2874 int ret; 2875 2876 if (!str) 2877 return -EINVAL; 2878 2879 s = strdup(str); 2880 if (!s) 2881 return -ENOMEM; 2882 2883 p = strchr(s, ','); 2884 if (p) 2885 *p = '\0'; 2886 2887 if (*s) { 2888 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2889 if (ret) 2890 goto out_free; 2891 opts->mmap_pages = mmap_pages; 2892 } 2893 2894 if (!p) { 2895 ret = 0; 2896 goto out_free; 2897 } 2898 2899 ret = 
__evlist__parse_mmap_pages(&mmap_pages, p + 1); 2900 if (ret) 2901 goto out_free; 2902 2903 opts->auxtrace_mmap_pages = mmap_pages; 2904 2905 out_free: 2906 free(s); 2907 return ret; 2908 } 2909 2910 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 2911 { 2912 } 2913 2914 static int parse_control_option(const struct option *opt, 2915 const char *str, 2916 int unset __maybe_unused) 2917 { 2918 struct record_opts *opts = opt->value; 2919 2920 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 2921 } 2922 2923 static void switch_output_size_warn(struct record *rec) 2924 { 2925 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 2926 struct switch_output *s = &rec->switch_output; 2927 2928 wakeup_size /= 2; 2929 2930 if (s->size < wakeup_size) { 2931 char buf[100]; 2932 2933 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 2934 pr_warning("WARNING: switch-output data size is lower than " 2935 "the wakeup kernel buffer size (%s); " 2936 "expect bigger perf.data sizes\n", buf); 2937 } 2938 } 2939 2940 static int switch_output_setup(struct record *rec) 2941 { 2942 struct switch_output *s = &rec->switch_output; 2943 static struct parse_tag tags_size[] = { 2944 { .tag = 'B', .mult = 1 }, 2945 { .tag = 'K', .mult = 1 << 10 }, 2946 { .tag = 'M', .mult = 1 << 20 }, 2947 { .tag = 'G', .mult = 1 << 30 }, 2948 { .tag = 0 }, 2949 }; 2950 static struct parse_tag tags_time[] = { 2951 { .tag = 's', .mult = 1 }, 2952 { .tag = 'm', .mult = 60 }, 2953 { .tag = 'h', .mult = 60*60 }, 2954 { .tag = 'd', .mult = 60*60*24 }, 2955 { .tag = 0 }, 2956 }; 2957 unsigned long val; 2958 2959 /* 2960 * If we're using --switch-output-event, then we imply 2961 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 2962 * thread to its parent.
2963 */ 2964 if (rec->switch_output_event_set) { 2965 if (record__threads_enabled(rec)) { 2966 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 2967 return 0; 2968 } 2969 goto do_signal; 2970 } 2971 2972 if (!s->set) 2973 return 0; 2974 2975 if (record__threads_enabled(rec)) { 2976 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 2977 return 0; 2978 } 2979 2980 if (!strcmp(s->str, "signal")) { 2981 do_signal: 2982 s->signal = true; 2983 pr_debug("switch-output with SIGUSR2 signal\n"); 2984 goto enabled; 2985 } 2986 2987 val = parse_tag_value(s->str, tags_size); 2988 if (val != (unsigned long) -1) { 2989 s->size = val; 2990 pr_debug("switch-output with %s size threshold\n", s->str); 2991 goto enabled; 2992 } 2993 2994 val = parse_tag_value(s->str, tags_time); 2995 if (val != (unsigned long) -1) { 2996 s->time = val; 2997 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2998 s->str, s->time); 2999 goto enabled; 3000 } 3001 3002 return -1; 3003 3004 enabled: 3005 rec->timestamp_filename = true; 3006 s->enabled = true; 3007 3008 if (s->size && !rec->opts.no_buffering) 3009 switch_output_size_warn(rec); 3010 3011 return 0; 3012 } 3013 3014 static const char * const __record_usage[] = { 3015 "perf record [<options>] [<command>]", 3016 "perf record [<options>] -- <command> [<options>]", 3017 NULL 3018 }; 3019 const char * const *record_usage = __record_usage; 3020 3021 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3022 struct perf_sample *sample, struct machine *machine) 3023 { 3024 /* 3025 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3026 * no need to add them twice. 3027 */ 3028 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3029 return 0; 3030 return perf_event__process_mmap(tool, event, sample, machine); 3031 } 3032 3033 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3034 struct perf_sample *sample, struct machine *machine) 3035 { 3036 /* 3037 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3038 * no need to add them twice. 3039 */ 3040 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3041 return 0; 3042 3043 return perf_event__process_mmap2(tool, event, sample, machine); 3044 } 3045 3046 static int process_timestamp_boundary(struct perf_tool *tool, 3047 union perf_event *event __maybe_unused, 3048 struct perf_sample *sample, 3049 struct machine *machine __maybe_unused) 3050 { 3051 struct record *rec = container_of(tool, struct record, tool); 3052 3053 set_timestamp_boundary(rec, sample->time); 3054 return 0; 3055 } 3056 3057 static int parse_record_synth_option(const struct option *opt, 3058 const char *str, 3059 int unset __maybe_unused) 3060 { 3061 struct record_opts *opts = opt->value; 3062 char *p = strdup(str); 3063 3064 if (p == NULL) 3065 return -1; 3066 3067 opts->synth = parse_synth_opt(p); 3068 free(p); 3069 3070 if (opts->synth < 0) { 3071 pr_err("Invalid synth option: %s\n", str); 3072 return -1; 3073 } 3074 return 0; 3075 } 3076 3077 /* 3078 * XXX Ideally would be local to cmd_record() and passed to a record__new 3079 * because we need to have access to it in record__exit, that is called 3080 * after cmd_record() exits, but since record_options need to be accessible to 3081 * builtin-script, leave it here. 3082 * 3083 * At least we don't ouch it in all the other functions here directly. 
3084 * 3085 * Just say no to tons of global variables, sigh. 3086 */ 3087 static struct record record = { 3088 .opts = { 3089 .sample_time = true, 3090 .mmap_pages = UINT_MAX, 3091 .user_freq = UINT_MAX, 3092 .user_interval = ULLONG_MAX, 3093 .freq = 4000, 3094 .target = { 3095 .uses_mmap = true, 3096 .default_per_cpu = true, 3097 }, 3098 .mmap_flush = MMAP_FLUSH_DEFAULT, 3099 .nr_threads_synthesize = 1, 3100 .ctl_fd = -1, 3101 .ctl_fd_ack = -1, 3102 .synth = PERF_SYNTH_ALL, 3103 }, 3104 .tool = { 3105 .sample = process_sample_event, 3106 .fork = perf_event__process_fork, 3107 .exit = perf_event__process_exit, 3108 .comm = perf_event__process_comm, 3109 .namespaces = perf_event__process_namespaces, 3110 .mmap = build_id__process_mmap, 3111 .mmap2 = build_id__process_mmap2, 3112 .itrace_start = process_timestamp_boundary, 3113 .aux = process_timestamp_boundary, 3114 .ordered_events = true, 3115 }, 3116 }; 3117 3118 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3119 "\n\t\t\t\tDefault: fp"; 3120 3121 static bool dry_run; 3122 3123 /* 3124 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3125 * with it and switch to use the library functions in perf_evlist that came 3126 * from builtin-record.c, i.e. use record_opts, 3127 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3128 * using pipes, etc. 3129 */ 3130 static struct option __record_options[] = { 3131 OPT_CALLBACK('e', "event", &record.evlist, "event", 3132 "event selector. use 'perf list' to list available events", 3133 parse_events_option), 3134 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3135 "event filter", parse_filter), 3136 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3137 NULL, "don't record events from perf itself", 3138 exclude_perf), 3139 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3140 "record events on existing process id"), 3141 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3142 "record events on existing thread id"), 3143 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3144 "collect data with this RT SCHED_FIFO priority"), 3145 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3146 "collect data without buffering"), 3147 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3148 "collect raw sample records from all opened counters"), 3149 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3150 "system-wide collection from all CPUs"), 3151 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3152 "list of cpus to monitor"), 3153 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3154 OPT_STRING('o', "output", &record.data.path, "file", 3155 "output file name"), 3156 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3157 &record.opts.no_inherit_set, 3158 "child tasks do not inherit counters"), 3159 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3160 "synthesize non-sample events at the end of output"), 3161 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3162 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3163 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3164 "Fail if the specified frequency can't be used"), 3165 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3166 "profile at this frequency", 3167 record__parse_freq), 3168 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3169 "number of mmap data pages and AUX area tracing mmap pages", 3170 
record__parse_mmap_pages), 3171 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3172 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3173 record__mmap_flush_parse), 3174 OPT_BOOLEAN(0, "group", &record.opts.group, 3175 "put the counters into a counter group"), 3176 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3177 NULL, "enables call-graph recording" , 3178 &record_callchain_opt), 3179 OPT_CALLBACK(0, "call-graph", &record.opts, 3180 "record_mode[,record_size]", record_callchain_help, 3181 &record_parse_callchain_opt), 3182 OPT_INCR('v', "verbose", &verbose, 3183 "be more verbose (show counter open errors, etc)"), 3184 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3185 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3186 "per thread counts"), 3187 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3188 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3189 "Record the sample physical addresses"), 3190 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3191 "Record the sampled data address data page size"), 3192 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3193 "Record the sampled code address (ip) page size"), 3194 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3195 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3196 &record.opts.sample_time_set, 3197 "Record the sample timestamps"), 3198 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3199 "Record the sample period"), 3200 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3201 "don't sample"), 3202 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3203 &record.no_buildid_cache_set, 3204 "do not update the buildid cache"), 3205 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3206 &record.no_buildid_set, 3207 "do not collect buildids in perf.data"), 3208 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3209 "monitor event in cgroup name only", 3210 parse_cgroups), 3211 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 3212 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 3213 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3214 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3215 "user to profile"), 3216 3217 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3218 "branch any", "sample any taken branches", 3219 parse_branch_stack), 3220 3221 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3222 "branch filter mask", "branch stack filter modes", 3223 parse_branch_stack), 3224 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3225 "sample by weight (on special events only)"), 3226 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3227 "sample transaction flags (special events only)"), 3228 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3229 "use per-thread mmaps"), 3230 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3231 "sample selected machine registers on interrupt," 3232 " use '-I?' to list register names", parse_intr_regs), 3233 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3234 "sample selected machine registers on interrupt," 3235 " use '--user-regs=?' 
to list register names", parse_user_regs), 3236 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3237 "Record running/enabled time of read (:S) events"), 3238 OPT_CALLBACK('k', "clockid", &record.opts, 3239 "clockid", "clockid to use for events, see clock_gettime()", 3240 parse_clockid), 3241 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3242 "opts", "AUX area tracing Snapshot Mode", ""), 3243 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3244 "opts", "sample AUX area", ""), 3245 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3246 "per thread proc mmap processing timeout in ms"), 3247 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3248 "Record namespaces events"), 3249 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3250 "Record cgroup events"), 3251 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3252 &record.opts.record_switch_events_set, 3253 "Record context switch events"), 3254 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3255 "Configure all used events to run in kernel space.", 3256 PARSE_OPT_EXCLUSIVE), 3257 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3258 "Configure all used events to run in user space.", 3259 PARSE_OPT_EXCLUSIVE), 3260 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3261 "collect kernel callchains"), 3262 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3263 "collect user callchains"), 3264 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3265 "clang binary to use for compiling BPF scriptlets"), 3266 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3267 "options passed to clang when compiling BPF scriptlets"), 3268 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3269 "file", "vmlinux pathname"), 3270 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3271 "Record build-id of all DSOs regardless of hits"), 3272 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3273 "Record build-id in map events"), 3274 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3275 "append timestamp to output filename"), 3276 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3277 "Record timestamp boundary (time of first/last samples)"), 3278 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3279 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3280 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3281 "signal"), 3282 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3283 "switch output event selector. 
use 'perf list' to list available events", 3284 parse_events_option_new_evlist), 3285 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3286 "Limit number of switch output generated files"), 3287 OPT_BOOLEAN(0, "dry-run", &dry_run, 3288 "Parse options then exit"), 3289 #ifdef HAVE_AIO_SUPPORT 3290 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3291 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3292 record__aio_parse), 3293 #endif 3294 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3295 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3296 record__parse_affinity), 3297 #ifdef HAVE_ZSTD_SUPPORT 3298 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3299 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3300 record__parse_comp_level), 3301 #endif 3302 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3303 "size", "Limit the maximum size of the output file", parse_output_max_size), 3304 OPT_UINTEGER(0, "num-thread-synthesize", 3305 &record.opts.nr_threads_synthesize, 3306 "number of threads to run for event synthesis"), 3307 #ifdef HAVE_LIBPFM 3308 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3309 "libpfm4 event selector. use 'perf list' to list available events", 3310 parse_libpfm_events_option), 3311 #endif 3312 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3313 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3314 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3315 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3316 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3317 parse_control_option), 3318 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3319 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3320 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3321 &record.debuginfod.set, "debuginfod urls", 3322 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3323 "system"), 3324 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3325 "write collected trace data into several data files using parallel threads", 3326 record__parse_threads), 3327 OPT_END() 3328 }; 3329 3330 struct option *record_options = __record_options; 3331 3332 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3333 { 3334 struct perf_cpu cpu; 3335 int idx; 3336 3337 if (cpu_map__is_dummy(cpus)) 3338 return; 3339 3340 perf_cpu_map__for_each_cpu(cpu, idx, cpus) 3341 set_bit(cpu.cpu, mask->bits); 3342 } 3343 3344 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3345 { 3346 struct perf_cpu_map *cpus; 3347 3348 cpus = perf_cpu_map__new(mask_spec); 3349 if (!cpus) 3350 return -ENOMEM; 3351 3352 bitmap_zero(mask->bits, mask->nbits); 3353 record__mmap_cpu_mask_init(mask, cpus); 3354 perf_cpu_map__put(cpus); 3355 3356 return 0; 3357 } 3358 3359 static void record__free_thread_masks(struct record *rec, int nr_threads) 3360 { 3361 int t; 3362 3363 if (rec->thread_masks) 3364 for (t = 0; t < nr_threads; t++) 3365 record__thread_mask_free(&rec->thread_masks[t]); 3366 3367 zfree(&rec->thread_masks); 3368 } 3369 3370 static int 
record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3371 { 3372 int t, ret; 3373 3374 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3375 if (!rec->thread_masks) { 3376 pr_err("Failed to allocate thread masks\n"); 3377 return -ENOMEM; 3378 } 3379 3380 for (t = 0; t < nr_threads; t++) { 3381 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3382 if (ret) { 3383 pr_err("Failed to allocate thread masks[%d]\n", t); 3384 goto out_free; 3385 } 3386 } 3387 3388 return 0; 3389 3390 out_free: 3391 record__free_thread_masks(rec, nr_threads); 3392 3393 return ret; 3394 } 3395 3396 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3397 { 3398 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3399 3400 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3401 if (ret) 3402 return ret; 3403 3404 rec->nr_threads = nr_cpus; 3405 pr_debug("nr_threads: %d\n", rec->nr_threads); 3406 3407 for (t = 0; t < rec->nr_threads; t++) { 3408 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits); 3409 set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits); 3410 if (verbose) { 3411 pr_debug("thread_masks[%d]: ", t); 3412 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3413 pr_debug("thread_masks[%d]: ", t); 3414 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3415 } 3416 } 3417 3418 return 0; 3419 } 3420 3421 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3422 const char **maps_spec, const char **affinity_spec, 3423 u32 nr_spec) 3424 { 3425 u32 s; 3426 int ret = 0, t = 0; 3427 struct mmap_cpu_mask cpus_mask; 3428 struct thread_mask thread_mask, full_mask, *thread_masks; 3429 3430 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3431 if (ret) { 3432 pr_err("Failed to allocate CPUs mask\n"); 3433 return ret; 3434 } 3435 record__mmap_cpu_mask_init(&cpus_mask, cpus); 3436 3437 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3438 if (ret) { 3439 pr_err("Failed to allocate full mask\n"); 3440 goto out_free_cpu_mask; 3441 } 3442 3443 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3444 if (ret) { 3445 pr_err("Failed to allocate thread mask\n"); 3446 goto out_free_full_and_cpu_masks; 3447 } 3448 3449 for (s = 0; s < nr_spec; s++) { 3450 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3451 if (ret) { 3452 pr_err("Failed to initialize maps thread mask\n"); 3453 goto out_free; 3454 } 3455 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3456 if (ret) { 3457 pr_err("Failed to initialize affinity thread mask\n"); 3458 goto out_free; 3459 } 3460 3461 /* ignore invalid CPUs but do not allow empty masks */ 3462 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3463 cpus_mask.bits, thread_mask.maps.nbits)) { 3464 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3465 ret = -EINVAL; 3466 goto out_free; 3467 } 3468 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3469 cpus_mask.bits, thread_mask.affinity.nbits)) { 3470 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3471 ret = -EINVAL; 3472 goto out_free; 3473 } 3474 3475 /* do not allow intersection with other masks (full_mask) */ 3476 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3477 thread_mask.maps.nbits)) { 3478 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3479 ret = -EINVAL; 3480 goto 
out_free; 3481 } 3482 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3483 thread_mask.affinity.nbits)) { 3484 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3485 ret = -EINVAL; 3486 goto out_free; 3487 } 3488 3489 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3490 thread_mask.maps.bits, full_mask.maps.nbits); 3491 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3492 thread_mask.affinity.bits, full_mask.maps.nbits); 3493 3494 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3495 if (!thread_masks) { 3496 pr_err("Failed to reallocate thread masks\n"); 3497 ret = -ENOMEM; 3498 goto out_free; 3499 } 3500 rec->thread_masks = thread_masks; 3501 rec->thread_masks[t] = thread_mask; 3502 if (verbose) { 3503 pr_debug("thread_masks[%d]: ", t); 3504 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3505 pr_debug("thread_masks[%d]: ", t); 3506 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3507 } 3508 t++; 3509 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3510 if (ret) { 3511 pr_err("Failed to allocate thread mask\n"); 3512 goto out_free_full_and_cpu_masks; 3513 } 3514 } 3515 rec->nr_threads = t; 3516 pr_debug("nr_threads: %d\n", rec->nr_threads); 3517 if (!rec->nr_threads) 3518 ret = -EINVAL; 3519 3520 out_free: 3521 record__thread_mask_free(&thread_mask); 3522 out_free_full_and_cpu_masks: 3523 record__thread_mask_free(&full_mask); 3524 out_free_cpu_mask: 3525 record__mmap_cpu_mask_free(&cpus_mask); 3526 3527 return ret; 3528 } 3529 3530 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3531 { 3532 int ret; 3533 struct cpu_topology *topo; 3534 3535 topo = cpu_topology__new(); 3536 if (!topo) { 3537 pr_err("Failed to allocate CPU topology\n"); 3538 return -ENOMEM; 3539 } 3540 3541 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3542 topo->core_cpus_list, topo->core_cpus_lists); 3543 cpu_topology__delete(topo); 3544 3545 return ret; 3546 } 3547 3548 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3549 { 3550 int ret; 3551 struct cpu_topology *topo; 3552 3553 topo = cpu_topology__new(); 3554 if (!topo) { 3555 pr_err("Failed to allocate CPU topology\n"); 3556 return -ENOMEM; 3557 } 3558 3559 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3560 topo->package_cpus_list, topo->package_cpus_lists); 3561 cpu_topology__delete(topo); 3562 3563 return ret; 3564 } 3565 3566 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3567 { 3568 u32 s; 3569 int ret; 3570 const char **spec; 3571 struct numa_topology *topo; 3572 3573 topo = numa_topology__new(); 3574 if (!topo) { 3575 pr_err("Failed to allocate NUMA topology\n"); 3576 return -ENOMEM; 3577 } 3578 3579 spec = zalloc(topo->nr * sizeof(char *)); 3580 if (!spec) { 3581 pr_err("Failed to allocate NUMA spec\n"); 3582 ret = -ENOMEM; 3583 goto out_delete_topo; 3584 } 3585 for (s = 0; s < topo->nr; s++) 3586 spec[s] = topo->nodes[s].cpus; 3587 3588 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3589 3590 zfree(&spec); 3591 3592 out_delete_topo: 3593 numa_topology__delete(topo); 3594 3595 return ret; 3596 } 3597 3598 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3599 { 3600 int t, ret; 3601 u32 s, nr_spec = 0; 3602 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3603 char 
*user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3604 3605 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3606 spec = strtok_r(user_spec, ":", &spec_ptr); 3607 if (spec == NULL) 3608 break; 3609 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3610 mask = strtok_r(spec, "/", &mask_ptr); 3611 if (mask == NULL) 3612 break; 3613 pr_debug2(" maps mask: %s\n", mask); 3614 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3615 if (!tmp_spec) { 3616 pr_err("Failed to reallocate maps spec\n"); 3617 ret = -ENOMEM; 3618 goto out_free; 3619 } 3620 maps_spec = tmp_spec; 3621 maps_spec[nr_spec] = dup_mask = strdup(mask); 3622 if (!maps_spec[nr_spec]) { 3623 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3624 ret = -ENOMEM; 3625 goto out_free; 3626 } 3627 mask = strtok_r(NULL, "/", &mask_ptr); 3628 if (mask == NULL) { 3629 pr_err("Invalid thread maps or affinity specs\n"); 3630 ret = -EINVAL; 3631 goto out_free; 3632 } 3633 pr_debug2(" affinity mask: %s\n", mask); 3634 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3635 if (!tmp_spec) { 3636 pr_err("Failed to reallocate affinity spec\n"); 3637 ret = -ENOMEM; 3638 goto out_free; 3639 } 3640 affinity_spec = tmp_spec; 3641 affinity_spec[nr_spec] = strdup(mask); 3642 if (!affinity_spec[nr_spec]) { 3643 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3644 ret = -ENOMEM; 3645 goto out_free; 3646 } 3647 dup_mask = NULL; 3648 nr_spec++; 3649 } 3650 3651 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3652 (const char **)affinity_spec, nr_spec); 3653 3654 out_free: 3655 free(dup_mask); 3656 for (s = 0; s < nr_spec; s++) { 3657 if (maps_spec) 3658 free(maps_spec[s]); 3659 if (affinity_spec) 3660 free(affinity_spec[s]); 3661 } 3662 free(affinity_spec); 3663 free(maps_spec); 3664 3665 return ret; 3666 } 3667 3668 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3669 { 3670 int ret; 3671 3672 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3673 if (ret) 3674 return ret; 3675 3676 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus); 3677 3678 rec->nr_threads = 1; 3679 3680 return 0; 3681 } 3682 3683 static int record__init_thread_masks(struct record *rec) 3684 { 3685 int ret = 0; 3686 struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; 3687 3688 if (!record__threads_enabled(rec)) 3689 return record__init_thread_default_masks(rec, cpus); 3690 3691 if (cpu_map__is_dummy(cpus)) { 3692 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); 3693 return -EINVAL; 3694 } 3695 3696 switch (rec->opts.threads_spec) { 3697 case THREAD_SPEC__CPU: 3698 ret = record__init_thread_cpu_masks(rec, cpus); 3699 break; 3700 case THREAD_SPEC__CORE: 3701 ret = record__init_thread_core_masks(rec, cpus); 3702 break; 3703 case THREAD_SPEC__PACKAGE: 3704 ret = record__init_thread_package_masks(rec, cpus); 3705 break; 3706 case THREAD_SPEC__NUMA: 3707 ret = record__init_thread_numa_masks(rec, cpus); 3708 break; 3709 case THREAD_SPEC__USER: 3710 ret = record__init_thread_user_masks(rec, cpus); 3711 break; 3712 default: 3713 break; 3714 } 3715 3716 return ret; 3717 } 3718 3719 int cmd_record(int argc, const char **argv) 3720 { 3721 int err; 3722 struct record *rec = &record; 3723 char errbuf[BUFSIZ]; 3724 3725 setlocale(LC_ALL, ""); 3726 3727 #ifndef HAVE_LIBBPF_SUPPORT 3728 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 
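/* Built without libbpf: mark the BPF-related options below as unavailable. */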
3729 set_nobuild('\0', "clang-path", true); 3730 set_nobuild('\0', "clang-opt", true); 3731 # undef set_nobuild 3732 #endif 3733 3734 #ifndef HAVE_BPF_PROLOGUE 3735 # if !defined (HAVE_DWARF_SUPPORT) 3736 # define REASON "NO_DWARF=1" 3737 # elif !defined (HAVE_LIBBPF_SUPPORT) 3738 # define REASON "NO_LIBBPF=1" 3739 # else 3740 # define REASON "this architecture doesn't support BPF prologue" 3741 # endif 3742 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 3743 set_nobuild('\0', "vmlinux", true); 3744 # undef set_nobuild 3745 # undef REASON 3746 #endif 3747 3748 rec->opts.affinity = PERF_AFFINITY_SYS; 3749 3750 rec->evlist = evlist__new(); 3751 if (rec->evlist == NULL) 3752 return -ENOMEM; 3753 3754 err = perf_config(perf_record_config, rec); 3755 if (err) 3756 return err; 3757 3758 argc = parse_options(argc, argv, record_options, record_usage, 3759 PARSE_OPT_STOP_AT_NON_OPTION); 3760 if (quiet) 3761 perf_quiet_option(); 3762 3763 err = symbol__validate_sym_arguments(); 3764 if (err) 3765 return err; 3766 3767 perf_debuginfod_setup(&record.debuginfod); 3768 3769 /* Make system wide (-a) the default target. */ 3770 if (!argc && target__none(&rec->opts.target)) 3771 rec->opts.target.system_wide = true; 3772 3773 if (nr_cgroups && !rec->opts.target.system_wide) { 3774 usage_with_options_msg(record_usage, record_options, 3775 "cgroup monitoring only available in system-wide mode"); 3776 3777 } 3778 3779 if (rec->buildid_mmap) { 3780 if (!perf_can_record_build_id()) { 3781 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 3782 err = -EINVAL; 3783 goto out_opts; 3784 } 3785 pr_debug("Enabling build id in mmap2 events.\n"); 3786 /* Enable mmap build id synthesizing. */ 3787 symbol_conf.buildid_mmap2 = true; 3788 /* Enable perf_event_attr::build_id bit. */ 3789 rec->opts.build_id = true; 3790 /* Disable build id cache. 
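 * With the build id carried in the mmap2 events themselves there is no need to post-process the data file to collect build ids.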
*/ 3791 rec->no_buildid = true; 3792 } 3793 3794 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 3795 pr_err("Kernel has no cgroup sampling support.\n"); 3796 err = -EINVAL; 3797 goto out_opts; 3798 } 3799 3800 if (rec->opts.kcore || record__threads_enabled(rec)) 3801 rec->data.is_dir = true; 3802 3803 if (record__threads_enabled(rec)) { 3804 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 3805 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 3806 goto out_opts; 3807 } 3808 if (record__aio_enabled(rec)) { 3809 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 3810 goto out_opts; 3811 } 3812 } 3813 3814 if (rec->opts.comp_level != 0) { 3815 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 3816 rec->no_buildid = true; 3817 } 3818 3819 if (rec->opts.record_switch_events && 3820 !perf_can_record_switch_events()) { 3821 ui__error("kernel does not support recording context switch events\n"); 3822 parse_options_usage(record_usage, record_options, "switch-events", 0); 3823 err = -EINVAL; 3824 goto out_opts; 3825 } 3826 3827 if (switch_output_setup(rec)) { 3828 parse_options_usage(record_usage, record_options, "switch-output", 0); 3829 err = -EINVAL; 3830 goto out_opts; 3831 } 3832 3833 if (rec->switch_output.time) { 3834 signal(SIGALRM, alarm_sig_handler); 3835 alarm(rec->switch_output.time); 3836 } 3837 3838 if (rec->switch_output.num_files) { 3839 rec->switch_output.filenames = calloc(sizeof(char *), 3840 rec->switch_output.num_files); 3841 if (!rec->switch_output.filenames) { 3842 err = -EINVAL; 3843 goto out_opts; 3844 } 3845 } 3846 3847 if (rec->timestamp_filename && record__threads_enabled(rec)) { 3848 rec->timestamp_filename = false; 3849 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 3850 } 3851 3852 /* 3853 * Allow aliases to facilitate the lookup of symbols for address 3854 * filters. Refer to auxtrace_parse_filters(). 3855 */ 3856 symbol_conf.allow_aliases = true; 3857 3858 symbol__init(NULL); 3859 3860 err = record__auxtrace_init(rec); 3861 if (err) 3862 goto out; 3863 3864 if (dry_run) 3865 goto out; 3866 3867 err = bpf__setup_stdout(rec->evlist); 3868 if (err) { 3869 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 3870 pr_err("ERROR: Setup BPF stdout failed: %s\n", 3871 errbuf); 3872 goto out; 3873 } 3874 3875 err = -ENOMEM; 3876 3877 if (rec->no_buildid_cache || rec->no_buildid) { 3878 disable_buildid_cache(); 3879 } else if (rec->switch_output.enabled) { 3880 /* 3881 * In 'perf record --switch-output', disable buildid 3882 * generation by default to reduce data file switching 3883 * overhead. 
	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(sizeof(char *),
						      rec->switch_output.num_files);
		if (!rec->switch_output.filenames) {
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Build ids are still generated if they are
		 * requested explicitly with:
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *		--no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *		disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->core.nr_entries == 0) {
		if (perf_pmu__has_hybrid()) {
			err = evlist__add_default_hybrid(rec->evlist,
							 !record.opts.no_samples);
		} else {
			err = __evlist__add_default(rec->evlist,
						    !record.opts.no_samples);
		}

		if (err < 0) {
			pr_err("Not enough memory for event selector list\n");
			goto out;
		}
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
		pr_err("failed to use cpu list %s\n",
		       rec->opts.target.cpu_list);
		err = -EINVAL;
		goto out;
	}

	rec->opts.target.hybrid = perf_pmu__has_hybrid();

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

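	/*
	 * PERF_RECORD_TEXT_POKE events describe kernel text modifications
	 * (e.g. jump labels, ftrace trampolines) so that tools can decode
	 * the code as it was at the time it executed.
	 */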
	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}
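
/*
 * Signal wiring, as set up in __cmd_record(): SIGUSR2 (snapshot_sig_handler)
 * triggers AUX area snapshots and, with '--switch-output=signal', data file
 * rotation; SIGALRM (alarm_sig_handler) drives time based '--switch-output'
 * rotation.
 */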