1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * builtin-record.c 4 * 5 * Builtin record command: Record the profile of a workload 6 * (or a CPU, or a PID) into the perf.data output file - for 7 * later analysis via perf report. 8 */ 9 #include "builtin.h" 10 11 #include "util/build-id.h" 12 #include <subcmd/parse-options.h> 13 #include "util/parse-events.h" 14 #include "util/config.h" 15 16 #include "util/callchain.h" 17 #include "util/cgroup.h" 18 #include "util/header.h" 19 #include "util/event.h" 20 #include "util/evlist.h" 21 #include "util/evsel.h" 22 #include "util/debug.h" 23 #include "util/mmap.h" 24 #include "util/target.h" 25 #include "util/session.h" 26 #include "util/tool.h" 27 #include "util/symbol.h" 28 #include "util/record.h" 29 #include "util/cpumap.h" 30 #include "util/thread_map.h" 31 #include "util/data.h" 32 #include "util/perf_regs.h" 33 #include "util/auxtrace.h" 34 #include "util/tsc.h" 35 #include "util/parse-branch-options.h" 36 #include "util/parse-regs-options.h" 37 #include "util/perf_api_probe.h" 38 #include "util/llvm-utils.h" 39 #include "util/bpf-loader.h" 40 #include "util/trigger.h" 41 #include "util/perf-hooks.h" 42 #include "util/cpu-set-sched.h" 43 #include "util/synthetic-events.h" 44 #include "util/time-utils.h" 45 #include "util/units.h" 46 #include "util/bpf-event.h" 47 #include "util/util.h" 48 #include "util/pfm.h" 49 #include "util/clockid.h" 50 #include "util/pmu-hybrid.h" 51 #include "util/evlist-hybrid.h" 52 #include "asm/bug.h" 53 #include "perf.h" 54 #include "cputopo.h" 55 56 #include <errno.h> 57 #include <inttypes.h> 58 #include <locale.h> 59 #include <poll.h> 60 #include <pthread.h> 61 #include <unistd.h> 62 #ifndef HAVE_GETTID 63 #include <syscall.h> 64 #endif 65 #include <sched.h> 66 #include <signal.h> 67 #ifdef HAVE_EVENTFD_SUPPORT 68 #include <sys/eventfd.h> 69 #endif 70 #include <sys/mman.h> 71 #include <sys/wait.h> 72 #include <sys/types.h> 73 #include <sys/stat.h> 74 #include <fcntl.h> 75 #include <linux/err.h> 76 #include <linux/string.h> 77 #include <linux/time64.h> 78 #include <linux/zalloc.h> 79 #include <linux/bitmap.h> 80 #include <sys/time.h> 81 82 struct switch_output { 83 bool enabled; 84 bool signal; 85 unsigned long size; 86 unsigned long time; 87 const char *str; 88 bool set; 89 char **filenames; 90 int num_files; 91 int cur_file; 92 }; 93 94 struct thread_mask { 95 struct mmap_cpu_mask maps; 96 struct mmap_cpu_mask affinity; 97 }; 98 99 struct record_thread { 100 pid_t tid; 101 struct thread_mask *mask; 102 struct { 103 int msg[2]; 104 int ack[2]; 105 } pipes; 106 struct fdarray pollfd; 107 int ctlfd_pos; 108 int nr_mmaps; 109 struct mmap **maps; 110 struct mmap **overwrite_maps; 111 struct record *rec; 112 unsigned long long samples; 113 unsigned long waking; 114 u64 bytes_written; 115 u64 bytes_transferred; 116 u64 bytes_compressed; 117 }; 118 119 static __thread struct record_thread *thread; 120 121 enum thread_msg { 122 THREAD_MSG__UNDEFINED = 0, 123 THREAD_MSG__READY, 124 THREAD_MSG__MAX, 125 }; 126 127 static const char *thread_msg_tags[THREAD_MSG__MAX] = { 128 "UNDEFINED", "READY" 129 }; 130 131 enum thread_spec { 132 THREAD_SPEC__UNDEFINED = 0, 133 THREAD_SPEC__CPU, 134 THREAD_SPEC__CORE, 135 THREAD_SPEC__PACKAGE, 136 THREAD_SPEC__NUMA, 137 THREAD_SPEC__USER, 138 THREAD_SPEC__MAX, 139 }; 140 141 static const char *thread_spec_tags[THREAD_SPEC__MAX] = { 142 "undefined", "cpu", "core", "package", "numa", "user" 143 }; 144 145 struct record { 146 struct perf_tool tool; 147 struct record_opts opts; 148 u64 bytes_written; 149 struct perf_data data; 150 struct auxtrace_record *itr; 151 struct evlist *evlist; 152 struct perf_session *session; 153 struct evlist *sb_evlist; 154 pthread_t thread_id; 155 int realtime_prio; 156 bool switch_output_event_set; 157 bool no_buildid; 158 bool no_buildid_set; 159 bool no_buildid_cache; 160 bool no_buildid_cache_set; 161 bool buildid_all; 162 bool buildid_mmap; 163 bool timestamp_filename; 164 bool timestamp_boundary; 165 struct switch_output switch_output; 166 unsigned long long samples; 167 unsigned long output_max_size; /* = 0: unlimited */ 168 struct perf_debuginfod debuginfod; 169 int nr_threads; 170 struct thread_mask *thread_masks; 171 struct record_thread *thread_data; 172 }; 173 174 static volatile int done; 175 176 static volatile int auxtrace_record__snapshot_started; 177 static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 178 static DEFINE_TRIGGER(switch_output_trigger); 179 180 static const char *affinity_tags[PERF_AFFINITY_MAX] = { 181 "SYS", "NODE", "CPU" 182 }; 183 184 #ifndef HAVE_GETTID 185 static inline pid_t gettid(void) 186 { 187 return (pid_t)syscall(__NR_gettid); 188 } 189 #endif 190 191 static int record__threads_enabled(struct record *rec) 192 { 193 return rec->opts.threads_spec; 194 } 195 196 static bool switch_output_signal(struct record *rec) 197 { 198 return rec->switch_output.signal && 199 trigger_is_ready(&switch_output_trigger); 200 } 201 202 static bool switch_output_size(struct record *rec) 203 { 204 return rec->switch_output.size && 205 trigger_is_ready(&switch_output_trigger) && 206 (rec->bytes_written >= rec->switch_output.size); 207 } 208 209 static bool switch_output_time(struct record *rec) 210 { 211 return rec->switch_output.time && 212 trigger_is_ready(&switch_output_trigger); 213 } 214 215 static u64 record__bytes_written(struct record *rec) 216 { 217 int t; 218 u64 bytes_written = rec->bytes_written; 219 struct record_thread *thread_data = rec->thread_data; 220 221 for (t = 0; t < rec->nr_threads; t++) 222 bytes_written += thread_data[t].bytes_written; 223 224 return bytes_written; 225 } 226 227 static bool record__output_max_size_exceeded(struct record *rec) 228 { 229 return rec->output_max_size && 230 (record__bytes_written(rec) >= rec->output_max_size); 231 } 232 233 static int record__write(struct record *rec, struct mmap *map __maybe_unused, 234 void *bf, size_t size) 235 { 236 struct perf_data_file *file = &rec->session->data->file; 237 238 if (map && map->file) 239 file = map->file; 240 241 if (perf_data_file__write(file, bf, size) < 0) { 242 pr_err("failed to write perf data, error: %m\n"); 243 return -1; 244 } 245 246 if (map && map->file) 247 thread->bytes_written += size; 248 else 249 rec->bytes_written += size; 250 251 if (record__output_max_size_exceeded(rec) && !done) { 252 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," 253 " stopping session ]\n", 254 record__bytes_written(rec) >> 10); 255 done = 1; 256 } 257 258 if (switch_output_size(rec)) 259 trigger_hit(&switch_output_trigger); 260 261 return 0; 262 } 263 264 static int record__aio_enabled(struct record *rec); 265 static int record__comp_enabled(struct record *rec); 266 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 267 void *dst, size_t dst_size, void *src, size_t src_size); 268 269 #ifdef HAVE_AIO_SUPPORT 270 static int record__aio_write(struct aiocb *cblock, int trace_fd, 271 void *buf, size_t size, off_t off) 272 { 273 int rc; 274 275 cblock->aio_fildes = trace_fd; 276 cblock->aio_buf = buf; 277 cblock->aio_nbytes = size; 278 cblock->aio_offset = off; 279 cblock->aio_sigevent.sigev_notify = SIGEV_NONE; 280 281 do { 282 rc = aio_write(cblock); 283 if (rc == 0) { 284 break; 285 } else if (errno != EAGAIN) { 286 cblock->aio_fildes = -1; 287 pr_err("failed to queue perf data, error: %m\n"); 288 break; 289 } 290 } while (1); 291 292 return rc; 293 } 294 295 static int record__aio_complete(struct mmap *md, struct aiocb *cblock) 296 { 297 void *rem_buf; 298 off_t rem_off; 299 size_t rem_size; 300 int rc, aio_errno; 301 ssize_t aio_ret, written; 302 303 aio_errno = aio_error(cblock); 304 if (aio_errno == EINPROGRESS) 305 return 0; 306 307 written = aio_ret = aio_return(cblock); 308 if (aio_ret < 0) { 309 if (aio_errno != EINTR) 310 pr_err("failed to write perf data, error: %m\n"); 311 written = 0; 312 } 313 314 rem_size = cblock->aio_nbytes - written; 315 316 if (rem_size == 0) { 317 cblock->aio_fildes = -1; 318 /* 319 * md->refcount is incremented in record__aio_pushfn() for 320 * every aio write request started in record__aio_push() so 321 * decrement it because the request is now complete. 322 */ 323 perf_mmap__put(&md->core); 324 rc = 1; 325 } else { 326 /* 327 * aio write request may require restart with the 328 * reminder if the kernel didn't write whole 329 * chunk at once. 330 */ 331 rem_off = cblock->aio_offset + written; 332 rem_buf = (void *)(cblock->aio_buf + written); 333 record__aio_write(cblock, cblock->aio_fildes, 334 rem_buf, rem_size, rem_off); 335 rc = 0; 336 } 337 338 return rc; 339 } 340 341 static int record__aio_sync(struct mmap *md, bool sync_all) 342 { 343 struct aiocb **aiocb = md->aio.aiocb; 344 struct aiocb *cblocks = md->aio.cblocks; 345 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */ 346 int i, do_suspend; 347 348 do { 349 do_suspend = 0; 350 for (i = 0; i < md->aio.nr_cblocks; ++i) { 351 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) { 352 if (sync_all) 353 aiocb[i] = NULL; 354 else 355 return i; 356 } else { 357 /* 358 * Started aio write is not complete yet 359 * so it has to be waited before the 360 * next allocation. 361 */ 362 aiocb[i] = &cblocks[i]; 363 do_suspend = 1; 364 } 365 } 366 if (!do_suspend) 367 return -1; 368 369 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) { 370 if (!(errno == EAGAIN || errno == EINTR)) 371 pr_err("failed to sync perf data, error: %m\n"); 372 } 373 } while (1); 374 } 375 376 struct record_aio { 377 struct record *rec; 378 void *data; 379 size_t size; 380 }; 381 382 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size) 383 { 384 struct record_aio *aio = to; 385 386 /* 387 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer 388 * to release space in the kernel buffer as fast as possible, calling 389 * perf_mmap__consume() from perf_mmap__push() function. 390 * 391 * That lets the kernel to proceed with storing more profiling data into 392 * the kernel buffer earlier than other per-cpu kernel buffers are handled. 393 * 394 * Coping can be done in two steps in case the chunk of profiling data 395 * crosses the upper bound of the kernel buffer. In this case we first move 396 * part of data from map->start till the upper bound and then the reminder 397 * from the beginning of the kernel buffer till the end of the data chunk. 398 */ 399 400 if (record__comp_enabled(aio->rec)) { 401 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, 402 mmap__mmap_len(map) - aio->size, 403 buf, size); 404 } else { 405 memcpy(aio->data + aio->size, buf, size); 406 } 407 408 if (!aio->size) { 409 /* 410 * Increment map->refcount to guard map->aio.data[] buffer 411 * from premature deallocation because map object can be 412 * released earlier than aio write request started on 413 * map->aio.data[] buffer is complete. 414 * 415 * perf_mmap__put() is done at record__aio_complete() 416 * after started aio request completion or at record__aio_push() 417 * if the request failed to start. 418 */ 419 perf_mmap__get(&map->core); 420 } 421 422 aio->size += size; 423 424 return size; 425 } 426 427 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) 428 { 429 int ret, idx; 430 int trace_fd = rec->session->data->file.fd; 431 struct record_aio aio = { .rec = rec, .size = 0 }; 432 433 /* 434 * Call record__aio_sync() to wait till map->aio.data[] buffer 435 * becomes available after previous aio write operation. 436 */ 437 438 idx = record__aio_sync(map, false); 439 aio.data = map->aio.data[idx]; 440 ret = perf_mmap__push(map, &aio, record__aio_pushfn); 441 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */ 442 return ret; 443 444 rec->samples++; 445 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off); 446 if (!ret) { 447 *off += aio.size; 448 rec->bytes_written += aio.size; 449 if (switch_output_size(rec)) 450 trigger_hit(&switch_output_trigger); 451 } else { 452 /* 453 * Decrement map->refcount incremented in record__aio_pushfn() 454 * back if record__aio_write() operation failed to start, otherwise 455 * map->refcount is decremented in record__aio_complete() after 456 * aio write operation finishes successfully. 457 */ 458 perf_mmap__put(&map->core); 459 } 460 461 return ret; 462 } 463 464 static off_t record__aio_get_pos(int trace_fd) 465 { 466 return lseek(trace_fd, 0, SEEK_CUR); 467 } 468 469 static void record__aio_set_pos(int trace_fd, off_t pos) 470 { 471 lseek(trace_fd, pos, SEEK_SET); 472 } 473 474 static void record__aio_mmap_read_sync(struct record *rec) 475 { 476 int i; 477 struct evlist *evlist = rec->evlist; 478 struct mmap *maps = evlist->mmap; 479 480 if (!record__aio_enabled(rec)) 481 return; 482 483 for (i = 0; i < evlist->core.nr_mmaps; i++) { 484 struct mmap *map = &maps[i]; 485 486 if (map->core.base) 487 record__aio_sync(map, true); 488 } 489 } 490 491 static int nr_cblocks_default = 1; 492 static int nr_cblocks_max = 4; 493 494 static int record__aio_parse(const struct option *opt, 495 const char *str, 496 int unset) 497 { 498 struct record_opts *opts = (struct record_opts *)opt->value; 499 500 if (unset) { 501 opts->nr_cblocks = 0; 502 } else { 503 if (str) 504 opts->nr_cblocks = strtol(str, NULL, 0); 505 if (!opts->nr_cblocks) 506 opts->nr_cblocks = nr_cblocks_default; 507 } 508 509 return 0; 510 } 511 #else /* HAVE_AIO_SUPPORT */ 512 static int nr_cblocks_max = 0; 513 514 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused, 515 off_t *off __maybe_unused) 516 { 517 return -1; 518 } 519 520 static off_t record__aio_get_pos(int trace_fd __maybe_unused) 521 { 522 return -1; 523 } 524 525 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused) 526 { 527 } 528 529 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused) 530 { 531 } 532 #endif 533 534 static int record__aio_enabled(struct record *rec) 535 { 536 return rec->opts.nr_cblocks > 0; 537 } 538 539 #define MMAP_FLUSH_DEFAULT 1 540 static int record__mmap_flush_parse(const struct option *opt, 541 const char *str, 542 int unset) 543 { 544 int flush_max; 545 struct record_opts *opts = (struct record_opts *)opt->value; 546 static struct parse_tag tags[] = { 547 { .tag = 'B', .mult = 1 }, 548 { .tag = 'K', .mult = 1 << 10 }, 549 { .tag = 'M', .mult = 1 << 20 }, 550 { .tag = 'G', .mult = 1 << 30 }, 551 { .tag = 0 }, 552 }; 553 554 if (unset) 555 return 0; 556 557 if (str) { 558 opts->mmap_flush = parse_tag_value(str, tags); 559 if (opts->mmap_flush == (int)-1) 560 opts->mmap_flush = strtol(str, NULL, 0); 561 } 562 563 if (!opts->mmap_flush) 564 opts->mmap_flush = MMAP_FLUSH_DEFAULT; 565 566 flush_max = evlist__mmap_size(opts->mmap_pages); 567 flush_max /= 4; 568 if (opts->mmap_flush > flush_max) 569 opts->mmap_flush = flush_max; 570 571 return 0; 572 } 573 574 #ifdef HAVE_ZSTD_SUPPORT 575 static unsigned int comp_level_default = 1; 576 577 static int record__parse_comp_level(const struct option *opt, const char *str, int unset) 578 { 579 struct record_opts *opts = opt->value; 580 581 if (unset) { 582 opts->comp_level = 0; 583 } else { 584 if (str) 585 opts->comp_level = strtol(str, NULL, 0); 586 if (!opts->comp_level) 587 opts->comp_level = comp_level_default; 588 } 589 590 return 0; 591 } 592 #endif 593 static unsigned int comp_level_max = 22; 594 595 static int record__comp_enabled(struct record *rec) 596 { 597 return rec->opts.comp_level > 0; 598 } 599 600 static int process_synthesized_event(struct perf_tool *tool, 601 union perf_event *event, 602 struct perf_sample *sample __maybe_unused, 603 struct machine *machine __maybe_unused) 604 { 605 struct record *rec = container_of(tool, struct record, tool); 606 return record__write(rec, NULL, event, event->header.size); 607 } 608 609 static int process_locked_synthesized_event(struct perf_tool *tool, 610 union perf_event *event, 611 struct perf_sample *sample __maybe_unused, 612 struct machine *machine __maybe_unused) 613 { 614 static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; 615 int ret; 616 617 pthread_mutex_lock(&synth_lock); 618 ret = process_synthesized_event(tool, event, sample, machine); 619 pthread_mutex_unlock(&synth_lock); 620 return ret; 621 } 622 623 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) 624 { 625 struct record *rec = to; 626 627 if (record__comp_enabled(rec)) { 628 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); 629 bf = map->data; 630 } 631 632 thread->samples++; 633 return record__write(rec, map, bf, size); 634 } 635 636 static volatile int signr = -1; 637 static volatile int child_finished; 638 #ifdef HAVE_EVENTFD_SUPPORT 639 static int done_fd = -1; 640 #endif 641 642 static void sig_handler(int sig) 643 { 644 if (sig == SIGCHLD) 645 child_finished = 1; 646 else 647 signr = sig; 648 649 done = 1; 650 #ifdef HAVE_EVENTFD_SUPPORT 651 { 652 u64 tmp = 1; 653 /* 654 * It is possible for this signal handler to run after done is checked 655 * in the main loop, but before the perf counter fds are polled. If this 656 * happens, the poll() will continue to wait even though done is set, 657 * and will only break out if either another signal is received, or the 658 * counters are ready for read. To ensure the poll() doesn't sleep when 659 * done is set, use an eventfd (done_fd) to wake up the poll(). 660 */ 661 if (write(done_fd, &tmp, sizeof(tmp)) < 0) 662 pr_err("failed to signal wakeup fd, error: %m\n"); 663 } 664 #endif // HAVE_EVENTFD_SUPPORT 665 } 666 667 static void sigsegv_handler(int sig) 668 { 669 perf_hooks__recover(); 670 sighandler_dump_stack(sig); 671 } 672 673 static void record__sig_exit(void) 674 { 675 if (signr == -1) 676 return; 677 678 signal(signr, SIG_DFL); 679 raise(signr); 680 } 681 682 #ifdef HAVE_AUXTRACE_SUPPORT 683 684 static int record__process_auxtrace(struct perf_tool *tool, 685 struct mmap *map, 686 union perf_event *event, void *data1, 687 size_t len1, void *data2, size_t len2) 688 { 689 struct record *rec = container_of(tool, struct record, tool); 690 struct perf_data *data = &rec->data; 691 size_t padding; 692 u8 pad[8] = {0}; 693 694 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) { 695 off_t file_offset; 696 int fd = perf_data__fd(data); 697 int err; 698 699 file_offset = lseek(fd, 0, SEEK_CUR); 700 if (file_offset == -1) 701 return -1; 702 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, 703 event, file_offset); 704 if (err) 705 return err; 706 } 707 708 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ 709 padding = (len1 + len2) & 7; 710 if (padding) 711 padding = 8 - padding; 712 713 record__write(rec, map, event, event->header.size); 714 record__write(rec, map, data1, len1); 715 if (len2) 716 record__write(rec, map, data2, len2); 717 record__write(rec, map, &pad, padding); 718 719 return 0; 720 } 721 722 static int record__auxtrace_mmap_read(struct record *rec, 723 struct mmap *map) 724 { 725 int ret; 726 727 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, 728 record__process_auxtrace); 729 if (ret < 0) 730 return ret; 731 732 if (ret) 733 rec->samples++; 734 735 return 0; 736 } 737 738 static int record__auxtrace_mmap_read_snapshot(struct record *rec, 739 struct mmap *map) 740 { 741 int ret; 742 743 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, 744 record__process_auxtrace, 745 rec->opts.auxtrace_snapshot_size); 746 if (ret < 0) 747 return ret; 748 749 if (ret) 750 rec->samples++; 751 752 return 0; 753 } 754 755 static int record__auxtrace_read_snapshot_all(struct record *rec) 756 { 757 int i; 758 int rc = 0; 759 760 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) { 761 struct mmap *map = &rec->evlist->mmap[i]; 762 763 if (!map->auxtrace_mmap.base) 764 continue; 765 766 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { 767 rc = -1; 768 goto out; 769 } 770 } 771 out: 772 return rc; 773 } 774 775 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit) 776 { 777 pr_debug("Recording AUX area tracing snapshot\n"); 778 if (record__auxtrace_read_snapshot_all(rec) < 0) { 779 trigger_error(&auxtrace_snapshot_trigger); 780 } else { 781 if (auxtrace_record__snapshot_finish(rec->itr, on_exit)) 782 trigger_error(&auxtrace_snapshot_trigger); 783 else 784 trigger_ready(&auxtrace_snapshot_trigger); 785 } 786 } 787 788 static int record__auxtrace_snapshot_exit(struct record *rec) 789 { 790 if (trigger_is_error(&auxtrace_snapshot_trigger)) 791 return 0; 792 793 if (!auxtrace_record__snapshot_started && 794 auxtrace_record__snapshot_start(rec->itr)) 795 return -1; 796 797 record__read_auxtrace_snapshot(rec, true); 798 if (trigger_is_error(&auxtrace_snapshot_trigger)) 799 return -1; 800 801 return 0; 802 } 803 804 static int record__auxtrace_init(struct record *rec) 805 { 806 int err; 807 808 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts) 809 && record__threads_enabled(rec)) { 810 pr_err("AUX area tracing options are not available in parallel streaming mode.\n"); 811 return -EINVAL; 812 } 813 814 if (!rec->itr) { 815 rec->itr = auxtrace_record__init(rec->evlist, &err); 816 if (err) 817 return err; 818 } 819 820 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, 821 rec->opts.auxtrace_snapshot_opts); 822 if (err) 823 return err; 824 825 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts, 826 rec->opts.auxtrace_sample_opts); 827 if (err) 828 return err; 829 830 auxtrace_regroup_aux_output(rec->evlist); 831 832 return auxtrace_parse_filters(rec->evlist); 833 } 834 835 #else 836 837 static inline 838 int record__auxtrace_mmap_read(struct record *rec __maybe_unused, 839 struct mmap *map __maybe_unused) 840 { 841 return 0; 842 } 843 844 static inline 845 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused, 846 bool on_exit __maybe_unused) 847 { 848 } 849 850 static inline 851 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) 852 { 853 return 0; 854 } 855 856 static inline 857 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused) 858 { 859 return 0; 860 } 861 862 static int record__auxtrace_init(struct record *rec __maybe_unused) 863 { 864 return 0; 865 } 866 867 #endif 868 869 static int record__config_text_poke(struct evlist *evlist) 870 { 871 struct evsel *evsel; 872 int err; 873 874 /* Nothing to do if text poke is already configured */ 875 evlist__for_each_entry(evlist, evsel) { 876 if (evsel->core.attr.text_poke) 877 return 0; 878 } 879 880 err = parse_events(evlist, "dummy:u", NULL); 881 if (err) 882 return err; 883 884 evsel = evlist__last(evlist); 885 886 evsel->core.attr.freq = 0; 887 evsel->core.attr.sample_period = 1; 888 evsel->core.attr.text_poke = 1; 889 evsel->core.attr.ksymbol = 1; 890 891 evsel->core.system_wide = true; 892 evsel->no_aux_samples = true; 893 evsel->immediate = true; 894 895 /* Text poke must be collected on all CPUs */ 896 perf_cpu_map__put(evsel->core.own_cpus); 897 evsel->core.own_cpus = perf_cpu_map__new(NULL); 898 perf_cpu_map__put(evsel->core.cpus); 899 evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); 900 901 evsel__set_sample_bit(evsel, TIME); 902 903 return 0; 904 } 905 906 static bool record__kcore_readable(struct machine *machine) 907 { 908 char kcore[PATH_MAX]; 909 int fd; 910 911 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir); 912 913 fd = open(kcore, O_RDONLY); 914 if (fd < 0) 915 return false; 916 917 close(fd); 918 919 return true; 920 } 921 922 static int record__kcore_copy(struct machine *machine, struct perf_data *data) 923 { 924 char from_dir[PATH_MAX]; 925 char kcore_dir[PATH_MAX]; 926 int ret; 927 928 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir); 929 930 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir)); 931 if (ret) 932 return ret; 933 934 return kcore_copy(from_dir, kcore_dir); 935 } 936 937 static void record__thread_data_init_pipes(struct record_thread *thread_data) 938 { 939 thread_data->pipes.msg[0] = -1; 940 thread_data->pipes.msg[1] = -1; 941 thread_data->pipes.ack[0] = -1; 942 thread_data->pipes.ack[1] = -1; 943 } 944 945 static int record__thread_data_open_pipes(struct record_thread *thread_data) 946 { 947 if (pipe(thread_data->pipes.msg)) 948 return -EINVAL; 949 950 if (pipe(thread_data->pipes.ack)) { 951 close(thread_data->pipes.msg[0]); 952 thread_data->pipes.msg[0] = -1; 953 close(thread_data->pipes.msg[1]); 954 thread_data->pipes.msg[1] = -1; 955 return -EINVAL; 956 } 957 958 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data, 959 thread_data->pipes.msg[0], thread_data->pipes.msg[1], 960 thread_data->pipes.ack[0], thread_data->pipes.ack[1]); 961 962 return 0; 963 } 964 965 static void record__thread_data_close_pipes(struct record_thread *thread_data) 966 { 967 if (thread_data->pipes.msg[0] != -1) { 968 close(thread_data->pipes.msg[0]); 969 thread_data->pipes.msg[0] = -1; 970 } 971 if (thread_data->pipes.msg[1] != -1) { 972 close(thread_data->pipes.msg[1]); 973 thread_data->pipes.msg[1] = -1; 974 } 975 if (thread_data->pipes.ack[0] != -1) { 976 close(thread_data->pipes.ack[0]); 977 thread_data->pipes.ack[0] = -1; 978 } 979 if (thread_data->pipes.ack[1] != -1) { 980 close(thread_data->pipes.ack[1]); 981 thread_data->pipes.ack[1] = -1; 982 } 983 } 984 985 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) 986 { 987 int m, tm, nr_mmaps = evlist->core.nr_mmaps; 988 struct mmap *mmap = evlist->mmap; 989 struct mmap *overwrite_mmap = evlist->overwrite_mmap; 990 struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; 991 992 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 993 thread_data->mask->maps.nbits); 994 if (mmap) { 995 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 996 if (!thread_data->maps) 997 return -ENOMEM; 998 } 999 if (overwrite_mmap) { 1000 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *)); 1001 if (!thread_data->overwrite_maps) { 1002 zfree(&thread_data->maps); 1003 return -ENOMEM; 1004 } 1005 } 1006 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data, 1007 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); 1008 1009 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { 1010 if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) { 1011 if (thread_data->maps) { 1012 thread_data->maps[tm] = &mmap[m]; 1013 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n", 1014 thread_data, cpus->map[m].cpu, tm, m); 1015 } 1016 if (thread_data->overwrite_maps) { 1017 thread_data->overwrite_maps[tm] = &overwrite_mmap[m]; 1018 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n", 1019 thread_data, cpus->map[m].cpu, tm, m); 1020 } 1021 tm++; 1022 } 1023 } 1024 1025 return 0; 1026 } 1027 1028 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist) 1029 { 1030 int f, tm, pos; 1031 struct mmap *map, *overwrite_map; 1032 1033 fdarray__init(&thread_data->pollfd, 64); 1034 1035 for (tm = 0; tm < thread_data->nr_mmaps; tm++) { 1036 map = thread_data->maps ? thread_data->maps[tm] : NULL; 1037 overwrite_map = thread_data->overwrite_maps ? 1038 thread_data->overwrite_maps[tm] : NULL; 1039 1040 for (f = 0; f < evlist->core.pollfd.nr; f++) { 1041 void *ptr = evlist->core.pollfd.priv[f].ptr; 1042 1043 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) { 1044 pos = fdarray__dup_entry_from(&thread_data->pollfd, f, 1045 &evlist->core.pollfd); 1046 if (pos < 0) 1047 return pos; 1048 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n", 1049 thread_data, pos, evlist->core.pollfd.entries[f].fd); 1050 } 1051 } 1052 } 1053 1054 return 0; 1055 } 1056 1057 static void record__free_thread_data(struct record *rec) 1058 { 1059 int t; 1060 struct record_thread *thread_data = rec->thread_data; 1061 1062 if (thread_data == NULL) 1063 return; 1064 1065 for (t = 0; t < rec->nr_threads; t++) { 1066 record__thread_data_close_pipes(&thread_data[t]); 1067 zfree(&thread_data[t].maps); 1068 zfree(&thread_data[t].overwrite_maps); 1069 fdarray__exit(&thread_data[t].pollfd); 1070 } 1071 1072 zfree(&rec->thread_data); 1073 } 1074 1075 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist) 1076 { 1077 int t, ret; 1078 struct record_thread *thread_data; 1079 1080 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data))); 1081 if (!rec->thread_data) { 1082 pr_err("Failed to allocate thread data\n"); 1083 return -ENOMEM; 1084 } 1085 thread_data = rec->thread_data; 1086 1087 for (t = 0; t < rec->nr_threads; t++) 1088 record__thread_data_init_pipes(&thread_data[t]); 1089 1090 for (t = 0; t < rec->nr_threads; t++) { 1091 thread_data[t].rec = rec; 1092 thread_data[t].mask = &rec->thread_masks[t]; 1093 ret = record__thread_data_init_maps(&thread_data[t], evlist); 1094 if (ret) { 1095 pr_err("Failed to initialize thread[%d] maps\n", t); 1096 goto out_free; 1097 } 1098 ret = record__thread_data_init_pollfd(&thread_data[t], evlist); 1099 if (ret) { 1100 pr_err("Failed to initialize thread[%d] pollfd\n", t); 1101 goto out_free; 1102 } 1103 if (t) { 1104 thread_data[t].tid = -1; 1105 ret = record__thread_data_open_pipes(&thread_data[t]); 1106 if (ret) { 1107 pr_err("Failed to open thread[%d] communication pipes\n", t); 1108 goto out_free; 1109 } 1110 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0], 1111 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable); 1112 if (ret < 0) { 1113 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t); 1114 goto out_free; 1115 } 1116 thread_data[t].ctlfd_pos = ret; 1117 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1118 thread_data, thread_data[t].ctlfd_pos, 1119 thread_data[t].pipes.msg[0]); 1120 } else { 1121 thread_data[t].tid = gettid(); 1122 if (evlist->ctl_fd.pos == -1) 1123 continue; 1124 ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos, 1125 &evlist->core.pollfd); 1126 if (ret < 0) { 1127 pr_err("Failed to duplicate descriptor in main thread pollfd\n"); 1128 goto out_free; 1129 } 1130 thread_data[t].ctlfd_pos = ret; 1131 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n", 1132 thread_data, thread_data[t].ctlfd_pos, 1133 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd); 1134 } 1135 } 1136 1137 return 0; 1138 1139 out_free: 1140 record__free_thread_data(rec); 1141 1142 return ret; 1143 } 1144 1145 static int record__mmap_evlist(struct record *rec, 1146 struct evlist *evlist) 1147 { 1148 int i, ret; 1149 struct record_opts *opts = &rec->opts; 1150 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode || 1151 opts->auxtrace_sample_mode; 1152 char msg[512]; 1153 1154 if (opts->affinity != PERF_AFFINITY_SYS) 1155 cpu__setup_cpunode_map(); 1156 1157 if (evlist__mmap_ex(evlist, opts->mmap_pages, 1158 opts->auxtrace_mmap_pages, 1159 auxtrace_overwrite, 1160 opts->nr_cblocks, opts->affinity, 1161 opts->mmap_flush, opts->comp_level) < 0) { 1162 if (errno == EPERM) { 1163 pr_err("Permission error mapping pages.\n" 1164 "Consider increasing " 1165 "/proc/sys/kernel/perf_event_mlock_kb,\n" 1166 "or try again with a smaller value of -m/--mmap_pages.\n" 1167 "(current value: %u,%u)\n", 1168 opts->mmap_pages, opts->auxtrace_mmap_pages); 1169 return -errno; 1170 } else { 1171 pr_err("failed to mmap with %d (%s)\n", errno, 1172 str_error_r(errno, msg, sizeof(msg))); 1173 if (errno) 1174 return -errno; 1175 else 1176 return -EINVAL; 1177 } 1178 } 1179 1180 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack)) 1181 return -1; 1182 1183 ret = record__alloc_thread_data(rec, evlist); 1184 if (ret) 1185 return ret; 1186 1187 if (record__threads_enabled(rec)) { 1188 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps); 1189 if (ret) { 1190 pr_err("Failed to create data directory: %s\n", strerror(-ret)); 1191 return ret; 1192 } 1193 for (i = 0; i < evlist->core.nr_mmaps; i++) { 1194 if (evlist->mmap) 1195 evlist->mmap[i].file = &rec->data.dir.files[i]; 1196 if (evlist->overwrite_mmap) 1197 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i]; 1198 } 1199 } 1200 1201 return 0; 1202 } 1203 1204 static int record__mmap(struct record *rec) 1205 { 1206 return record__mmap_evlist(rec, rec->evlist); 1207 } 1208 1209 static int record__open(struct record *rec) 1210 { 1211 char msg[BUFSIZ]; 1212 struct evsel *pos; 1213 struct evlist *evlist = rec->evlist; 1214 struct perf_session *session = rec->session; 1215 struct record_opts *opts = &rec->opts; 1216 int rc = 0; 1217 1218 /* 1219 * For initial_delay, system wide or a hybrid system, we need to add a 1220 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay 1221 * of waiting or event synthesis. 1222 */ 1223 if (opts->initial_delay || target__has_cpu(&opts->target) || 1224 perf_pmu__has_hybrid()) { 1225 pos = evlist__get_tracking_event(evlist); 1226 if (!evsel__is_dummy_event(pos)) { 1227 /* Set up dummy event. */ 1228 if (evlist__add_dummy(evlist)) 1229 return -ENOMEM; 1230 pos = evlist__last(evlist); 1231 evlist__set_tracking_event(evlist, pos); 1232 } 1233 1234 /* 1235 * Enable the dummy event when the process is forked for 1236 * initial_delay, immediately for system wide. 1237 */ 1238 if (opts->initial_delay && !pos->immediate && 1239 !target__has_cpu(&opts->target)) 1240 pos->core.attr.enable_on_exec = 1; 1241 else 1242 pos->immediate = 1; 1243 } 1244 1245 evlist__config(evlist, opts, &callchain_param); 1246 1247 evlist__for_each_entry(evlist, pos) { 1248 try_again: 1249 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { 1250 if (evsel__fallback(pos, errno, msg, sizeof(msg))) { 1251 if (verbose > 0) 1252 ui__warning("%s\n", msg); 1253 goto try_again; 1254 } 1255 if ((errno == EINVAL || errno == EBADF) && 1256 pos->core.leader != &pos->core && 1257 pos->weak_group) { 1258 pos = evlist__reset_weak_group(evlist, pos, true); 1259 goto try_again; 1260 } 1261 rc = -errno; 1262 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); 1263 ui__error("%s\n", msg); 1264 goto out; 1265 } 1266 1267 pos->supported = true; 1268 } 1269 1270 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) { 1271 pr_warning( 1272 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" 1273 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" 1274 "Samples in kernel functions may not be resolved if a suitable vmlinux\n" 1275 "file is not found in the buildid cache or in the vmlinux path.\n\n" 1276 "Samples in kernel modules won't be resolved at all.\n\n" 1277 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n" 1278 "even with a suitable vmlinux or kallsyms file.\n\n"); 1279 } 1280 1281 if (evlist__apply_filters(evlist, &pos)) { 1282 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", 1283 pos->filter, evsel__name(pos), errno, 1284 str_error_r(errno, msg, sizeof(msg))); 1285 rc = -1; 1286 goto out; 1287 } 1288 1289 rc = record__mmap(rec); 1290 if (rc) 1291 goto out; 1292 1293 session->evlist = evlist; 1294 perf_session__set_id_hdr_size(session); 1295 out: 1296 return rc; 1297 } 1298 1299 static void set_timestamp_boundary(struct record *rec, u64 sample_time) 1300 { 1301 if (rec->evlist->first_sample_time == 0) 1302 rec->evlist->first_sample_time = sample_time; 1303 1304 if (sample_time) 1305 rec->evlist->last_sample_time = sample_time; 1306 } 1307 1308 static int process_sample_event(struct perf_tool *tool, 1309 union perf_event *event, 1310 struct perf_sample *sample, 1311 struct evsel *evsel, 1312 struct machine *machine) 1313 { 1314 struct record *rec = container_of(tool, struct record, tool); 1315 1316 set_timestamp_boundary(rec, sample->time); 1317 1318 if (rec->buildid_all) 1319 return 0; 1320 1321 rec->samples++; 1322 return build_id__mark_dso_hit(tool, event, sample, evsel, machine); 1323 } 1324 1325 static int process_buildids(struct record *rec) 1326 { 1327 struct perf_session *session = rec->session; 1328 1329 if (perf_data__size(&rec->data) == 0) 1330 return 0; 1331 1332 /* 1333 * During this process, it'll load kernel map and replace the 1334 * dso->long_name to a real pathname it found. In this case 1335 * we prefer the vmlinux path like 1336 * /lib/modules/3.16.4/build/vmlinux 1337 * 1338 * rather than build-id path (in debug directory). 1339 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 1340 */ 1341 symbol_conf.ignore_vmlinux_buildid = true; 1342 1343 /* 1344 * If --buildid-all is given, it marks all DSO regardless of hits, 1345 * so no need to process samples. But if timestamp_boundary is enabled, 1346 * it still needs to walk on all samples to get the timestamps of 1347 * first/last samples. 1348 */ 1349 if (rec->buildid_all && !rec->timestamp_boundary) 1350 rec->tool.sample = NULL; 1351 1352 return perf_session__process_events(session); 1353 } 1354 1355 static void perf_event__synthesize_guest_os(struct machine *machine, void *data) 1356 { 1357 int err; 1358 struct perf_tool *tool = data; 1359 /* 1360 *As for guest kernel when processing subcommand record&report, 1361 *we arrange module mmap prior to guest kernel mmap and trigger 1362 *a preload dso because default guest module symbols are loaded 1363 *from guest kallsyms instead of /lib/modules/XXX/XXX. This 1364 *method is used to avoid symbol missing when the first addr is 1365 *in module instead of in guest kernel. 1366 */ 1367 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1368 machine); 1369 if (err < 0) 1370 pr_err("Couldn't record guest kernel [%d]'s reference" 1371 " relocation symbol.\n", machine->pid); 1372 1373 /* 1374 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 1375 * have no _text sometimes. 1376 */ 1377 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1378 machine); 1379 if (err < 0) 1380 pr_err("Couldn't record guest kernel [%d]'s reference" 1381 " relocation symbol.\n", machine->pid); 1382 } 1383 1384 static struct perf_event_header finished_round_event = { 1385 .size = sizeof(struct perf_event_header), 1386 .type = PERF_RECORD_FINISHED_ROUND, 1387 }; 1388 1389 static void record__adjust_affinity(struct record *rec, struct mmap *map) 1390 { 1391 if (rec->opts.affinity != PERF_AFFINITY_SYS && 1392 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits, 1393 thread->mask->affinity.nbits)) { 1394 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits); 1395 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits, 1396 map->affinity_mask.bits, thread->mask->affinity.nbits); 1397 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 1398 (cpu_set_t *)thread->mask->affinity.bits); 1399 if (verbose == 2) { 1400 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu()); 1401 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity"); 1402 } 1403 } 1404 } 1405 1406 static size_t process_comp_header(void *record, size_t increment) 1407 { 1408 struct perf_record_compressed *event = record; 1409 size_t size = sizeof(*event); 1410 1411 if (increment) { 1412 event->header.size += increment; 1413 return increment; 1414 } 1415 1416 event->header.type = PERF_RECORD_COMPRESSED; 1417 event->header.size = size; 1418 1419 return size; 1420 } 1421 1422 static size_t zstd_compress(struct perf_session *session, struct mmap *map, 1423 void *dst, size_t dst_size, void *src, size_t src_size) 1424 { 1425 size_t compressed; 1426 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; 1427 struct zstd_data *zstd_data = &session->zstd_data; 1428 1429 if (map && map->file) 1430 zstd_data = &map->zstd_data; 1431 1432 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, 1433 max_record_size, process_comp_header); 1434 1435 if (map && map->file) { 1436 thread->bytes_transferred += src_size; 1437 thread->bytes_compressed += compressed; 1438 } else { 1439 session->bytes_transferred += src_size; 1440 session->bytes_compressed += compressed; 1441 } 1442 1443 return compressed; 1444 } 1445 1446 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist, 1447 bool overwrite, bool synch) 1448 { 1449 u64 bytes_written = rec->bytes_written; 1450 int i; 1451 int rc = 0; 1452 int nr_mmaps; 1453 struct mmap **maps; 1454 int trace_fd = rec->data.file.fd; 1455 off_t off = 0; 1456 1457 if (!evlist) 1458 return 0; 1459 1460 nr_mmaps = thread->nr_mmaps; 1461 maps = overwrite ? thread->overwrite_maps : thread->maps; 1462 1463 if (!maps) 1464 return 0; 1465 1466 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) 1467 return 0; 1468 1469 if (record__aio_enabled(rec)) 1470 off = record__aio_get_pos(trace_fd); 1471 1472 for (i = 0; i < nr_mmaps; i++) { 1473 u64 flush = 0; 1474 struct mmap *map = maps[i]; 1475 1476 if (map->core.base) { 1477 record__adjust_affinity(rec, map); 1478 if (synch) { 1479 flush = map->core.flush; 1480 map->core.flush = 1; 1481 } 1482 if (!record__aio_enabled(rec)) { 1483 if (perf_mmap__push(map, rec, record__pushfn) < 0) { 1484 if (synch) 1485 map->core.flush = flush; 1486 rc = -1; 1487 goto out; 1488 } 1489 } else { 1490 if (record__aio_push(rec, map, &off) < 0) { 1491 record__aio_set_pos(trace_fd, off); 1492 if (synch) 1493 map->core.flush = flush; 1494 rc = -1; 1495 goto out; 1496 } 1497 } 1498 if (synch) 1499 map->core.flush = flush; 1500 } 1501 1502 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && 1503 !rec->opts.auxtrace_sample_mode && 1504 record__auxtrace_mmap_read(rec, map) != 0) { 1505 rc = -1; 1506 goto out; 1507 } 1508 } 1509 1510 if (record__aio_enabled(rec)) 1511 record__aio_set_pos(trace_fd, off); 1512 1513 /* 1514 * Mark the round finished in case we wrote 1515 * at least one event. 1516 * 1517 * No need for round events in directory mode, 1518 * because per-cpu maps and files have data 1519 * sorted by kernel. 1520 */ 1521 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written) 1522 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); 1523 1524 if (overwrite) 1525 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); 1526 out: 1527 return rc; 1528 } 1529 1530 static int record__mmap_read_all(struct record *rec, bool synch) 1531 { 1532 int err; 1533 1534 err = record__mmap_read_evlist(rec, rec->evlist, false, synch); 1535 if (err) 1536 return err; 1537 1538 return record__mmap_read_evlist(rec, rec->evlist, true, synch); 1539 } 1540 1541 static void record__thread_munmap_filtered(struct fdarray *fda, int fd, 1542 void *arg __maybe_unused) 1543 { 1544 struct perf_mmap *map = fda->priv[fd].ptr; 1545 1546 if (map) 1547 perf_mmap__put(map); 1548 } 1549 1550 static void *record__thread(void *arg) 1551 { 1552 enum thread_msg msg = THREAD_MSG__READY; 1553 bool terminate = false; 1554 struct fdarray *pollfd; 1555 int err, ctlfd_pos; 1556 1557 thread = arg; 1558 thread->tid = gettid(); 1559 1560 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1561 if (err == -1) 1562 pr_warning("threads[%d]: failed to notify on start: %s\n", 1563 thread->tid, strerror(errno)); 1564 1565 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 1566 1567 pollfd = &thread->pollfd; 1568 ctlfd_pos = thread->ctlfd_pos; 1569 1570 for (;;) { 1571 unsigned long long hits = thread->samples; 1572 1573 if (record__mmap_read_all(thread->rec, false) < 0 || terminate) 1574 break; 1575 1576 if (hits == thread->samples) { 1577 1578 err = fdarray__poll(pollfd, -1); 1579 /* 1580 * Propagate error, only if there's any. Ignore positive 1581 * number of returned events and interrupt error. 1582 */ 1583 if (err > 0 || (err < 0 && errno == EINTR)) 1584 err = 0; 1585 thread->waking++; 1586 1587 if (fdarray__filter(pollfd, POLLERR | POLLHUP, 1588 record__thread_munmap_filtered, NULL) == 0) 1589 break; 1590 } 1591 1592 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) { 1593 terminate = true; 1594 close(thread->pipes.msg[0]); 1595 thread->pipes.msg[0] = -1; 1596 pollfd->entries[ctlfd_pos].fd = -1; 1597 pollfd->entries[ctlfd_pos].events = 0; 1598 } 1599 1600 pollfd->entries[ctlfd_pos].revents = 0; 1601 } 1602 record__mmap_read_all(thread->rec, true); 1603 1604 err = write(thread->pipes.ack[1], &msg, sizeof(msg)); 1605 if (err == -1) 1606 pr_warning("threads[%d]: failed to notify on termination: %s\n", 1607 thread->tid, strerror(errno)); 1608 1609 return NULL; 1610 } 1611 1612 static void record__init_features(struct record *rec) 1613 { 1614 struct perf_session *session = rec->session; 1615 int feat; 1616 1617 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1618 perf_header__set_feat(&session->header, feat); 1619 1620 if (rec->no_buildid) 1621 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1622 1623 if (!have_tracepoints(&rec->evlist->core.entries)) 1624 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1625 1626 if (!rec->opts.branch_stack) 1627 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); 1628 1629 if (!rec->opts.full_auxtrace) 1630 perf_header__clear_feat(&session->header, HEADER_AUXTRACE); 1631 1632 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) 1633 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1634 1635 if (!rec->opts.use_clockid) 1636 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); 1637 1638 if (!record__threads_enabled(rec)) 1639 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1640 1641 if (!record__comp_enabled(rec)) 1642 perf_header__clear_feat(&session->header, HEADER_COMPRESSED); 1643 1644 perf_header__clear_feat(&session->header, HEADER_STAT); 1645 } 1646 1647 static void 1648 record__finish_output(struct record *rec) 1649 { 1650 int i; 1651 struct perf_data *data = &rec->data; 1652 int fd = perf_data__fd(data); 1653 1654 if (data->is_pipe) 1655 return; 1656 1657 rec->session->header.data_size += rec->bytes_written; 1658 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); 1659 if (record__threads_enabled(rec)) { 1660 for (i = 0; i < data->dir.nr; i++) 1661 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR); 1662 } 1663 1664 if (!rec->no_buildid) { 1665 process_buildids(rec); 1666 1667 if (rec->buildid_all) 1668 dsos__hit_all(rec->session); 1669 } 1670 perf_session__write_header(rec->session, rec->evlist, fd, true); 1671 1672 return; 1673 } 1674 1675 static int record__synthesize_workload(struct record *rec, bool tail) 1676 { 1677 int err; 1678 struct perf_thread_map *thread_map; 1679 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1680 1681 if (rec->opts.tail_synthesize != tail) 1682 return 0; 1683 1684 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid); 1685 if (thread_map == NULL) 1686 return -1; 1687 1688 err = perf_event__synthesize_thread_map(&rec->tool, thread_map, 1689 process_synthesized_event, 1690 &rec->session->machines.host, 1691 needs_mmap, 1692 rec->opts.sample_address); 1693 perf_thread_map__put(thread_map); 1694 return err; 1695 } 1696 1697 static int record__synthesize(struct record *rec, bool tail); 1698 1699 static int 1700 record__switch_output(struct record *rec, bool at_exit) 1701 { 1702 struct perf_data *data = &rec->data; 1703 int fd, err; 1704 char *new_filename; 1705 1706 /* Same Size: "2015122520103046"*/ 1707 char timestamp[] = "InvalidTimestamp"; 1708 1709 record__aio_mmap_read_sync(rec); 1710 1711 record__synthesize(rec, true); 1712 if (target__none(&rec->opts.target)) 1713 record__synthesize_workload(rec, true); 1714 1715 rec->samples = 0; 1716 record__finish_output(rec); 1717 err = fetch_current_timestamp(timestamp, sizeof(timestamp)); 1718 if (err) { 1719 pr_err("Failed to get current timestamp\n"); 1720 return -EINVAL; 1721 } 1722 1723 fd = perf_data__switch(data, timestamp, 1724 rec->session->header.data_offset, 1725 at_exit, &new_filename); 1726 if (fd >= 0 && !at_exit) { 1727 rec->bytes_written = 0; 1728 rec->session->header.data_size = 0; 1729 } 1730 1731 if (!quiet) 1732 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 1733 data->path, timestamp); 1734 1735 if (rec->switch_output.num_files) { 1736 int n = rec->switch_output.cur_file + 1; 1737 1738 if (n >= rec->switch_output.num_files) 1739 n = 0; 1740 rec->switch_output.cur_file = n; 1741 if (rec->switch_output.filenames[n]) { 1742 remove(rec->switch_output.filenames[n]); 1743 zfree(&rec->switch_output.filenames[n]); 1744 } 1745 rec->switch_output.filenames[n] = new_filename; 1746 } else { 1747 free(new_filename); 1748 } 1749 1750 /* Output tracking events */ 1751 if (!at_exit) { 1752 record__synthesize(rec, false); 1753 1754 /* 1755 * In 'perf record --switch-output' without -a, 1756 * record__synthesize() in record__switch_output() won't 1757 * generate tracking events because there's no thread_map 1758 * in evlist. Which causes newly created perf.data doesn't 1759 * contain map and comm information. 1760 * Create a fake thread_map and directly call 1761 * perf_event__synthesize_thread_map() for those events. 1762 */ 1763 if (target__none(&rec->opts.target)) 1764 record__synthesize_workload(rec, false); 1765 } 1766 return fd; 1767 } 1768 1769 static volatile int workload_exec_errno; 1770 1771 /* 1772 * evlist__prepare_workload will send a SIGUSR1 1773 * if the fork fails, since we asked by setting its 1774 * want_signal to true. 1775 */ 1776 static void workload_exec_failed_signal(int signo __maybe_unused, 1777 siginfo_t *info, 1778 void *ucontext __maybe_unused) 1779 { 1780 workload_exec_errno = info->si_value.sival_int; 1781 done = 1; 1782 child_finished = 1; 1783 } 1784 1785 static void snapshot_sig_handler(int sig); 1786 static void alarm_sig_handler(int sig); 1787 1788 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist) 1789 { 1790 if (evlist) { 1791 if (evlist->mmap && evlist->mmap[0].core.base) 1792 return evlist->mmap[0].core.base; 1793 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base) 1794 return evlist->overwrite_mmap[0].core.base; 1795 } 1796 return NULL; 1797 } 1798 1799 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) 1800 { 1801 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist); 1802 if (pc) 1803 return pc; 1804 return NULL; 1805 } 1806 1807 static int record__synthesize(struct record *rec, bool tail) 1808 { 1809 struct perf_session *session = rec->session; 1810 struct machine *machine = &session->machines.host; 1811 struct perf_data *data = &rec->data; 1812 struct record_opts *opts = &rec->opts; 1813 struct perf_tool *tool = &rec->tool; 1814 int err = 0; 1815 event_op f = process_synthesized_event; 1816 1817 if (rec->opts.tail_synthesize != tail) 1818 return 0; 1819 1820 if (data->is_pipe) { 1821 err = perf_event__synthesize_for_pipe(tool, session, data, 1822 process_synthesized_event); 1823 if (err < 0) 1824 goto out; 1825 1826 rec->bytes_written += err; 1827 } 1828 1829 err = perf_event__synth_time_conv(record__pick_pc(rec), tool, 1830 process_synthesized_event, machine); 1831 if (err) 1832 goto out; 1833 1834 /* Synthesize id_index before auxtrace_info */ 1835 if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { 1836 err = perf_event__synthesize_id_index(tool, 1837 process_synthesized_event, 1838 session->evlist, machine); 1839 if (err) 1840 goto out; 1841 } 1842 1843 if (rec->opts.full_auxtrace) { 1844 err = perf_event__synthesize_auxtrace_info(rec->itr, tool, 1845 session, process_synthesized_event); 1846 if (err) 1847 goto out; 1848 } 1849 1850 if (!evlist__exclude_kernel(rec->evlist)) { 1851 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, 1852 machine); 1853 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" 1854 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1855 "Check /proc/kallsyms permission or run as root.\n"); 1856 1857 err = perf_event__synthesize_modules(tool, process_synthesized_event, 1858 machine); 1859 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" 1860 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 1861 "Check /proc/modules permission or run as root.\n"); 1862 } 1863 1864 if (perf_guest) { 1865 machines__process_guests(&session->machines, 1866 perf_event__synthesize_guest_os, tool); 1867 } 1868 1869 err = perf_event__synthesize_extra_attr(&rec->tool, 1870 rec->evlist, 1871 process_synthesized_event, 1872 data->is_pipe); 1873 if (err) 1874 goto out; 1875 1876 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads, 1877 process_synthesized_event, 1878 NULL); 1879 if (err < 0) { 1880 pr_err("Couldn't synthesize thread map.\n"); 1881 return err; 1882 } 1883 1884 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus, 1885 process_synthesized_event, NULL); 1886 if (err < 0) { 1887 pr_err("Couldn't synthesize cpu map.\n"); 1888 return err; 1889 } 1890 1891 err = perf_event__synthesize_bpf_events(session, process_synthesized_event, 1892 machine, opts); 1893 if (err < 0) 1894 pr_warning("Couldn't synthesize bpf events.\n"); 1895 1896 if (rec->opts.synth & PERF_SYNTH_CGROUP) { 1897 err = perf_event__synthesize_cgroups(tool, process_synthesized_event, 1898 machine); 1899 if (err < 0) 1900 pr_warning("Couldn't synthesize cgroup events.\n"); 1901 } 1902 1903 if (rec->opts.nr_threads_synthesize > 1) { 1904 perf_set_multithreaded(); 1905 f = process_locked_synthesized_event; 1906 } 1907 1908 if (rec->opts.synth & PERF_SYNTH_TASK) { 1909 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; 1910 1911 err = __machine__synthesize_threads(machine, tool, &opts->target, 1912 rec->evlist->core.threads, 1913 f, needs_mmap, opts->sample_address, 1914 rec->opts.nr_threads_synthesize); 1915 } 1916 1917 if (rec->opts.nr_threads_synthesize > 1) 1918 perf_set_singlethreaded(); 1919 1920 out: 1921 return err; 1922 } 1923 1924 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) 1925 { 1926 struct record *rec = data; 1927 pthread_kill(rec->thread_id, SIGUSR2); 1928 return 0; 1929 } 1930 1931 static int record__setup_sb_evlist(struct record *rec) 1932 { 1933 struct record_opts *opts = &rec->opts; 1934 1935 if (rec->sb_evlist != NULL) { 1936 /* 1937 * We get here if --switch-output-event populated the 1938 * sb_evlist, so associate a callback that will send a SIGUSR2 1939 * to the main thread. 1940 */ 1941 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); 1942 rec->thread_id = pthread_self(); 1943 } 1944 #ifdef HAVE_LIBBPF_SUPPORT 1945 if (!opts->no_bpf_event) { 1946 if (rec->sb_evlist == NULL) { 1947 rec->sb_evlist = evlist__new(); 1948 1949 if (rec->sb_evlist == NULL) { 1950 pr_err("Couldn't create side band evlist.\n."); 1951 return -1; 1952 } 1953 } 1954 1955 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { 1956 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); 1957 return -1; 1958 } 1959 } 1960 #endif 1961 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { 1962 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); 1963 opts->no_bpf_event = true; 1964 } 1965 1966 return 0; 1967 } 1968 1969 static int record__init_clock(struct record *rec) 1970 { 1971 struct perf_session *session = rec->session; 1972 struct timespec ref_clockid; 1973 struct timeval ref_tod; 1974 u64 ref; 1975 1976 if (!rec->opts.use_clockid) 1977 return 0; 1978 1979 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1980 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; 1981 1982 session->header.env.clock.clockid = rec->opts.clockid; 1983 1984 if (gettimeofday(&ref_tod, NULL) != 0) { 1985 pr_err("gettimeofday failed, cannot set reference time.\n"); 1986 return -1; 1987 } 1988 1989 if (clock_gettime(rec->opts.clockid, &ref_clockid)) { 1990 pr_err("clock_gettime failed, cannot set reference time.\n"); 1991 return -1; 1992 } 1993 1994 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + 1995 (u64) ref_tod.tv_usec * NSEC_PER_USEC; 1996 1997 session->header.env.clock.tod_ns = ref; 1998 1999 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + 2000 (u64) ref_clockid.tv_nsec; 2001 2002 session->header.env.clock.clockid_ns = ref; 2003 return 0; 2004 } 2005 2006 static void hit_auxtrace_snapshot_trigger(struct record *rec) 2007 { 2008 if (trigger_is_ready(&auxtrace_snapshot_trigger)) { 2009 trigger_hit(&auxtrace_snapshot_trigger); 2010 auxtrace_record__snapshot_started = 1; 2011 if (auxtrace_record__snapshot_start(rec->itr)) 2012 trigger_error(&auxtrace_snapshot_trigger); 2013 } 2014 } 2015 2016 static void record__uniquify_name(struct record *rec) 2017 { 2018 struct evsel *pos; 2019 struct evlist *evlist = rec->evlist; 2020 char *new_name; 2021 int ret; 2022 2023 if (!perf_pmu__has_hybrid()) 2024 return; 2025 2026 evlist__for_each_entry(evlist, pos) { 2027 if (!evsel__is_hybrid(pos)) 2028 continue; 2029 2030 if (strchr(pos->name, '/')) 2031 continue; 2032 2033 ret = asprintf(&new_name, "%s/%s/", 2034 pos->pmu_name, pos->name); 2035 if (ret) { 2036 free(pos->name); 2037 pos->name = new_name; 2038 } 2039 } 2040 } 2041 2042 static int record__terminate_thread(struct record_thread *thread_data) 2043 { 2044 int err; 2045 enum thread_msg ack = THREAD_MSG__UNDEFINED; 2046 pid_t tid = thread_data->tid; 2047 2048 close(thread_data->pipes.msg[1]); 2049 thread_data->pipes.msg[1] = -1; 2050 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack)); 2051 if (err > 0) 2052 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]); 2053 else 2054 pr_warning("threads[%d]: failed to receive termination notification from %d\n", 2055 thread->tid, tid); 2056 2057 return 0; 2058 } 2059 2060 static int record__start_threads(struct record *rec) 2061 { 2062 int t, tt, err, ret = 0, nr_threads = rec->nr_threads; 2063 struct record_thread *thread_data = rec->thread_data; 2064 sigset_t full, mask; 2065 pthread_t handle; 2066 pthread_attr_t attrs; 2067 2068 thread = &thread_data[0]; 2069 2070 if (!record__threads_enabled(rec)) 2071 return 0; 2072 2073 sigfillset(&full); 2074 if (sigprocmask(SIG_SETMASK, &full, &mask)) { 2075 pr_err("Failed to block signals on threads start: %s\n", strerror(errno)); 2076 return -1; 2077 } 2078 2079 pthread_attr_init(&attrs); 2080 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); 2081 2082 for (t = 1; t < nr_threads; t++) { 2083 enum thread_msg msg = THREAD_MSG__UNDEFINED; 2084 2085 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP 2086 pthread_attr_setaffinity_np(&attrs, 2087 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)), 2088 (cpu_set_t *)(thread_data[t].mask->affinity.bits)); 2089 #endif 2090 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) { 2091 for (tt = 1; tt < t; tt++) 2092 record__terminate_thread(&thread_data[t]); 2093 pr_err("Failed to start threads: %s\n", strerror(errno)); 2094 ret = -1; 2095 goto out_err; 2096 } 2097 2098 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg)); 2099 if (err > 0) 2100 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid, 2101 thread_msg_tags[msg]); 2102 else 2103 pr_warning("threads[%d]: failed to receive start notification from %d\n", 2104 thread->tid, rec->thread_data[t].tid); 2105 } 2106 2107 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity), 2108 (cpu_set_t *)thread->mask->affinity.bits); 2109 2110 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu()); 2111 2112 out_err: 2113 pthread_attr_destroy(&attrs); 2114 2115 if (sigprocmask(SIG_SETMASK, &mask, NULL)) { 2116 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno)); 2117 ret = -1; 2118 } 2119 2120 return ret; 2121 } 2122 2123 static int record__stop_threads(struct record *rec) 2124 { 2125 int t; 2126 struct record_thread *thread_data = rec->thread_data; 2127 2128 for (t = 1; t < rec->nr_threads; t++) 2129 record__terminate_thread(&thread_data[t]); 2130 2131 for (t = 0; t < rec->nr_threads; t++) { 2132 rec->samples += thread_data[t].samples; 2133 if (!record__threads_enabled(rec)) 2134 continue; 2135 rec->session->bytes_transferred += thread_data[t].bytes_transferred; 2136 rec->session->bytes_compressed += thread_data[t].bytes_compressed; 2137 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid, 2138 thread_data[t].samples, thread_data[t].waking); 2139 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed) 2140 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n", 2141 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed); 2142 else 2143 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written); 2144 } 2145 2146 return 0; 2147 } 2148 2149 static unsigned long record__waking(struct record *rec) 2150 { 2151 int t; 2152 unsigned long waking = 0; 2153 struct record_thread *thread_data = rec->thread_data; 2154 2155 for (t = 0; t < rec->nr_threads; t++) 2156 waking += thread_data[t].waking; 2157 2158 return waking; 2159 } 2160 2161 static int __cmd_record(struct record *rec, int argc, const char **argv) 2162 { 2163 int err; 2164 int status = 0; 2165 const bool forks = argc > 0; 2166 struct perf_tool *tool = &rec->tool; 2167 struct record_opts *opts = &rec->opts; 2168 struct perf_data *data = &rec->data; 2169 struct perf_session *session; 2170 bool disabled = false, draining = false; 2171 int fd; 2172 float ratio = 0; 2173 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; 2174 2175 atexit(record__sig_exit); 2176 signal(SIGCHLD, sig_handler); 2177 signal(SIGINT, sig_handler); 2178 signal(SIGTERM, sig_handler); 2179 signal(SIGSEGV, sigsegv_handler); 2180 2181 if (rec->opts.record_namespaces) 2182 tool->namespace_events = true; 2183 2184 if (rec->opts.record_cgroup) { 2185 #ifdef HAVE_FILE_HANDLE 2186 tool->cgroup_events = true; 2187 #else 2188 pr_err("cgroup tracking is not supported\n"); 2189 return -1; 2190 #endif 2191 } 2192 2193 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) { 2194 signal(SIGUSR2, snapshot_sig_handler); 2195 if (rec->opts.auxtrace_snapshot_mode) 2196 trigger_on(&auxtrace_snapshot_trigger); 2197 if (rec->switch_output.enabled) 2198 trigger_on(&switch_output_trigger); 2199 } else { 2200 signal(SIGUSR2, SIG_IGN); 2201 } 2202 2203 session = perf_session__new(data, tool); 2204 if (IS_ERR(session)) { 2205 pr_err("Perf session creation failed.\n"); 2206 return PTR_ERR(session); 2207 } 2208 2209 if (record__threads_enabled(rec)) { 2210 if (perf_data__is_pipe(&rec->data)) { 2211 pr_err("Parallel trace streaming is not available in pipe mode.\n"); 2212 return -1; 2213 } 2214 if (rec->opts.full_auxtrace) { 2215 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n"); 2216 return -1; 2217 } 2218 } 2219 2220 fd = perf_data__fd(data); 2221 rec->session = session; 2222 2223 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) { 2224 pr_err("Compression initialization failed.\n"); 2225 return -1; 2226 } 2227 #ifdef HAVE_EVENTFD_SUPPORT 2228 done_fd = eventfd(0, EFD_NONBLOCK); 2229 if (done_fd < 0) { 2230 pr_err("Failed to create wakeup eventfd, error: %m\n"); 2231 status = -1; 2232 goto out_delete_session; 2233 } 2234 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); 2235 if (err < 0) { 2236 pr_err("Failed to add wakeup eventfd to poll list\n"); 2237 status = err; 2238 goto out_delete_session; 2239 } 2240 #endif // HAVE_EVENTFD_SUPPORT 2241 2242 session->header.env.comp_type = PERF_COMP_ZSTD; 2243 session->header.env.comp_level = rec->opts.comp_level; 2244 2245 if (rec->opts.kcore && 2246 !record__kcore_readable(&session->machines.host)) { 2247 pr_err("ERROR: kcore is not readable.\n"); 2248 return -1; 2249 } 2250 2251 if (record__init_clock(rec)) 2252 return -1; 2253 2254 record__init_features(rec); 2255 2256 if (forks) { 2257 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, 2258 workload_exec_failed_signal); 2259 if (err < 0) { 2260 pr_err("Couldn't run the workload!\n"); 2261 status = err; 2262 goto out_delete_session; 2263 } 2264 } 2265 2266 /* 2267 * If we have just single event and are sending data 2268 * through pipe, we need to force the ids allocation, 2269 * because we synthesize event name through the pipe 2270 * and need the id for that. 2271 */ 2272 if (data->is_pipe && rec->evlist->core.nr_entries == 1) 2273 rec->opts.sample_id = true; 2274 2275 record__uniquify_name(rec); 2276 2277 if (record__open(rec) != 0) { 2278 err = -1; 2279 goto out_free_threads; 2280 } 2281 session->header.env.comp_mmap_len = session->evlist->core.mmap_len; 2282 2283 if (rec->opts.kcore) { 2284 err = record__kcore_copy(&session->machines.host, data); 2285 if (err) { 2286 pr_err("ERROR: Failed to copy kcore\n"); 2287 goto out_free_threads; 2288 } 2289 } 2290 2291 err = bpf__apply_obj_config(); 2292 if (err) { 2293 char errbuf[BUFSIZ]; 2294 2295 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); 2296 pr_err("ERROR: Apply config to BPF failed: %s\n", 2297 errbuf); 2298 goto out_free_threads; 2299 } 2300 2301 /* 2302 * Normally perf_session__new would do this, but it doesn't have the 2303 * evlist. 2304 */ 2305 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { 2306 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); 2307 rec->tool.ordered_events = false; 2308 } 2309 2310 if (!rec->evlist->core.nr_groups) 2311 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); 2312 2313 if (data->is_pipe) { 2314 err = perf_header__write_pipe(fd); 2315 if (err < 0) 2316 goto out_free_threads; 2317 } else { 2318 err = perf_session__write_header(session, rec->evlist, fd, false); 2319 if (err < 0) 2320 goto out_free_threads; 2321 } 2322 2323 err = -1; 2324 if (!rec->no_buildid 2325 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 2326 pr_err("Couldn't generate buildids. " 2327 "Use --no-buildid to profile anyway.\n"); 2328 goto out_free_threads; 2329 } 2330 2331 err = record__setup_sb_evlist(rec); 2332 if (err) 2333 goto out_free_threads; 2334 2335 err = record__synthesize(rec, false); 2336 if (err < 0) 2337 goto out_free_threads; 2338 2339 if (rec->realtime_prio) { 2340 struct sched_param param; 2341 2342 param.sched_priority = rec->realtime_prio; 2343 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 2344 pr_err("Could not set realtime priority.\n"); 2345 err = -1; 2346 goto out_free_threads; 2347 } 2348 } 2349 2350 if (record__start_threads(rec)) 2351 goto out_free_threads; 2352 2353 /* 2354 * When perf is starting the traced process, all the events 2355 * (apart from group members) have enable_on_exec=1 set, 2356 * so don't spoil it by prematurely enabling them. 2357 */ 2358 if (!target__none(&opts->target) && !opts->initial_delay) 2359 evlist__enable(rec->evlist); 2360 2361 /* 2362 * Let the child rip 2363 */ 2364 if (forks) { 2365 struct machine *machine = &session->machines.host; 2366 union perf_event *event; 2367 pid_t tgid; 2368 2369 event = malloc(sizeof(event->comm) + machine->id_hdr_size); 2370 if (event == NULL) { 2371 err = -ENOMEM; 2372 goto out_child; 2373 } 2374 2375 /* 2376 * Some H/W events are generated before COMM event 2377 * which is emitted during exec(), so perf script 2378 * cannot see a correct process name for those events. 2379 * Synthesize COMM event to prevent it. 2380 */ 2381 tgid = perf_event__synthesize_comm(tool, event, 2382 rec->evlist->workload.pid, 2383 process_synthesized_event, 2384 machine); 2385 free(event); 2386 2387 if (tgid == -1) 2388 goto out_child; 2389 2390 event = malloc(sizeof(event->namespaces) + 2391 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) + 2392 machine->id_hdr_size); 2393 if (event == NULL) { 2394 err = -ENOMEM; 2395 goto out_child; 2396 } 2397 2398 /* 2399 * Synthesize NAMESPACES event for the command specified. 2400 */ 2401 perf_event__synthesize_namespaces(tool, event, 2402 rec->evlist->workload.pid, 2403 tgid, process_synthesized_event, 2404 machine); 2405 free(event); 2406 2407 evlist__start_workload(rec->evlist); 2408 } 2409 2410 if (opts->initial_delay) { 2411 pr_info(EVLIST_DISABLED_MSG); 2412 if (opts->initial_delay > 0) { 2413 usleep(opts->initial_delay * USEC_PER_MSEC); 2414 evlist__enable(rec->evlist); 2415 pr_info(EVLIST_ENABLED_MSG); 2416 } 2417 } 2418 2419 trigger_ready(&auxtrace_snapshot_trigger); 2420 trigger_ready(&switch_output_trigger); 2421 perf_hooks__invoke_record_start(); 2422 for (;;) { 2423 unsigned long long hits = thread->samples; 2424 2425 /* 2426 * rec->evlist->bkw_mmap_state is possible to be 2427 * BKW_MMAP_EMPTY here: when done == true and 2428 * hits != rec->samples in previous round. 2429 * 2430 * evlist__toggle_bkw_mmap ensure we never 2431 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING. 2432 */ 2433 if (trigger_is_hit(&switch_output_trigger) || done || draining) 2434 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING); 2435 2436 if (record__mmap_read_all(rec, false) < 0) { 2437 trigger_error(&auxtrace_snapshot_trigger); 2438 trigger_error(&switch_output_trigger); 2439 err = -1; 2440 goto out_child; 2441 } 2442 2443 if (auxtrace_record__snapshot_started) { 2444 auxtrace_record__snapshot_started = 0; 2445 if (!trigger_is_error(&auxtrace_snapshot_trigger)) 2446 record__read_auxtrace_snapshot(rec, false); 2447 if (trigger_is_error(&auxtrace_snapshot_trigger)) { 2448 pr_err("AUX area tracing snapshot failed\n"); 2449 err = -1; 2450 goto out_child; 2451 } 2452 } 2453 2454 if (trigger_is_hit(&switch_output_trigger)) { 2455 /* 2456 * If switch_output_trigger is hit, the data in 2457 * overwritable ring buffer should have been collected, 2458 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY. 2459 * 2460 * If SIGUSR2 raise after or during record__mmap_read_all(), 2461 * record__mmap_read_all() didn't collect data from 2462 * overwritable ring buffer. Read again. 2463 */ 2464 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING) 2465 continue; 2466 trigger_ready(&switch_output_trigger); 2467 2468 /* 2469 * Reenable events in overwrite ring buffer after 2470 * record__mmap_read_all(): we should have collected 2471 * data from it. 2472 */ 2473 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING); 2474 2475 if (!quiet) 2476 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n", 2477 record__waking(rec)); 2478 thread->waking = 0; 2479 fd = record__switch_output(rec, false); 2480 if (fd < 0) { 2481 pr_err("Failed to switch to new file\n"); 2482 trigger_error(&switch_output_trigger); 2483 err = fd; 2484 goto out_child; 2485 } 2486 2487 /* re-arm the alarm */ 2488 if (rec->switch_output.time) 2489 alarm(rec->switch_output.time); 2490 } 2491 2492 if (hits == thread->samples) { 2493 if (done || draining) 2494 break; 2495 err = fdarray__poll(&thread->pollfd, -1); 2496 /* 2497 * Propagate error, only if there's any. Ignore positive 2498 * number of returned events and interrupt error. 2499 */ 2500 if (err > 0 || (err < 0 && errno == EINTR)) 2501 err = 0; 2502 thread->waking++; 2503 2504 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP, 2505 record__thread_munmap_filtered, NULL) == 0) 2506 draining = true; 2507 2508 evlist__ctlfd_update(rec->evlist, 2509 &thread->pollfd.entries[thread->ctlfd_pos]); 2510 } 2511 2512 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { 2513 switch (cmd) { 2514 case EVLIST_CTL_CMD_SNAPSHOT: 2515 hit_auxtrace_snapshot_trigger(rec); 2516 evlist__ctlfd_ack(rec->evlist); 2517 break; 2518 case EVLIST_CTL_CMD_STOP: 2519 done = 1; 2520 break; 2521 case EVLIST_CTL_CMD_ACK: 2522 case EVLIST_CTL_CMD_UNSUPPORTED: 2523 case EVLIST_CTL_CMD_ENABLE: 2524 case EVLIST_CTL_CMD_DISABLE: 2525 case EVLIST_CTL_CMD_EVLIST: 2526 case EVLIST_CTL_CMD_PING: 2527 default: 2528 break; 2529 } 2530 } 2531 2532 /* 2533 * When perf is starting the traced process, at the end events 2534 * die with the process and we wait for that. Thus no need to 2535 * disable events in this case. 2536 */ 2537 if (done && !disabled && !target__none(&opts->target)) { 2538 trigger_off(&auxtrace_snapshot_trigger); 2539 evlist__disable(rec->evlist); 2540 disabled = true; 2541 } 2542 } 2543 2544 trigger_off(&auxtrace_snapshot_trigger); 2545 trigger_off(&switch_output_trigger); 2546 2547 if (opts->auxtrace_snapshot_on_exit) 2548 record__auxtrace_snapshot_exit(rec); 2549 2550 if (forks && workload_exec_errno) { 2551 char msg[STRERR_BUFSIZE], strevsels[2048]; 2552 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); 2553 2554 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); 2555 2556 pr_err("Failed to collect '%s' for the '%s' workload: %s\n", 2557 strevsels, argv[0], emsg); 2558 err = -1; 2559 goto out_child; 2560 } 2561 2562 if (!quiet) 2563 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", 2564 record__waking(rec)); 2565 2566 if (target__none(&rec->opts.target)) 2567 record__synthesize_workload(rec, true); 2568 2569 out_child: 2570 record__stop_threads(rec); 2571 record__mmap_read_all(rec, true); 2572 out_free_threads: 2573 record__free_thread_data(rec); 2574 evlist__finalize_ctlfd(rec->evlist); 2575 record__aio_mmap_read_sync(rec); 2576 2577 if (rec->session->bytes_transferred && rec->session->bytes_compressed) { 2578 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed; 2579 session->header.env.comp_ratio = ratio + 0.5; 2580 } 2581 2582 if (forks) { 2583 int exit_status; 2584 2585 if (!child_finished) 2586 kill(rec->evlist->workload.pid, SIGTERM); 2587 2588 wait(&exit_status); 2589 2590 if (err < 0) 2591 status = err; 2592 else if (WIFEXITED(exit_status)) 2593 status = WEXITSTATUS(exit_status); 2594 else if (WIFSIGNALED(exit_status)) 2595 signr = WTERMSIG(exit_status); 2596 } else 2597 status = err; 2598 2599 record__synthesize(rec, true); 2600 /* this will be recalculated during process_buildids() */ 2601 rec->samples = 0; 2602 2603 if (!err) { 2604 if (!rec->timestamp_filename) { 2605 record__finish_output(rec); 2606 } else { 2607 fd = record__switch_output(rec, true); 2608 if (fd < 0) { 2609 status = fd; 2610 goto out_delete_session; 2611 } 2612 } 2613 } 2614 2615 perf_hooks__invoke_record_end(); 2616 2617 if (!err && !quiet) { 2618 char samples[128]; 2619 const char *postfix = rec->timestamp_filename ? 2620 ".<timestamp>" : ""; 2621 2622 if (rec->samples && !rec->opts.full_auxtrace) 2623 scnprintf(samples, sizeof(samples), 2624 " (%" PRIu64 " samples)", rec->samples); 2625 else 2626 samples[0] = '\0'; 2627 2628 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s", 2629 perf_data__size(data) / 1024.0 / 1024.0, 2630 data->path, postfix, samples); 2631 if (ratio) { 2632 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)", 2633 rec->session->bytes_transferred / 1024.0 / 1024.0, 2634 ratio); 2635 } 2636 fprintf(stderr, " ]\n"); 2637 } 2638 2639 out_delete_session: 2640 #ifdef HAVE_EVENTFD_SUPPORT 2641 if (done_fd >= 0) 2642 close(done_fd); 2643 #endif 2644 zstd_fini(&session->zstd_data); 2645 perf_session__delete(session); 2646 2647 if (!opts->no_bpf_event) 2648 evlist__stop_sb_thread(rec->sb_evlist); 2649 return status; 2650 } 2651 2652 static void callchain_debug(struct callchain_param *callchain) 2653 { 2654 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; 2655 2656 pr_debug("callchain: type %s\n", str[callchain->record_mode]); 2657 2658 if (callchain->record_mode == CALLCHAIN_DWARF) 2659 pr_debug("callchain: stack dump size %d\n", 2660 callchain->dump_size); 2661 } 2662 2663 int record_opts__parse_callchain(struct record_opts *record, 2664 struct callchain_param *callchain, 2665 const char *arg, bool unset) 2666 { 2667 int ret; 2668 callchain->enabled = !unset; 2669 2670 /* --no-call-graph */ 2671 if (unset) { 2672 callchain->record_mode = CALLCHAIN_NONE; 2673 pr_debug("callchain: disabled\n"); 2674 return 0; 2675 } 2676 2677 ret = parse_callchain_record_opt(arg, callchain); 2678 if (!ret) { 2679 /* Enable data address sampling for DWARF unwind. */ 2680 if (callchain->record_mode == CALLCHAIN_DWARF) 2681 record->sample_address = true; 2682 callchain_debug(callchain); 2683 } 2684 2685 return ret; 2686 } 2687 2688 int record_parse_callchain_opt(const struct option *opt, 2689 const char *arg, 2690 int unset) 2691 { 2692 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset); 2693 } 2694 2695 int record_callchain_opt(const struct option *opt, 2696 const char *arg __maybe_unused, 2697 int unset __maybe_unused) 2698 { 2699 struct callchain_param *callchain = opt->value; 2700 2701 callchain->enabled = true; 2702 2703 if (callchain->record_mode == CALLCHAIN_NONE) 2704 callchain->record_mode = CALLCHAIN_FP; 2705 2706 callchain_debug(callchain); 2707 return 0; 2708 } 2709 2710 static int perf_record_config(const char *var, const char *value, void *cb) 2711 { 2712 struct record *rec = cb; 2713 2714 if (!strcmp(var, "record.build-id")) { 2715 if (!strcmp(value, "cache")) 2716 rec->no_buildid_cache = false; 2717 else if (!strcmp(value, "no-cache")) 2718 rec->no_buildid_cache = true; 2719 else if (!strcmp(value, "skip")) 2720 rec->no_buildid = true; 2721 else if (!strcmp(value, "mmap")) 2722 rec->buildid_mmap = true; 2723 else 2724 return -1; 2725 return 0; 2726 } 2727 if (!strcmp(var, "record.call-graph")) { 2728 var = "call-graph.record-mode"; 2729 return perf_default_config(var, value, cb); 2730 } 2731 #ifdef HAVE_AIO_SUPPORT 2732 if (!strcmp(var, "record.aio")) { 2733 rec->opts.nr_cblocks = strtol(value, NULL, 0); 2734 if (!rec->opts.nr_cblocks) 2735 rec->opts.nr_cblocks = nr_cblocks_default; 2736 } 2737 #endif 2738 if (!strcmp(var, "record.debuginfod")) { 2739 rec->debuginfod.urls = strdup(value); 2740 if (!rec->debuginfod.urls) 2741 return -ENOMEM; 2742 rec->debuginfod.set = true; 2743 } 2744 2745 return 0; 2746 } 2747 2748 2749 static int record__parse_affinity(const struct option *opt, const char *str, int unset) 2750 { 2751 struct record_opts *opts = (struct record_opts *)opt->value; 2752 2753 if (unset || !str) 2754 return 0; 2755 2756 if (!strcasecmp(str, "node")) 2757 opts->affinity = PERF_AFFINITY_NODE; 2758 else if (!strcasecmp(str, "cpu")) 2759 opts->affinity = PERF_AFFINITY_CPU; 2760 2761 return 0; 2762 } 2763 2764 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits) 2765 { 2766 mask->nbits = nr_bits; 2767 mask->bits = bitmap_zalloc(mask->nbits); 2768 if (!mask->bits) 2769 return -ENOMEM; 2770 2771 return 0; 2772 } 2773 2774 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask) 2775 { 2776 bitmap_free(mask->bits); 2777 mask->nbits = 0; 2778 } 2779 2780 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits) 2781 { 2782 int ret; 2783 2784 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits); 2785 if (ret) { 2786 mask->affinity.bits = NULL; 2787 return ret; 2788 } 2789 2790 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits); 2791 if (ret) { 2792 record__mmap_cpu_mask_free(&mask->maps); 2793 mask->maps.bits = NULL; 2794 } 2795 2796 return ret; 2797 } 2798 2799 static void record__thread_mask_free(struct thread_mask *mask) 2800 { 2801 record__mmap_cpu_mask_free(&mask->maps); 2802 record__mmap_cpu_mask_free(&mask->affinity); 2803 } 2804 2805 static int record__parse_threads(const struct option *opt, const char *str, int unset) 2806 { 2807 int s; 2808 struct record_opts *opts = opt->value; 2809 2810 if (unset || !str || !strlen(str)) { 2811 opts->threads_spec = THREAD_SPEC__CPU; 2812 } else { 2813 for (s = 1; s < THREAD_SPEC__MAX; s++) { 2814 if (s == THREAD_SPEC__USER) { 2815 opts->threads_user_spec = strdup(str); 2816 if (!opts->threads_user_spec) 2817 return -ENOMEM; 2818 opts->threads_spec = THREAD_SPEC__USER; 2819 break; 2820 } 2821 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) { 2822 opts->threads_spec = s; 2823 break; 2824 } 2825 } 2826 } 2827 2828 if (opts->threads_spec == THREAD_SPEC__USER) 2829 pr_debug("threads_spec: %s\n", opts->threads_user_spec); 2830 else 2831 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]); 2832 2833 return 0; 2834 } 2835 2836 static int parse_output_max_size(const struct option *opt, 2837 const char *str, int unset) 2838 { 2839 unsigned long *s = (unsigned long *)opt->value; 2840 static struct parse_tag tags_size[] = { 2841 { .tag = 'B', .mult = 1 }, 2842 { .tag = 'K', .mult = 1 << 10 }, 2843 { .tag = 'M', .mult = 1 << 20 }, 2844 { .tag = 'G', .mult = 1 << 30 }, 2845 { .tag = 0 }, 2846 }; 2847 unsigned long val; 2848 2849 if (unset) { 2850 *s = 0; 2851 return 0; 2852 } 2853 2854 val = parse_tag_value(str, tags_size); 2855 if (val != (unsigned long) -1) { 2856 *s = val; 2857 return 0; 2858 } 2859 2860 return -1; 2861 } 2862 2863 static int record__parse_mmap_pages(const struct option *opt, 2864 const char *str, 2865 int unset __maybe_unused) 2866 { 2867 struct record_opts *opts = opt->value; 2868 char *s, *p; 2869 unsigned int mmap_pages; 2870 int ret; 2871 2872 if (!str) 2873 return -EINVAL; 2874 2875 s = strdup(str); 2876 if (!s) 2877 return -ENOMEM; 2878 2879 p = strchr(s, ','); 2880 if (p) 2881 *p = '\0'; 2882 2883 if (*s) { 2884 ret = __evlist__parse_mmap_pages(&mmap_pages, s); 2885 if (ret) 2886 goto out_free; 2887 opts->mmap_pages = mmap_pages; 2888 } 2889 2890 if (!p) { 2891 ret = 0; 2892 goto out_free; 2893 } 2894 2895 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1); 2896 if (ret) 2897 goto out_free; 2898 2899 opts->auxtrace_mmap_pages = mmap_pages; 2900 2901 out_free: 2902 free(s); 2903 return ret; 2904 } 2905 2906 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) 2907 { 2908 } 2909 2910 static int parse_control_option(const struct option *opt, 2911 const char *str, 2912 int unset __maybe_unused) 2913 { 2914 struct record_opts *opts = opt->value; 2915 2916 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); 2917 } 2918 2919 static void switch_output_size_warn(struct record *rec) 2920 { 2921 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); 2922 struct switch_output *s = &rec->switch_output; 2923 2924 wakeup_size /= 2; 2925 2926 if (s->size < wakeup_size) { 2927 char buf[100]; 2928 2929 unit_number__scnprintf(buf, sizeof(buf), wakeup_size); 2930 pr_warning("WARNING: switch-output data size lower than " 2931 "wakeup kernel buffer size (%s) " 2932 "expect bigger perf.data sizes\n", buf); 2933 } 2934 } 2935 2936 static int switch_output_setup(struct record *rec) 2937 { 2938 struct switch_output *s = &rec->switch_output; 2939 static struct parse_tag tags_size[] = { 2940 { .tag = 'B', .mult = 1 }, 2941 { .tag = 'K', .mult = 1 << 10 }, 2942 { .tag = 'M', .mult = 1 << 20 }, 2943 { .tag = 'G', .mult = 1 << 30 }, 2944 { .tag = 0 }, 2945 }; 2946 static struct parse_tag tags_time[] = { 2947 { .tag = 's', .mult = 1 }, 2948 { .tag = 'm', .mult = 60 }, 2949 { .tag = 'h', .mult = 60*60 }, 2950 { .tag = 'd', .mult = 60*60*24 }, 2951 { .tag = 0 }, 2952 }; 2953 unsigned long val; 2954 2955 /* 2956 * If we're using --switch-output-events, then we imply its 2957 * --switch-output=signal, as we'll send a SIGUSR2 from the side band 2958 * thread to its parent. 2959 */ 2960 if (rec->switch_output_event_set) { 2961 if (record__threads_enabled(rec)) { 2962 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n"); 2963 return 0; 2964 } 2965 goto do_signal; 2966 } 2967 2968 if (!s->set) 2969 return 0; 2970 2971 if (record__threads_enabled(rec)) { 2972 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n"); 2973 return 0; 2974 } 2975 2976 if (!strcmp(s->str, "signal")) { 2977 do_signal: 2978 s->signal = true; 2979 pr_debug("switch-output with SIGUSR2 signal\n"); 2980 goto enabled; 2981 } 2982 2983 val = parse_tag_value(s->str, tags_size); 2984 if (val != (unsigned long) -1) { 2985 s->size = val; 2986 pr_debug("switch-output with %s size threshold\n", s->str); 2987 goto enabled; 2988 } 2989 2990 val = parse_tag_value(s->str, tags_time); 2991 if (val != (unsigned long) -1) { 2992 s->time = val; 2993 pr_debug("switch-output with %s time threshold (%lu seconds)\n", 2994 s->str, s->time); 2995 goto enabled; 2996 } 2997 2998 return -1; 2999 3000 enabled: 3001 rec->timestamp_filename = true; 3002 s->enabled = true; 3003 3004 if (s->size && !rec->opts.no_buffering) 3005 switch_output_size_warn(rec); 3006 3007 return 0; 3008 } 3009 3010 static const char * const __record_usage[] = { 3011 "perf record [<options>] [<command>]", 3012 "perf record [<options>] -- <command> [<options>]", 3013 NULL 3014 }; 3015 const char * const *record_usage = __record_usage; 3016 3017 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event, 3018 struct perf_sample *sample, struct machine *machine) 3019 { 3020 /* 3021 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3022 * no need to add them twice. 3023 */ 3024 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3025 return 0; 3026 return perf_event__process_mmap(tool, event, sample, machine); 3027 } 3028 3029 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event, 3030 struct perf_sample *sample, struct machine *machine) 3031 { 3032 /* 3033 * We already have the kernel maps, put in place via perf_session__create_kernel_maps() 3034 * no need to add them twice. 3035 */ 3036 if (!(event->header.misc & PERF_RECORD_MISC_USER)) 3037 return 0; 3038 3039 return perf_event__process_mmap2(tool, event, sample, machine); 3040 } 3041 3042 static int process_timestamp_boundary(struct perf_tool *tool, 3043 union perf_event *event __maybe_unused, 3044 struct perf_sample *sample, 3045 struct machine *machine __maybe_unused) 3046 { 3047 struct record *rec = container_of(tool, struct record, tool); 3048 3049 set_timestamp_boundary(rec, sample->time); 3050 return 0; 3051 } 3052 3053 static int parse_record_synth_option(const struct option *opt, 3054 const char *str, 3055 int unset __maybe_unused) 3056 { 3057 struct record_opts *opts = opt->value; 3058 char *p = strdup(str); 3059 3060 if (p == NULL) 3061 return -1; 3062 3063 opts->synth = parse_synth_opt(p); 3064 free(p); 3065 3066 if (opts->synth < 0) { 3067 pr_err("Invalid synth option: %s\n", str); 3068 return -1; 3069 } 3070 return 0; 3071 } 3072 3073 /* 3074 * XXX Ideally would be local to cmd_record() and passed to a record__new 3075 * because we need to have access to it in record__exit, that is called 3076 * after cmd_record() exits, but since record_options need to be accessible to 3077 * builtin-script, leave it here. 3078 * 3079 * At least we don't ouch it in all the other functions here directly. 3080 * 3081 * Just say no to tons of global variables, sigh. 3082 */ 3083 static struct record record = { 3084 .opts = { 3085 .sample_time = true, 3086 .mmap_pages = UINT_MAX, 3087 .user_freq = UINT_MAX, 3088 .user_interval = ULLONG_MAX, 3089 .freq = 4000, 3090 .target = { 3091 .uses_mmap = true, 3092 .default_per_cpu = true, 3093 }, 3094 .mmap_flush = MMAP_FLUSH_DEFAULT, 3095 .nr_threads_synthesize = 1, 3096 .ctl_fd = -1, 3097 .ctl_fd_ack = -1, 3098 .synth = PERF_SYNTH_ALL, 3099 }, 3100 .tool = { 3101 .sample = process_sample_event, 3102 .fork = perf_event__process_fork, 3103 .exit = perf_event__process_exit, 3104 .comm = perf_event__process_comm, 3105 .namespaces = perf_event__process_namespaces, 3106 .mmap = build_id__process_mmap, 3107 .mmap2 = build_id__process_mmap2, 3108 .itrace_start = process_timestamp_boundary, 3109 .aux = process_timestamp_boundary, 3110 .ordered_events = true, 3111 }, 3112 }; 3113 3114 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP 3115 "\n\t\t\t\tDefault: fp"; 3116 3117 static bool dry_run; 3118 3119 /* 3120 * XXX Will stay a global variable till we fix builtin-script.c to stop messing 3121 * with it and switch to use the library functions in perf_evlist that came 3122 * from builtin-record.c, i.e. use record_opts, 3123 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record', 3124 * using pipes, etc. 3125 */ 3126 static struct option __record_options[] = { 3127 OPT_CALLBACK('e', "event", &record.evlist, "event", 3128 "event selector. use 'perf list' to list available events", 3129 parse_events_option), 3130 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 3131 "event filter", parse_filter), 3132 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, 3133 NULL, "don't record events from perf itself", 3134 exclude_perf), 3135 OPT_STRING('p', "pid", &record.opts.target.pid, "pid", 3136 "record events on existing process id"), 3137 OPT_STRING('t', "tid", &record.opts.target.tid, "tid", 3138 "record events on existing thread id"), 3139 OPT_INTEGER('r', "realtime", &record.realtime_prio, 3140 "collect data with this RT SCHED_FIFO priority"), 3141 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering, 3142 "collect data without buffering"), 3143 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples, 3144 "collect raw sample records from all opened counters"), 3145 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide, 3146 "system-wide collection from all CPUs"), 3147 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 3148 "list of cpus to monitor"), 3149 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 3150 OPT_STRING('o', "output", &record.data.path, "file", 3151 "output file name"), 3152 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 3153 &record.opts.no_inherit_set, 3154 "child tasks do not inherit counters"), 3155 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 3156 "synthesize non-sample events at the end of output"), 3157 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 3158 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"), 3159 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 3160 "Fail if the specified frequency can't be used"), 3161 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 3162 "profile at this frequency", 3163 record__parse_freq), 3164 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", 3165 "number of mmap data pages and AUX area tracing mmap pages", 3166 record__parse_mmap_pages), 3167 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number", 3168 "Minimal number of bytes that is extracted from mmap data pages (default: 1)", 3169 record__mmap_flush_parse), 3170 OPT_BOOLEAN(0, "group", &record.opts.group, 3171 "put the counters into a counter group"), 3172 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param, 3173 NULL, "enables call-graph recording" , 3174 &record_callchain_opt), 3175 OPT_CALLBACK(0, "call-graph", &record.opts, 3176 "record_mode[,record_size]", record_callchain_help, 3177 &record_parse_callchain_opt), 3178 OPT_INCR('v', "verbose", &verbose, 3179 "be more verbose (show counter open errors, etc)"), 3180 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), 3181 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, 3182 "per thread counts"), 3183 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), 3184 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, 3185 "Record the sample physical addresses"), 3186 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size, 3187 "Record the sampled data address data page size"), 3188 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, 3189 "Record the sampled code address (ip) page size"), 3190 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), 3191 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, 3192 &record.opts.sample_time_set, 3193 "Record the sample timestamps"), 3194 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set, 3195 "Record the sample period"), 3196 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, 3197 "don't sample"), 3198 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, 3199 &record.no_buildid_cache_set, 3200 "do not update the buildid cache"), 3201 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, 3202 &record.no_buildid_set, 3203 "do not collect buildids in perf.data"), 3204 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 3205 "monitor event in cgroup name only", 3206 parse_cgroups), 3207 OPT_INTEGER('D', "delay", &record.opts.initial_delay, 3208 "ms to wait before starting measurement after program start (-1: start with events disabled)"), 3209 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), 3210 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", 3211 "user to profile"), 3212 3213 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, 3214 "branch any", "sample any taken branches", 3215 parse_branch_stack), 3216 3217 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, 3218 "branch filter mask", "branch stack filter modes", 3219 parse_branch_stack), 3220 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight, 3221 "sample by weight (on special events only)"), 3222 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, 3223 "sample transaction flags (special events only)"), 3224 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, 3225 "use per-thread mmaps"), 3226 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 3227 "sample selected machine registers on interrupt," 3228 " use '-I?' to list register names", parse_intr_regs), 3229 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 3230 "sample selected machine registers on interrupt," 3231 " use '--user-regs=?' to list register names", parse_user_regs), 3232 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 3233 "Record running/enabled time of read (:S) events"), 3234 OPT_CALLBACK('k', "clockid", &record.opts, 3235 "clockid", "clockid to use for events, see clock_gettime()", 3236 parse_clockid), 3237 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, 3238 "opts", "AUX area tracing Snapshot Mode", ""), 3239 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts, 3240 "opts", "sample AUX area", ""), 3241 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout, 3242 "per thread proc mmap processing timeout in ms"), 3243 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces, 3244 "Record namespaces events"), 3245 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup, 3246 "Record cgroup events"), 3247 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events, 3248 &record.opts.record_switch_events_set, 3249 "Record context switch events"), 3250 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, 3251 "Configure all used events to run in kernel space.", 3252 PARSE_OPT_EXCLUSIVE), 3253 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 3254 "Configure all used events to run in user space.", 3255 PARSE_OPT_EXCLUSIVE), 3256 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains, 3257 "collect kernel callchains"), 3258 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains, 3259 "collect user callchains"), 3260 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 3261 "clang binary to use for compiling BPF scriptlets"), 3262 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 3263 "options passed to clang when compiling BPF scriptlets"), 3264 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, 3265 "file", "vmlinux pathname"), 3266 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, 3267 "Record build-id of all DSOs regardless of hits"), 3268 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap, 3269 "Record build-id in map events"), 3270 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, 3271 "append timestamp to output filename"), 3272 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, 3273 "Record timestamp boundary (time of first/last samples)"), 3274 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, 3275 &record.switch_output.set, "signal or size[BKMG] or time[smhd]", 3276 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", 3277 "signal"), 3278 OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", 3279 "switch output event selector. use 'perf list' to list available events", 3280 parse_events_option_new_evlist), 3281 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, 3282 "Limit number of switch output generated files"), 3283 OPT_BOOLEAN(0, "dry-run", &dry_run, 3284 "Parse options then exit"), 3285 #ifdef HAVE_AIO_SUPPORT 3286 OPT_CALLBACK_OPTARG(0, "aio", &record.opts, 3287 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 3288 record__aio_parse), 3289 #endif 3290 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 3291 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 3292 record__parse_affinity), 3293 #ifdef HAVE_ZSTD_SUPPORT 3294 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n", 3295 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)", 3296 record__parse_comp_level), 3297 #endif 3298 OPT_CALLBACK(0, "max-size", &record.output_max_size, 3299 "size", "Limit the maximum size of the output file", parse_output_max_size), 3300 OPT_UINTEGER(0, "num-thread-synthesize", 3301 &record.opts.nr_threads_synthesize, 3302 "number of threads to run for event synthesis"), 3303 #ifdef HAVE_LIBPFM 3304 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event", 3305 "libpfm4 event selector. use 'perf list' to list available events", 3306 parse_libpfm_events_option), 3307 #endif 3308 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", 3309 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" 3310 "\t\t\t 'snapshot': AUX area tracing snapshot).\n" 3311 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" 3312 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", 3313 parse_control_option), 3314 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", 3315 "Fine-tune event synthesis: default=all", parse_record_synth_option), 3316 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, 3317 &record.debuginfod.set, "debuginfod urls", 3318 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", 3319 "system"), 3320 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", 3321 "write collected trace data into several data files using parallel threads", 3322 record__parse_threads), 3323 OPT_END() 3324 }; 3325 3326 struct option *record_options = __record_options; 3327 3328 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus) 3329 { 3330 int c; 3331 3332 for (c = 0; c < cpus->nr; c++) 3333 set_bit(cpus->map[c].cpu, mask->bits); 3334 } 3335 3336 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec) 3337 { 3338 struct perf_cpu_map *cpus; 3339 3340 cpus = perf_cpu_map__new(mask_spec); 3341 if (!cpus) 3342 return -ENOMEM; 3343 3344 bitmap_zero(mask->bits, mask->nbits); 3345 record__mmap_cpu_mask_init(mask, cpus); 3346 perf_cpu_map__put(cpus); 3347 3348 return 0; 3349 } 3350 3351 static void record__free_thread_masks(struct record *rec, int nr_threads) 3352 { 3353 int t; 3354 3355 if (rec->thread_masks) 3356 for (t = 0; t < nr_threads; t++) 3357 record__thread_mask_free(&rec->thread_masks[t]); 3358 3359 zfree(&rec->thread_masks); 3360 } 3361 3362 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits) 3363 { 3364 int t, ret; 3365 3366 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks))); 3367 if (!rec->thread_masks) { 3368 pr_err("Failed to allocate thread masks\n"); 3369 return -ENOMEM; 3370 } 3371 3372 for (t = 0; t < nr_threads; t++) { 3373 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits); 3374 if (ret) { 3375 pr_err("Failed to allocate thread masks[%d]\n", t); 3376 goto out_free; 3377 } 3378 } 3379 3380 return 0; 3381 3382 out_free: 3383 record__free_thread_masks(rec, nr_threads); 3384 3385 return ret; 3386 } 3387 3388 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus) 3389 { 3390 int t, ret, nr_cpus = perf_cpu_map__nr(cpus); 3391 3392 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu); 3393 if (ret) 3394 return ret; 3395 3396 rec->nr_threads = nr_cpus; 3397 pr_debug("nr_threads: %d\n", rec->nr_threads); 3398 3399 for (t = 0; t < rec->nr_threads; t++) { 3400 set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits); 3401 set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits); 3402 if (verbose) { 3403 pr_debug("thread_masks[%d]: ", t); 3404 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3405 pr_debug("thread_masks[%d]: ", t); 3406 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3407 } 3408 } 3409 3410 return 0; 3411 } 3412 3413 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus, 3414 const char **maps_spec, const char **affinity_spec, 3415 u32 nr_spec) 3416 { 3417 u32 s; 3418 int ret = 0, t = 0; 3419 struct mmap_cpu_mask cpus_mask; 3420 struct thread_mask thread_mask, full_mask, *thread_masks; 3421 3422 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu); 3423 if (ret) { 3424 pr_err("Failed to allocate CPUs mask\n"); 3425 return ret; 3426 } 3427 record__mmap_cpu_mask_init(&cpus_mask, cpus); 3428 3429 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu); 3430 if (ret) { 3431 pr_err("Failed to allocate full mask\n"); 3432 goto out_free_cpu_mask; 3433 } 3434 3435 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3436 if (ret) { 3437 pr_err("Failed to allocate thread mask\n"); 3438 goto out_free_full_and_cpu_masks; 3439 } 3440 3441 for (s = 0; s < nr_spec; s++) { 3442 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]); 3443 if (ret) { 3444 pr_err("Failed to initialize maps thread mask\n"); 3445 goto out_free; 3446 } 3447 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]); 3448 if (ret) { 3449 pr_err("Failed to initialize affinity thread mask\n"); 3450 goto out_free; 3451 } 3452 3453 /* ignore invalid CPUs but do not allow empty masks */ 3454 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits, 3455 cpus_mask.bits, thread_mask.maps.nbits)) { 3456 pr_err("Empty maps mask: %s\n", maps_spec[s]); 3457 ret = -EINVAL; 3458 goto out_free; 3459 } 3460 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits, 3461 cpus_mask.bits, thread_mask.affinity.nbits)) { 3462 pr_err("Empty affinity mask: %s\n", affinity_spec[s]); 3463 ret = -EINVAL; 3464 goto out_free; 3465 } 3466 3467 /* do not allow intersection with other masks (full_mask) */ 3468 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits, 3469 thread_mask.maps.nbits)) { 3470 pr_err("Intersecting maps mask: %s\n", maps_spec[s]); 3471 ret = -EINVAL; 3472 goto out_free; 3473 } 3474 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits, 3475 thread_mask.affinity.nbits)) { 3476 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]); 3477 ret = -EINVAL; 3478 goto out_free; 3479 } 3480 3481 bitmap_or(full_mask.maps.bits, full_mask.maps.bits, 3482 thread_mask.maps.bits, full_mask.maps.nbits); 3483 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits, 3484 thread_mask.affinity.bits, full_mask.maps.nbits); 3485 3486 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask)); 3487 if (!thread_masks) { 3488 pr_err("Failed to reallocate thread masks\n"); 3489 ret = -ENOMEM; 3490 goto out_free; 3491 } 3492 rec->thread_masks = thread_masks; 3493 rec->thread_masks[t] = thread_mask; 3494 if (verbose) { 3495 pr_debug("thread_masks[%d]: ", t); 3496 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps"); 3497 pr_debug("thread_masks[%d]: ", t); 3498 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity"); 3499 } 3500 t++; 3501 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu); 3502 if (ret) { 3503 pr_err("Failed to allocate thread mask\n"); 3504 goto out_free_full_and_cpu_masks; 3505 } 3506 } 3507 rec->nr_threads = t; 3508 pr_debug("nr_threads: %d\n", rec->nr_threads); 3509 if (!rec->nr_threads) 3510 ret = -EINVAL; 3511 3512 out_free: 3513 record__thread_mask_free(&thread_mask); 3514 out_free_full_and_cpu_masks: 3515 record__thread_mask_free(&full_mask); 3516 out_free_cpu_mask: 3517 record__mmap_cpu_mask_free(&cpus_mask); 3518 3519 return ret; 3520 } 3521 3522 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus) 3523 { 3524 int ret; 3525 struct cpu_topology *topo; 3526 3527 topo = cpu_topology__new(); 3528 if (!topo) { 3529 pr_err("Failed to allocate CPU topology\n"); 3530 return -ENOMEM; 3531 } 3532 3533 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list, 3534 topo->core_cpus_list, topo->core_cpus_lists); 3535 cpu_topology__delete(topo); 3536 3537 return ret; 3538 } 3539 3540 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus) 3541 { 3542 int ret; 3543 struct cpu_topology *topo; 3544 3545 topo = cpu_topology__new(); 3546 if (!topo) { 3547 pr_err("Failed to allocate CPU topology\n"); 3548 return -ENOMEM; 3549 } 3550 3551 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list, 3552 topo->package_cpus_list, topo->package_cpus_lists); 3553 cpu_topology__delete(topo); 3554 3555 return ret; 3556 } 3557 3558 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus) 3559 { 3560 u32 s; 3561 int ret; 3562 const char **spec; 3563 struct numa_topology *topo; 3564 3565 topo = numa_topology__new(); 3566 if (!topo) { 3567 pr_err("Failed to allocate NUMA topology\n"); 3568 return -ENOMEM; 3569 } 3570 3571 spec = zalloc(topo->nr * sizeof(char *)); 3572 if (!spec) { 3573 pr_err("Failed to allocate NUMA spec\n"); 3574 ret = -ENOMEM; 3575 goto out_delete_topo; 3576 } 3577 for (s = 0; s < topo->nr; s++) 3578 spec[s] = topo->nodes[s].cpus; 3579 3580 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr); 3581 3582 zfree(&spec); 3583 3584 out_delete_topo: 3585 numa_topology__delete(topo); 3586 3587 return ret; 3588 } 3589 3590 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus) 3591 { 3592 int t, ret; 3593 u32 s, nr_spec = 0; 3594 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec; 3595 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL; 3596 3597 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) { 3598 spec = strtok_r(user_spec, ":", &spec_ptr); 3599 if (spec == NULL) 3600 break; 3601 pr_debug2("threads_spec[%d]: %s\n", t, spec); 3602 mask = strtok_r(spec, "/", &mask_ptr); 3603 if (mask == NULL) 3604 break; 3605 pr_debug2(" maps mask: %s\n", mask); 3606 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *)); 3607 if (!tmp_spec) { 3608 pr_err("Failed to reallocate maps spec\n"); 3609 ret = -ENOMEM; 3610 goto out_free; 3611 } 3612 maps_spec = tmp_spec; 3613 maps_spec[nr_spec] = dup_mask = strdup(mask); 3614 if (!maps_spec[nr_spec]) { 3615 pr_err("Failed to allocate maps spec[%d]\n", nr_spec); 3616 ret = -ENOMEM; 3617 goto out_free; 3618 } 3619 mask = strtok_r(NULL, "/", &mask_ptr); 3620 if (mask == NULL) { 3621 pr_err("Invalid thread maps or affinity specs\n"); 3622 ret = -EINVAL; 3623 goto out_free; 3624 } 3625 pr_debug2(" affinity mask: %s\n", mask); 3626 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *)); 3627 if (!tmp_spec) { 3628 pr_err("Failed to reallocate affinity spec\n"); 3629 ret = -ENOMEM; 3630 goto out_free; 3631 } 3632 affinity_spec = tmp_spec; 3633 affinity_spec[nr_spec] = strdup(mask); 3634 if (!affinity_spec[nr_spec]) { 3635 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec); 3636 ret = -ENOMEM; 3637 goto out_free; 3638 } 3639 dup_mask = NULL; 3640 nr_spec++; 3641 } 3642 3643 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec, 3644 (const char **)affinity_spec, nr_spec); 3645 3646 out_free: 3647 free(dup_mask); 3648 for (s = 0; s < nr_spec; s++) { 3649 if (maps_spec) 3650 free(maps_spec[s]); 3651 if (affinity_spec) 3652 free(affinity_spec[s]); 3653 } 3654 free(affinity_spec); 3655 free(maps_spec); 3656 3657 return ret; 3658 } 3659 3660 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus) 3661 { 3662 int ret; 3663 3664 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu); 3665 if (ret) 3666 return ret; 3667 3668 record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus); 3669 3670 rec->nr_threads = 1; 3671 3672 return 0; 3673 } 3674 3675 static int record__init_thread_masks(struct record *rec) 3676 { 3677 int ret = 0; 3678 struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; 3679 3680 if (!record__threads_enabled(rec)) 3681 return record__init_thread_default_masks(rec, cpus); 3682 3683 switch (rec->opts.threads_spec) { 3684 case THREAD_SPEC__CPU: 3685 ret = record__init_thread_cpu_masks(rec, cpus); 3686 break; 3687 case THREAD_SPEC__CORE: 3688 ret = record__init_thread_core_masks(rec, cpus); 3689 break; 3690 case THREAD_SPEC__PACKAGE: 3691 ret = record__init_thread_package_masks(rec, cpus); 3692 break; 3693 case THREAD_SPEC__NUMA: 3694 ret = record__init_thread_numa_masks(rec, cpus); 3695 break; 3696 case THREAD_SPEC__USER: 3697 ret = record__init_thread_user_masks(rec, cpus); 3698 break; 3699 default: 3700 break; 3701 } 3702 3703 return ret; 3704 } 3705 3706 int cmd_record(int argc, const char **argv) 3707 { 3708 int err; 3709 struct record *rec = &record; 3710 char errbuf[BUFSIZ]; 3711 3712 setlocale(LC_ALL, ""); 3713 3714 #ifndef HAVE_LIBBPF_SUPPORT 3715 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) 3716 set_nobuild('\0', "clang-path", true); 3717 set_nobuild('\0', "clang-opt", true); 3718 # undef set_nobuild 3719 #endif 3720 3721 #ifndef HAVE_BPF_PROLOGUE 3722 # if !defined (HAVE_DWARF_SUPPORT) 3723 # define REASON "NO_DWARF=1" 3724 # elif !defined (HAVE_LIBBPF_SUPPORT) 3725 # define REASON "NO_LIBBPF=1" 3726 # else 3727 # define REASON "this architecture doesn't support BPF prologue" 3728 # endif 3729 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) 3730 set_nobuild('\0', "vmlinux", true); 3731 # undef set_nobuild 3732 # undef REASON 3733 #endif 3734 3735 rec->opts.affinity = PERF_AFFINITY_SYS; 3736 3737 rec->evlist = evlist__new(); 3738 if (rec->evlist == NULL) 3739 return -ENOMEM; 3740 3741 err = perf_config(perf_record_config, rec); 3742 if (err) 3743 return err; 3744 3745 argc = parse_options(argc, argv, record_options, record_usage, 3746 PARSE_OPT_STOP_AT_NON_OPTION); 3747 if (quiet) 3748 perf_quiet_option(); 3749 3750 err = symbol__validate_sym_arguments(); 3751 if (err) 3752 return err; 3753 3754 perf_debuginfod_setup(&record.debuginfod); 3755 3756 /* Make system wide (-a) the default target. */ 3757 if (!argc && target__none(&rec->opts.target)) 3758 rec->opts.target.system_wide = true; 3759 3760 if (nr_cgroups && !rec->opts.target.system_wide) { 3761 usage_with_options_msg(record_usage, record_options, 3762 "cgroup monitoring only available in system-wide mode"); 3763 3764 } 3765 3766 if (rec->buildid_mmap) { 3767 if (!perf_can_record_build_id()) { 3768 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n"); 3769 err = -EINVAL; 3770 goto out_opts; 3771 } 3772 pr_debug("Enabling build id in mmap2 events.\n"); 3773 /* Enable mmap build id synthesizing. */ 3774 symbol_conf.buildid_mmap2 = true; 3775 /* Enable perf_event_attr::build_id bit. */ 3776 rec->opts.build_id = true; 3777 /* Disable build id cache. */ 3778 rec->no_buildid = true; 3779 } 3780 3781 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { 3782 pr_err("Kernel has no cgroup sampling support.\n"); 3783 err = -EINVAL; 3784 goto out_opts; 3785 } 3786 3787 if (rec->opts.kcore || record__threads_enabled(rec)) 3788 rec->data.is_dir = true; 3789 3790 if (record__threads_enabled(rec)) { 3791 if (rec->opts.affinity != PERF_AFFINITY_SYS) { 3792 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n"); 3793 goto out_opts; 3794 } 3795 if (record__aio_enabled(rec)) { 3796 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n"); 3797 goto out_opts; 3798 } 3799 } 3800 3801 if (rec->opts.comp_level != 0) { 3802 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n"); 3803 rec->no_buildid = true; 3804 } 3805 3806 if (rec->opts.record_switch_events && 3807 !perf_can_record_switch_events()) { 3808 ui__error("kernel does not support recording context switch events\n"); 3809 parse_options_usage(record_usage, record_options, "switch-events", 0); 3810 err = -EINVAL; 3811 goto out_opts; 3812 } 3813 3814 if (switch_output_setup(rec)) { 3815 parse_options_usage(record_usage, record_options, "switch-output", 0); 3816 err = -EINVAL; 3817 goto out_opts; 3818 } 3819 3820 if (rec->switch_output.time) { 3821 signal(SIGALRM, alarm_sig_handler); 3822 alarm(rec->switch_output.time); 3823 } 3824 3825 if (rec->switch_output.num_files) { 3826 rec->switch_output.filenames = calloc(sizeof(char *), 3827 rec->switch_output.num_files); 3828 if (!rec->switch_output.filenames) { 3829 err = -EINVAL; 3830 goto out_opts; 3831 } 3832 } 3833 3834 if (rec->timestamp_filename && record__threads_enabled(rec)) { 3835 rec->timestamp_filename = false; 3836 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n"); 3837 } 3838 3839 /* 3840 * Allow aliases to facilitate the lookup of symbols for address 3841 * filters. Refer to auxtrace_parse_filters(). 3842 */ 3843 symbol_conf.allow_aliases = true; 3844 3845 symbol__init(NULL); 3846 3847 err = record__auxtrace_init(rec); 3848 if (err) 3849 goto out; 3850 3851 if (dry_run) 3852 goto out; 3853 3854 err = bpf__setup_stdout(rec->evlist); 3855 if (err) { 3856 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf)); 3857 pr_err("ERROR: Setup BPF stdout failed: %s\n", 3858 errbuf); 3859 goto out; 3860 } 3861 3862 err = -ENOMEM; 3863 3864 if (rec->no_buildid_cache || rec->no_buildid) { 3865 disable_buildid_cache(); 3866 } else if (rec->switch_output.enabled) { 3867 /* 3868 * In 'perf record --switch-output', disable buildid 3869 * generation by default to reduce data file switching 3870 * overhead. Still generate buildid if they are required 3871 * explicitly using 3872 * 3873 * perf record --switch-output --no-no-buildid \ 3874 * --no-no-buildid-cache 3875 * 3876 * Following code equals to: 3877 * 3878 * if ((rec->no_buildid || !rec->no_buildid_set) && 3879 * (rec->no_buildid_cache || !rec->no_buildid_cache_set)) 3880 * disable_buildid_cache(); 3881 */ 3882 bool disable = true; 3883 3884 if (rec->no_buildid_set && !rec->no_buildid) 3885 disable = false; 3886 if (rec->no_buildid_cache_set && !rec->no_buildid_cache) 3887 disable = false; 3888 if (disable) { 3889 rec->no_buildid = true; 3890 rec->no_buildid_cache = true; 3891 disable_buildid_cache(); 3892 } 3893 } 3894 3895 if (record.opts.overwrite) 3896 record.opts.tail_synthesize = true; 3897 3898 if (rec->evlist->core.nr_entries == 0) { 3899 if (perf_pmu__has_hybrid()) { 3900 err = evlist__add_default_hybrid(rec->evlist, 3901 !record.opts.no_samples); 3902 } else { 3903 err = __evlist__add_default(rec->evlist, 3904 !record.opts.no_samples); 3905 } 3906 3907 if (err < 0) { 3908 pr_err("Not enough memory for event selector list\n"); 3909 goto out; 3910 } 3911 } 3912 3913 if (rec->opts.target.tid && !rec->opts.no_inherit_set) 3914 rec->opts.no_inherit = true; 3915 3916 err = target__validate(&rec->opts.target); 3917 if (err) { 3918 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 3919 ui__warning("%s\n", errbuf); 3920 } 3921 3922 err = target__parse_uid(&rec->opts.target); 3923 if (err) { 3924 int saved_errno = errno; 3925 3926 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 3927 ui__error("%s", errbuf); 3928 3929 err = -saved_errno; 3930 goto out; 3931 } 3932 3933 /* Enable ignoring missing threads when -u/-p option is defined. */ 3934 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; 3935 3936 if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) { 3937 pr_err("failed to use cpu list %s\n", 3938 rec->opts.target.cpu_list); 3939 goto out; 3940 } 3941 3942 rec->opts.target.hybrid = perf_pmu__has_hybrid(); 3943 3944 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) 3945 arch__add_leaf_frame_record_opts(&rec->opts); 3946 3947 err = -ENOMEM; 3948 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) 3949 usage_with_options(record_usage, record_options); 3950 3951 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); 3952 if (err) 3953 goto out; 3954 3955 /* 3956 * We take all buildids when the file contains 3957 * AUX area tracing data because we do not decode the 3958 * trace because it would take too long. 3959 */ 3960 if (rec->opts.full_auxtrace) 3961 rec->buildid_all = true; 3962 3963 if (rec->opts.text_poke) { 3964 err = record__config_text_poke(rec->evlist); 3965 if (err) { 3966 pr_err("record__config_text_poke failed, error %d\n", err); 3967 goto out; 3968 } 3969 } 3970 3971 if (record_opts__config(&rec->opts)) { 3972 err = -EINVAL; 3973 goto out; 3974 } 3975 3976 err = record__init_thread_masks(rec); 3977 if (err) { 3978 pr_err("Failed to initialize parallel data streaming masks\n"); 3979 goto out; 3980 } 3981 3982 if (rec->opts.nr_cblocks > nr_cblocks_max) 3983 rec->opts.nr_cblocks = nr_cblocks_max; 3984 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks); 3985 3986 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 3987 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 3988 3989 if (rec->opts.comp_level > comp_level_max) 3990 rec->opts.comp_level = comp_level_max; 3991 pr_debug("comp level: %d\n", rec->opts.comp_level); 3992 3993 err = __cmd_record(&record, argc, argv); 3994 out: 3995 evlist__delete(rec->evlist); 3996 symbol__exit(); 3997 auxtrace_record__free(rec->itr); 3998 out_opts: 3999 record__free_thread_masks(rec, rec->nr_threads); 4000 rec->nr_threads = 0; 4001 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); 4002 return err; 4003 } 4004 4005 static void snapshot_sig_handler(int sig __maybe_unused) 4006 { 4007 struct record *rec = &record; 4008 4009 hit_auxtrace_snapshot_trigger(rec); 4010 4011 if (switch_output_signal(rec)) 4012 trigger_hit(&switch_output_trigger); 4013 } 4014 4015 static void alarm_sig_handler(int sig __maybe_unused) 4016 { 4017 struct record *rec = &record; 4018 4019 if (switch_output_time(rec)) 4020 trigger_hit(&switch_output_trigger); 4021 } 4022