1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9 #include "builtin.h"
10
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include <internal/xyarray.h>
14 #include "util/parse-events.h"
15 #include "util/config.h"
16
17 #include "util/callchain.h"
18 #include "util/cgroup.h"
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/mmap.h"
25 #include "util/mutex.h"
26 #include "util/target.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/record.h"
31 #include "util/cpumap.h"
32 #include "util/thread_map.h"
33 #include "util/data.h"
34 #include "util/perf_regs.h"
35 #include "util/auxtrace.h"
36 #include "util/tsc.h"
37 #include "util/parse-branch-options.h"
38 #include "util/parse-regs-options.h"
39 #include "util/perf_api_probe.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/pmu.h"
50 #include "util/pmus.h"
51 #include "util/clockid.h"
52 #include "util/off_cpu.h"
53 #include "util/bpf-filter.h"
54 #include "asm/bug.h"
55 #include "perf.h"
56 #include "cputopo.h"
57
58 #include <errno.h>
59 #include <inttypes.h>
60 #include <locale.h>
61 #include <poll.h>
62 #include <pthread.h>
63 #include <unistd.h>
64 #ifndef HAVE_GETTID
65 #include <syscall.h>
66 #endif
67 #include <sched.h>
68 #include <signal.h>
69 #ifdef HAVE_EVENTFD_SUPPORT
70 #include <sys/eventfd.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/wait.h>
74 #include <sys/types.h>
75 #include <sys/stat.h>
76 #include <fcntl.h>
77 #include <linux/err.h>
78 #include <linux/string.h>
79 #include <linux/time64.h>
80 #include <linux/zalloc.h>
81 #include <linux/bitmap.h>
82 #include <sys/time.h>
83
84 struct switch_output {
85 bool enabled;
86 bool signal;
87 unsigned long size;
88 unsigned long time;
89 const char *str;
90 bool set;
91 char **filenames;
92 int num_files;
93 int cur_file;
94 };
95
96 struct thread_mask {
97 struct mmap_cpu_mask maps;
98 struct mmap_cpu_mask affinity;
99 };
100
101 struct record_thread {
102 pid_t tid;
103 struct thread_mask *mask;
104 struct {
105 int msg[2];
106 int ack[2];
107 } pipes;
108 struct fdarray pollfd;
109 int ctlfd_pos;
110 int nr_mmaps;
111 struct mmap **maps;
112 struct mmap **overwrite_maps;
113 struct record *rec;
114 unsigned long long samples;
115 unsigned long waking;
116 u64 bytes_written;
117 u64 bytes_transferred;
118 u64 bytes_compressed;
119 };
120
121 static __thread struct record_thread *thread;
122
123 enum thread_msg {
124 THREAD_MSG__UNDEFINED = 0,
125 THREAD_MSG__READY,
126 THREAD_MSG__MAX,
127 };
128
129 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130 "UNDEFINED", "READY"
131 };
132
133 enum thread_spec {
134 THREAD_SPEC__UNDEFINED = 0,
135 THREAD_SPEC__CPU,
136 THREAD_SPEC__CORE,
137 THREAD_SPEC__PACKAGE,
138 THREAD_SPEC__NUMA,
139 THREAD_SPEC__USER,
140 THREAD_SPEC__MAX,
141 };
142
143 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144 "undefined", "cpu", "core", "package", "numa", "user"
145 };
146
147 struct pollfd_index_map {
148 int evlist_pollfd_index;
149 int thread_pollfd_index;
150 };
151
152 struct record {
153 struct perf_tool tool;
154 struct record_opts opts;
155 u64 bytes_written;
156 u64 thread_bytes_written;
157 struct perf_data data;
158 struct auxtrace_record *itr;
159 struct evlist *evlist;
160 struct perf_session *session;
161 struct evlist *sb_evlist;
162 pthread_t thread_id;
163 int realtime_prio;
164 bool switch_output_event_set;
165 bool no_buildid;
166 bool no_buildid_set;
167 bool no_buildid_cache;
168 bool no_buildid_cache_set;
169 bool buildid_all;
170 bool buildid_mmap;
171 bool timestamp_filename;
172 bool timestamp_boundary;
173 bool off_cpu;
174 struct switch_output switch_output;
175 unsigned long long samples;
176 unsigned long output_max_size; /* = 0: unlimited */
177 struct perf_debuginfod debuginfod;
178 int nr_threads;
179 struct thread_mask *thread_masks;
180 struct record_thread *thread_data;
181 struct pollfd_index_map *index_map;
182 size_t index_map_sz;
183 size_t index_map_cnt;
184 };
185
186 static volatile int done;
187
188 static volatile int auxtrace_record__snapshot_started;
189 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
190 static DEFINE_TRIGGER(switch_output_trigger);
191
192 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
193 "SYS", "NODE", "CPU"
194 };
195
196 #ifndef HAVE_GETTID
197 static inline pid_t gettid(void)
198 {
199 return (pid_t)syscall(__NR_gettid);
200 }
201 #endif
202
203 static int record__threads_enabled(struct record *rec)
204 {
205 return rec->opts.threads_spec;
206 }
207
208 static bool switch_output_signal(struct record *rec)
209 {
210 return rec->switch_output.signal &&
211 trigger_is_ready(&switch_output_trigger);
212 }
213
214 static bool switch_output_size(struct record *rec)
215 {
216 return rec->switch_output.size &&
217 trigger_is_ready(&switch_output_trigger) &&
218 (rec->bytes_written >= rec->switch_output.size);
219 }
220
221 static bool switch_output_time(struct record *rec)
222 {
223 return rec->switch_output.time &&
224 trigger_is_ready(&switch_output_trigger);
225 }
226
227 static u64 record__bytes_written(struct record *rec)
228 {
229 return rec->bytes_written + rec->thread_bytes_written;
230 }
231
232 static bool record__output_max_size_exceeded(struct record *rec)
233 {
234 return rec->output_max_size &&
235 (record__bytes_written(rec) >= rec->output_max_size);
236 }
237
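/*
 * Write a block of data to the output: to the mmap's own per-thread file when
 * it has one (parallel threads mode), otherwise to the main perf.data file.
 * The byte counters are updated so that the output size limit and the
 * switch-output size checks below see the total amount written.
 */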
238 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
239 void *bf, size_t size)
240 {
241 struct perf_data_file *file = &rec->session->data->file;
242
243 if (map && map->file)
244 file = map->file;
245
246 if (perf_data_file__write(file, bf, size) < 0) {
247 pr_err("failed to write perf data, error: %m\n");
248 return -1;
249 }
250
251 if (map && map->file) {
252 thread->bytes_written += size;
253 rec->thread_bytes_written += size;
254 } else {
255 rec->bytes_written += size;
256 }
257
258 if (record__output_max_size_exceeded(rec) && !done) {
259 fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
260 " stopping session ]\n",
261 record__bytes_written(rec) >> 10);
262 done = 1;
263 }
264
265 if (switch_output_size(rec))
266 trigger_hit(&switch_output_trigger);
267
268 return 0;
269 }
270
271 static int record__aio_enabled(struct record *rec);
272 static int record__comp_enabled(struct record *rec);
273 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
274 void *dst, size_t dst_size, void *src, size_t src_size);
275
276 #ifdef HAVE_AIO_SUPPORT
277 static int record__aio_write(struct aiocb *cblock, int trace_fd,
278 void *buf, size_t size, off_t off)
279 {
280 int rc;
281
282 cblock->aio_fildes = trace_fd;
283 cblock->aio_buf = buf;
284 cblock->aio_nbytes = size;
285 cblock->aio_offset = off;
286 cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
287
288 do {
289 rc = aio_write(cblock);
290 if (rc == 0) {
291 break;
292 } else if (errno != EAGAIN) {
293 cblock->aio_fildes = -1;
294 pr_err("failed to queue perf data, error: %m\n");
295 break;
296 }
297 } while (1);
298
299 return rc;
300 }
301
302 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
303 {
304 void *rem_buf;
305 off_t rem_off;
306 size_t rem_size;
307 int rc, aio_errno;
308 ssize_t aio_ret, written;
309
310 aio_errno = aio_error(cblock);
311 if (aio_errno == EINPROGRESS)
312 return 0;
313
314 written = aio_ret = aio_return(cblock);
315 if (aio_ret < 0) {
316 if (aio_errno != EINTR)
317 pr_err("failed to write perf data, error: %m\n");
318 written = 0;
319 }
320
321 rem_size = cblock->aio_nbytes - written;
322
323 if (rem_size == 0) {
324 cblock->aio_fildes = -1;
325 /*
326 * md->refcount is incremented in record__aio_pushfn() for
327 * every aio write request started in record__aio_push() so
328 * decrement it because the request is now complete.
329 */
330 perf_mmap__put(&md->core);
331 rc = 1;
332 } else {
333 /*
334 * The aio write request may need to be restarted with the
335 * remainder if the kernel didn't write the whole
336 * chunk at once.
337 */
338 rem_off = cblock->aio_offset + written;
339 rem_buf = (void *)(cblock->aio_buf + written);
340 record__aio_write(cblock, cblock->aio_fildes,
341 rem_buf, rem_size, rem_off);
342 rc = 0;
343 }
344
345 return rc;
346 }
347
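/*
 * Wait for in-flight aio writes on this mmap. With sync_all == false, return
 * the index of the first control block that is free or has just completed so
 * it can be reused; with sync_all == true, keep suspending until every
 * outstanding request has completed (returning -1 once none are left).
 */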
348 static int record__aio_sync(struct mmap *md, bool sync_all)
349 {
350 struct aiocb **aiocb = md->aio.aiocb;
351 struct aiocb *cblocks = md->aio.cblocks;
352 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
353 int i, do_suspend;
354
355 do {
356 do_suspend = 0;
357 for (i = 0; i < md->aio.nr_cblocks; ++i) {
358 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
359 if (sync_all)
360 aiocb[i] = NULL;
361 else
362 return i;
363 } else {
364 /*
365 * The started aio write is not complete yet,
366 * so it has to be waited on before the
367 * next allocation.
368 */
369 aiocb[i] = &cblocks[i];
370 do_suspend = 1;
371 }
372 }
373 if (!do_suspend)
374 return -1;
375
376 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
377 if (!(errno == EAGAIN || errno == EINTR))
378 pr_err("failed to sync perf data, error: %m\n");
379 }
380 } while (1);
381 }
382
383 struct record_aio {
384 struct record *rec;
385 void *data;
386 size_t size;
387 };
388
389 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
390 {
391 struct record_aio *aio = to;
392
393 /*
394 * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
395 * buffer to release space in the kernel buffer as fast as possible, by calling
396 * perf_mmap__consume() from the perf_mmap__push() function.
397 *
398 * That lets the kernel proceed with storing more profiling data into
399 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
400 *
401 * Copying can be done in two steps in case the chunk of profiling data
402 * crosses the upper bound of the kernel buffer. In this case we first move
403 * part of the data from map->start up to the upper bound and then the remainder
404 * from the beginning of the kernel buffer up to the end of the data chunk.
405 */
406
407 if (record__comp_enabled(aio->rec)) {
408 size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
409 mmap__mmap_len(map) - aio->size,
410 buf, size);
411 } else {
412 memcpy(aio->data + aio->size, buf, size);
413 }
414
415 if (!aio->size) {
416 /*
417 * Increment map->refcount to guard map->aio.data[] buffer
418 * from premature deallocation, because the map object can be
419 * released before the aio write request started on the
420 * map->aio.data[] buffer completes.
421 *
422 * perf_mmap__put() is done in record__aio_complete() after
423 * the started aio request completes, or in record__aio_push()
424 * if the request failed to start.
425 */
426 perf_mmap__get(&map->core);
427 }
428
429 aio->size += size;
430
431 return size;
432 }
433
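/*
 * Push the data of one mmap'ed ring buffer to the output file using POSIX aio:
 * pick a free aio buffer, copy (and optionally compress) the ring data into it,
 * then queue an asynchronous write at the current file offset *off.
 */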
434 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
435 {
436 int ret, idx;
437 int trace_fd = rec->session->data->file.fd;
438 struct record_aio aio = { .rec = rec, .size = 0 };
439
440 /*
441 * Call record__aio_sync() to wait until a map->aio.data[] buffer
442 * becomes available after the previous aio write operation.
443 */
444
445 idx = record__aio_sync(map, false);
446 aio.data = map->aio.data[idx];
447 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
448 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
449 return ret;
450
451 rec->samples++;
452 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
453 if (!ret) {
454 *off += aio.size;
455 rec->bytes_written += aio.size;
456 if (switch_output_size(rec))
457 trigger_hit(&switch_output_trigger);
458 } else {
459 /*
460 * Decrement map->refcount, incremented in record__aio_pushfn(),
461 * if the record__aio_write() operation failed to start; otherwise
462 * map->refcount is decremented in record__aio_complete() after
463 * the aio write operation finishes successfully.
464 */
465 perf_mmap__put(&map->core);
466 }
467
468 return ret;
469 }
470
471 static off_t record__aio_get_pos(int trace_fd)
472 {
473 return lseek(trace_fd, 0, SEEK_CUR);
474 }
475
476 static void record__aio_set_pos(int trace_fd, off_t pos)
477 {
478 lseek(trace_fd, pos, SEEK_SET);
479 }
480
481 static void record__aio_mmap_read_sync(struct record *rec)
482 {
483 int i;
484 struct evlist *evlist = rec->evlist;
485 struct mmap *maps = evlist->mmap;
486
487 if (!record__aio_enabled(rec))
488 return;
489
490 for (i = 0; i < evlist->core.nr_mmaps; i++) {
491 struct mmap *map = &maps[i];
492
493 if (map->core.base)
494 record__aio_sync(map, true);
495 }
496 }
497
498 static int nr_cblocks_default = 1;
499 static int nr_cblocks_max = 4;
500
501 static int record__aio_parse(const struct option *opt,
502 const char *str,
503 int unset)
504 {
505 struct record_opts *opts = (struct record_opts *)opt->value;
506
507 if (unset) {
508 opts->nr_cblocks = 0;
509 } else {
510 if (str)
511 opts->nr_cblocks = strtol(str, NULL, 0);
512 if (!opts->nr_cblocks)
513 opts->nr_cblocks = nr_cblocks_default;
514 }
515
516 return 0;
517 }
518 #else /* HAVE_AIO_SUPPORT */
519 static int nr_cblocks_max = 0;
520
521 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
522 off_t *off __maybe_unused)
523 {
524 return -1;
525 }
526
527 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
528 {
529 return -1;
530 }
531
532 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
533 {
534 }
535
536 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
537 {
538 }
539 #endif
540
541 static int record__aio_enabled(struct record *rec)
542 {
543 return rec->opts.nr_cblocks > 0;
544 }
545
546 #define MMAP_FLUSH_DEFAULT 1
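/*
 * Parse the --mmap-flush option value: accept a plain number or a B/K/M/G
 * suffixed size, fall back to MMAP_FLUSH_DEFAULT, and cap the result at a
 * quarter of the mmap buffer size.
 */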
547 static int record__mmap_flush_parse(const struct option *opt,
548 const char *str,
549 int unset)
550 {
551 int flush_max;
552 struct record_opts *opts = (struct record_opts *)opt->value;
553 static struct parse_tag tags[] = {
554 { .tag = 'B', .mult = 1 },
555 { .tag = 'K', .mult = 1 << 10 },
556 { .tag = 'M', .mult = 1 << 20 },
557 { .tag = 'G', .mult = 1 << 30 },
558 { .tag = 0 },
559 };
560
561 if (unset)
562 return 0;
563
564 if (str) {
565 opts->mmap_flush = parse_tag_value(str, tags);
566 if (opts->mmap_flush == (int)-1)
567 opts->mmap_flush = strtol(str, NULL, 0);
568 }
569
570 if (!opts->mmap_flush)
571 opts->mmap_flush = MMAP_FLUSH_DEFAULT;
572
573 flush_max = evlist__mmap_size(opts->mmap_pages);
574 flush_max /= 4;
575 if (opts->mmap_flush > flush_max)
576 opts->mmap_flush = flush_max;
577
578 return 0;
579 }
580
581 #ifdef HAVE_ZSTD_SUPPORT
582 static unsigned int comp_level_default = 1;
583
584 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
585 {
586 struct record_opts *opts = opt->value;
587
588 if (unset) {
589 opts->comp_level = 0;
590 } else {
591 if (str)
592 opts->comp_level = strtol(str, NULL, 0);
593 if (!opts->comp_level)
594 opts->comp_level = comp_level_default;
595 }
596
597 return 0;
598 }
599 #endif
600 static unsigned int comp_level_max = 22;
601
602 static int record__comp_enabled(struct record *rec)
603 {
604 return rec->opts.comp_level > 0;
605 }
606
607 static int process_synthesized_event(struct perf_tool *tool,
608 union perf_event *event,
609 struct perf_sample *sample __maybe_unused,
610 struct machine *machine __maybe_unused)
611 {
612 struct record *rec = container_of(tool, struct record, tool);
613 return record__write(rec, NULL, event, event->header.size);
614 }
615
616 static struct mutex synth_lock;
617
618 static int process_locked_synthesized_event(struct perf_tool *tool,
619 union perf_event *event,
620 struct perf_sample *sample __maybe_unused,
621 struct machine *machine __maybe_unused)
622 {
623 int ret;
624
625 mutex_lock(&synth_lock);
626 ret = process_synthesized_event(tool, event, sample, machine);
627 mutex_unlock(&synth_lock);
628 return ret;
629 }
630
631 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
632 {
633 struct record *rec = to;
634
635 if (record__comp_enabled(rec)) {
636 size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
637 bf = map->data;
638 }
639
640 thread->samples++;
641 return record__write(rec, map, bf, size);
642 }
643
644 static volatile sig_atomic_t signr = -1;
645 static volatile sig_atomic_t child_finished;
646 #ifdef HAVE_EVENTFD_SUPPORT
647 static volatile sig_atomic_t done_fd = -1;
648 #endif
649
650 static void sig_handler(int sig)
651 {
652 if (sig == SIGCHLD)
653 child_finished = 1;
654 else
655 signr = sig;
656
657 done = 1;
658 #ifdef HAVE_EVENTFD_SUPPORT
659 if (done_fd >= 0) {
660 u64 tmp = 1;
661 int orig_errno = errno;
662
663 /*
664 * It is possible for this signal handler to run after done is
665 * checked in the main loop, but before the perf counter fds are
666 * polled. If this happens, the poll() will continue to wait
667 * even though done is set, and will only break out if either
668 * another signal is received, or the counters are ready for
669 * read. To ensure the poll() doesn't sleep when done is set,
670 * use an eventfd (done_fd) to wake up the poll().
671 */
672 if (write(done_fd, &tmp, sizeof(tmp)) < 0)
673 pr_err("failed to signal wakeup fd, error: %m\n");
674
675 errno = orig_errno;
676 }
677 #endif // HAVE_EVENTFD_SUPPORT
678 }
679
680 static void sigsegv_handler(int sig)
681 {
682 perf_hooks__recover();
683 sighandler_dump_stack(sig);
684 }
685
686 static void record__sig_exit(void)
687 {
688 if (signr == -1)
689 return;
690
691 signal(signr, SIG_DFL);
692 raise(signr);
693 }
694
695 #ifdef HAVE_AUXTRACE_SUPPORT
696
697 static int record__process_auxtrace(struct perf_tool *tool,
698 struct mmap *map,
699 union perf_event *event, void *data1,
700 size_t len1, void *data2, size_t len2)
701 {
702 struct record *rec = container_of(tool, struct record, tool);
703 struct perf_data *data = &rec->data;
704 size_t padding;
705 u8 pad[8] = {0};
706
707 if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
708 off_t file_offset;
709 int fd = perf_data__fd(data);
710 int err;
711
712 file_offset = lseek(fd, 0, SEEK_CUR);
713 if (file_offset == -1)
714 return -1;
715 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
716 event, file_offset);
717 if (err)
718 return err;
719 }
720
721 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
722 padding = (len1 + len2) & 7;
723 if (padding)
724 padding = 8 - padding;
725
726 record__write(rec, map, event, event->header.size);
727 record__write(rec, map, data1, len1);
728 if (len2)
729 record__write(rec, map, data2, len2);
730 record__write(rec, map, &pad, padding);
731
732 return 0;
733 }
734
735 static int record__auxtrace_mmap_read(struct record *rec,
736 struct mmap *map)
737 {
738 int ret;
739
740 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
741 record__process_auxtrace);
742 if (ret < 0)
743 return ret;
744
745 if (ret)
746 rec->samples++;
747
748 return 0;
749 }
750
751 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
752 struct mmap *map)
753 {
754 int ret;
755
756 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
757 record__process_auxtrace,
758 rec->opts.auxtrace_snapshot_size);
759 if (ret < 0)
760 return ret;
761
762 if (ret)
763 rec->samples++;
764
765 return 0;
766 }
767
768 static int record__auxtrace_read_snapshot_all(struct record *rec)
769 {
770 int i;
771 int rc = 0;
772
773 for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
774 struct mmap *map = &rec->evlist->mmap[i];
775
776 if (!map->auxtrace_mmap.base)
777 continue;
778
779 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
780 rc = -1;
781 goto out;
782 }
783 }
784 out:
785 return rc;
786 }
787
788 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
789 {
790 pr_debug("Recording AUX area tracing snapshot\n");
791 if (record__auxtrace_read_snapshot_all(rec) < 0) {
792 trigger_error(&auxtrace_snapshot_trigger);
793 } else {
794 if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
795 trigger_error(&auxtrace_snapshot_trigger);
796 else
797 trigger_ready(&auxtrace_snapshot_trigger);
798 }
799 }
800
801 static int record__auxtrace_snapshot_exit(struct record *rec)
802 {
803 if (trigger_is_error(&auxtrace_snapshot_trigger))
804 return 0;
805
806 if (!auxtrace_record__snapshot_started &&
807 auxtrace_record__snapshot_start(rec->itr))
808 return -1;
809
810 record__read_auxtrace_snapshot(rec, true);
811 if (trigger_is_error(&auxtrace_snapshot_trigger))
812 return -1;
813
814 return 0;
815 }
816
817 static int record__auxtrace_init(struct record *rec)
818 {
819 int err;
820
821 if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
822 && record__threads_enabled(rec)) {
823 pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
824 return -EINVAL;
825 }
826
827 if (!rec->itr) {
828 rec->itr = auxtrace_record__init(rec->evlist, &err);
829 if (err)
830 return err;
831 }
832
833 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
834 rec->opts.auxtrace_snapshot_opts);
835 if (err)
836 return err;
837
838 err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
839 rec->opts.auxtrace_sample_opts);
840 if (err)
841 return err;
842
843 auxtrace_regroup_aux_output(rec->evlist);
844
845 return auxtrace_parse_filters(rec->evlist);
846 }
847
848 #else
849
850 static inline
851 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
852 struct mmap *map __maybe_unused)
853 {
854 return 0;
855 }
856
857 static inline
858 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
859 bool on_exit __maybe_unused)
860 {
861 }
862
863 static inline
864 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
865 {
866 return 0;
867 }
868
869 static inline
870 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
871 {
872 return 0;
873 }
874
875 static int record__auxtrace_init(struct record *rec __maybe_unused)
876 {
877 return 0;
878 }
879
880 #endif
881
882 static int record__config_text_poke(struct evlist *evlist)
883 {
884 struct evsel *evsel;
885
886 /* Nothing to do if text poke is already configured */
887 evlist__for_each_entry(evlist, evsel) {
888 if (evsel->core.attr.text_poke)
889 return 0;
890 }
891
892 evsel = evlist__add_dummy_on_all_cpus(evlist);
893 if (!evsel)
894 return -ENOMEM;
895
896 evsel->core.attr.text_poke = 1;
897 evsel->core.attr.ksymbol = 1;
898 evsel->immediate = true;
899 evsel__set_sample_bit(evsel, TIME);
900
901 return 0;
902 }
903
904 static int record__config_off_cpu(struct record *rec)
905 {
906 return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
907 }
908
909 static int record__config_tracking_events(struct record *rec)
910 {
911 struct record_opts *opts = &rec->opts;
912 struct evlist *evlist = rec->evlist;
913 struct evsel *evsel;
914
915 /*
916 * For initial_delay, system wide, or a hybrid system, we need to add
917 * a tracking event so that we can track PERF_RECORD_MMAP to cover the
918 * delay of waiting or of event synthesis.
919 */
920 if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
921 perf_pmus__num_core_pmus() > 1) {
922 evsel = evlist__findnew_tracking_event(evlist, false);
923 if (!evsel)
924 return -ENOMEM;
925
926 /*
927 * Enable the tracking event when the process is forked for
928 * initial_delay, or immediately for system wide.
929 */
930 if (opts->target.initial_delay && !evsel->immediate &&
931 !target__has_cpu(&opts->target))
932 evsel->core.attr.enable_on_exec = 1;
933 else
934 evsel->immediate = 1;
935 }
936
937 return 0;
938 }
939
940 static bool record__kcore_readable(struct machine *machine)
941 {
942 char kcore[PATH_MAX];
943 int fd;
944
945 scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
946
947 fd = open(kcore, O_RDONLY);
948 if (fd < 0)
949 return false;
950
951 close(fd);
952
953 return true;
954 }
955
956 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
957 {
958 char from_dir[PATH_MAX];
959 char kcore_dir[PATH_MAX];
960 int ret;
961
962 snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
963
964 ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
965 if (ret)
966 return ret;
967
968 return kcore_copy(from_dir, kcore_dir);
969 }
970
971 static void record__thread_data_init_pipes(struct record_thread *thread_data)
972 {
973 thread_data->pipes.msg[0] = -1;
974 thread_data->pipes.msg[1] = -1;
975 thread_data->pipes.ack[0] = -1;
976 thread_data->pipes.ack[1] = -1;
977 }
978
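/*
 * Create the msg/ack pipe pair used between the main thread and a reader
 * thread: msg is polled by the reader thread (a hangup requests termination),
 * ack carries the THREAD_MSG__READY notifications back to the main thread.
 */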
979 static int record__thread_data_open_pipes(struct record_thread *thread_data)
980 {
981 if (pipe(thread_data->pipes.msg))
982 return -EINVAL;
983
984 if (pipe(thread_data->pipes.ack)) {
985 close(thread_data->pipes.msg[0]);
986 thread_data->pipes.msg[0] = -1;
987 close(thread_data->pipes.msg[1]);
988 thread_data->pipes.msg[1] = -1;
989 return -EINVAL;
990 }
991
992 pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
993 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
994 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
995
996 return 0;
997 }
998
999 static void record__thread_data_close_pipes(struct record_thread *thread_data)
1000 {
1001 if (thread_data->pipes.msg[0] != -1) {
1002 close(thread_data->pipes.msg[0]);
1003 thread_data->pipes.msg[0] = -1;
1004 }
1005 if (thread_data->pipes.msg[1] != -1) {
1006 close(thread_data->pipes.msg[1]);
1007 thread_data->pipes.msg[1] = -1;
1008 }
1009 if (thread_data->pipes.ack[0] != -1) {
1010 close(thread_data->pipes.ack[0]);
1011 thread_data->pipes.ack[0] = -1;
1012 }
1013 if (thread_data->pipes.ack[1] != -1) {
1014 close(thread_data->pipes.ack[1]);
1015 thread_data->pipes.ack[1] = -1;
1016 }
1017 }
1018
1019 static bool evlist__per_thread(struct evlist *evlist)
1020 {
1021 return cpu_map__is_dummy(evlist->core.user_requested_cpus);
1022 }
1023
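/*
 * Distribute the evlist mmaps across this thread: in per-thread mode the
 * thread gets all of them, otherwise only the mmaps whose CPU is set in the
 * thread's maps bitmask.
 */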
1024 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
1025 {
1026 int m, tm, nr_mmaps = evlist->core.nr_mmaps;
1027 struct mmap *mmap = evlist->mmap;
1028 struct mmap *overwrite_mmap = evlist->overwrite_mmap;
1029 struct perf_cpu_map *cpus = evlist->core.all_cpus;
1030 bool per_thread = evlist__per_thread(evlist);
1031
1032 if (per_thread)
1033 thread_data->nr_mmaps = nr_mmaps;
1034 else
1035 thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1036 thread_data->mask->maps.nbits);
1037 if (mmap) {
1038 thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1039 if (!thread_data->maps)
1040 return -ENOMEM;
1041 }
1042 if (overwrite_mmap) {
1043 thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1044 if (!thread_data->overwrite_maps) {
1045 zfree(&thread_data->maps);
1046 return -ENOMEM;
1047 }
1048 }
1049 pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1050 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1051
1052 for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1053 if (per_thread ||
1054 test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1055 if (thread_data->maps) {
1056 thread_data->maps[tm] = &mmap[m];
1057 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1058 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1059 }
1060 if (thread_data->overwrite_maps) {
1061 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1062 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1063 thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1064 }
1065 tm++;
1066 }
1067 }
1068
1069 return 0;
1070 }
1071
1072 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1073 {
1074 int f, tm, pos;
1075 struct mmap *map, *overwrite_map;
1076
1077 fdarray__init(&thread_data->pollfd, 64);
1078
1079 for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1080 map = thread_data->maps ? thread_data->maps[tm] : NULL;
1081 overwrite_map = thread_data->overwrite_maps ?
1082 thread_data->overwrite_maps[tm] : NULL;
1083
1084 for (f = 0; f < evlist->core.pollfd.nr; f++) {
1085 void *ptr = evlist->core.pollfd.priv[f].ptr;
1086
1087 if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1088 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1089 &evlist->core.pollfd);
1090 if (pos < 0)
1091 return pos;
1092 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1093 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1094 }
1095 }
1096 }
1097
1098 return 0;
1099 }
1100
1101 static void record__free_thread_data(struct record *rec)
1102 {
1103 int t;
1104 struct record_thread *thread_data = rec->thread_data;
1105
1106 if (thread_data == NULL)
1107 return;
1108
1109 for (t = 0; t < rec->nr_threads; t++) {
1110 record__thread_data_close_pipes(&thread_data[t]);
1111 zfree(&thread_data[t].maps);
1112 zfree(&thread_data[t].overwrite_maps);
1113 fdarray__exit(&thread_data[t].pollfd);
1114 }
1115
1116 zfree(&rec->thread_data);
1117 }
1118
1119 static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1120 int evlist_pollfd_index,
1121 int thread_pollfd_index)
1122 {
1123 size_t x = rec->index_map_cnt;
1124
1125 if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1126 return -ENOMEM;
1127 rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1128 rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1129 rec->index_map_cnt += 1;
1130 return 0;
1131 }
1132
1133 static int record__update_evlist_pollfd_from_thread(struct record *rec,
1134 struct evlist *evlist,
1135 struct record_thread *thread_data)
1136 {
1137 struct pollfd *e_entries = evlist->core.pollfd.entries;
1138 struct pollfd *t_entries = thread_data->pollfd.entries;
1139 int err = 0;
1140 size_t i;
1141
1142 for (i = 0; i < rec->index_map_cnt; i++) {
1143 int e_pos = rec->index_map[i].evlist_pollfd_index;
1144 int t_pos = rec->index_map[i].thread_pollfd_index;
1145
1146 if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1147 e_entries[e_pos].events != t_entries[t_pos].events) {
1148 pr_err("Thread and evlist pollfd index mismatch\n");
1149 err = -EINVAL;
1150 continue;
1151 }
1152 e_entries[e_pos].revents = t_entries[t_pos].revents;
1153 }
1154 return err;
1155 }
1156
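/*
 * Duplicate the non-perf-event descriptors (e.g. the control fd) from the
 * evlist pollfd into the main thread's pollfd, and remember the index mapping
 * so that poll results can be copied back by
 * record__update_evlist_pollfd_from_thread().
 */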
1157 static int record__dup_non_perf_events(struct record *rec,
1158 struct evlist *evlist,
1159 struct record_thread *thread_data)
1160 {
1161 struct fdarray *fda = &evlist->core.pollfd;
1162 int i, ret;
1163
1164 for (i = 0; i < fda->nr; i++) {
1165 if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1166 continue;
1167 ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1168 if (ret < 0) {
1169 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1170 return ret;
1171 }
1172 pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1173 thread_data, ret, fda->entries[i].fd);
1174 ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1175 if (ret < 0) {
1176 pr_err("Failed to map thread and evlist pollfd indexes\n");
1177 return ret;
1178 }
1179 }
1180 return 0;
1181 }
1182
1183 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1184 {
1185 int t, ret;
1186 struct record_thread *thread_data;
1187
1188 rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1189 if (!rec->thread_data) {
1190 pr_err("Failed to allocate thread data\n");
1191 return -ENOMEM;
1192 }
1193 thread_data = rec->thread_data;
1194
1195 for (t = 0; t < rec->nr_threads; t++)
1196 record__thread_data_init_pipes(&thread_data[t]);
1197
1198 for (t = 0; t < rec->nr_threads; t++) {
1199 thread_data[t].rec = rec;
1200 thread_data[t].mask = &rec->thread_masks[t];
1201 ret = record__thread_data_init_maps(&thread_data[t], evlist);
1202 if (ret) {
1203 pr_err("Failed to initialize thread[%d] maps\n", t);
1204 goto out_free;
1205 }
1206 ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1207 if (ret) {
1208 pr_err("Failed to initialize thread[%d] pollfd\n", t);
1209 goto out_free;
1210 }
1211 if (t) {
1212 thread_data[t].tid = -1;
1213 ret = record__thread_data_open_pipes(&thread_data[t]);
1214 if (ret) {
1215 pr_err("Failed to open thread[%d] communication pipes\n", t);
1216 goto out_free;
1217 }
1218 ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1219 POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1220 if (ret < 0) {
1221 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1222 goto out_free;
1223 }
1224 thread_data[t].ctlfd_pos = ret;
1225 pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1226 thread_data, thread_data[t].ctlfd_pos,
1227 thread_data[t].pipes.msg[0]);
1228 } else {
1229 thread_data[t].tid = gettid();
1230
1231 ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1232 if (ret < 0)
1233 goto out_free;
1234
1235 thread_data[t].ctlfd_pos = -1; /* Not used */
1236 }
1237 }
1238
1239 return 0;
1240
1241 out_free:
1242 record__free_thread_data(rec);
1243
1244 return ret;
1245 }
1246
1247 static int record__mmap_evlist(struct record *rec,
1248 struct evlist *evlist)
1249 {
1250 int i, ret;
1251 struct record_opts *opts = &rec->opts;
1252 bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1253 opts->auxtrace_sample_mode;
1254 char msg[512];
1255
1256 if (opts->affinity != PERF_AFFINITY_SYS)
1257 cpu__setup_cpunode_map();
1258
1259 if (evlist__mmap_ex(evlist, opts->mmap_pages,
1260 opts->auxtrace_mmap_pages,
1261 auxtrace_overwrite,
1262 opts->nr_cblocks, opts->affinity,
1263 opts->mmap_flush, opts->comp_level) < 0) {
1264 if (errno == EPERM) {
1265 pr_err("Permission error mapping pages.\n"
1266 "Consider increasing "
1267 "/proc/sys/kernel/perf_event_mlock_kb,\n"
1268 "or try again with a smaller value of -m/--mmap_pages.\n"
1269 "(current value: %u,%u)\n",
1270 opts->mmap_pages, opts->auxtrace_mmap_pages);
1271 return -errno;
1272 } else {
1273 pr_err("failed to mmap with %d (%s)\n", errno,
1274 str_error_r(errno, msg, sizeof(msg)));
1275 if (errno)
1276 return -errno;
1277 else
1278 return -EINVAL;
1279 }
1280 }
1281
1282 if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1283 return -1;
1284
1285 ret = record__alloc_thread_data(rec, evlist);
1286 if (ret)
1287 return ret;
1288
1289 if (record__threads_enabled(rec)) {
1290 ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1291 if (ret) {
1292 pr_err("Failed to create data directory: %s\n", strerror(-ret));
1293 return ret;
1294 }
1295 for (i = 0; i < evlist->core.nr_mmaps; i++) {
1296 if (evlist->mmap)
1297 evlist->mmap[i].file = &rec->data.dir.files[i];
1298 if (evlist->overwrite_mmap)
1299 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1300 }
1301 }
1302
1303 return 0;
1304 }
1305
1306 static int record__mmap(struct record *rec)
1307 {
1308 return record__mmap_evlist(rec, rec->evlist);
1309 }
1310
1311 static int record__open(struct record *rec)
1312 {
1313 char msg[BUFSIZ];
1314 struct evsel *pos;
1315 struct evlist *evlist = rec->evlist;
1316 struct perf_session *session = rec->session;
1317 struct record_opts *opts = &rec->opts;
1318 int rc = 0;
1319
1320 evlist__for_each_entry(evlist, pos) {
1321 try_again:
1322 if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1323 if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1324 if (verbose > 0)
1325 ui__warning("%s\n", msg);
1326 goto try_again;
1327 }
1328 if ((errno == EINVAL || errno == EBADF) &&
1329 pos->core.leader != &pos->core &&
1330 pos->weak_group) {
1331 pos = evlist__reset_weak_group(evlist, pos, true);
1332 goto try_again;
1333 }
1334 rc = -errno;
1335 evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1336 ui__error("%s\n", msg);
1337 goto out;
1338 }
1339
1340 pos->supported = true;
1341 }
1342
1343 if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1344 pr_warning(
1345 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1346 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1347 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1348 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1349 "Samples in kernel modules won't be resolved at all.\n\n"
1350 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1351 "even with a suitable vmlinux or kallsyms file.\n\n");
1352 }
1353
1354 if (evlist__apply_filters(evlist, &pos)) {
1355 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1356 pos->filter ?: "BPF", evsel__name(pos), errno,
1357 str_error_r(errno, msg, sizeof(msg)));
1358 rc = -1;
1359 goto out;
1360 }
1361
1362 rc = record__mmap(rec);
1363 if (rc)
1364 goto out;
1365
1366 session->evlist = evlist;
1367 perf_session__set_id_hdr_size(session);
1368 out:
1369 return rc;
1370 }
1371
1372 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1373 {
1374 if (rec->evlist->first_sample_time == 0)
1375 rec->evlist->first_sample_time = sample_time;
1376
1377 if (sample_time)
1378 rec->evlist->last_sample_time = sample_time;
1379 }
1380
1381 static int process_sample_event(struct perf_tool *tool,
1382 union perf_event *event,
1383 struct perf_sample *sample,
1384 struct evsel *evsel,
1385 struct machine *machine)
1386 {
1387 struct record *rec = container_of(tool, struct record, tool);
1388
1389 set_timestamp_boundary(rec, sample->time);
1390
1391 if (rec->buildid_all)
1392 return 0;
1393
1394 rec->samples++;
1395 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1396 }
1397
1398 static int process_buildids(struct record *rec)
1399 {
1400 struct perf_session *session = rec->session;
1401
1402 if (perf_data__size(&rec->data) == 0)
1403 return 0;
1404
1405 /*
1406 * During this process, it'll load the kernel map and replace
1407 * dso->long_name with a real pathname it found. In this case
1408 * we prefer the vmlinux path like
1409 * /lib/modules/3.16.4/build/vmlinux
1410 *
1411 * rather than build-id path (in debug directory).
1412 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1413 */
1414 symbol_conf.ignore_vmlinux_buildid = true;
1415
1416 /*
1417 * If --buildid-all is given, it marks all DSOs regardless of hits,
1418 * so there is no need to process samples. But if timestamp_boundary
1419 * is enabled, it still needs to walk all samples to get the
1420 * timestamps of the first/last samples.
1421 */
1422 if (rec->buildid_all && !rec->timestamp_boundary)
1423 rec->tool.sample = NULL;
1424
1425 return perf_session__process_events(session);
1426 }
1427
1428 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1429 {
1430 int err;
1431 struct perf_tool *tool = data;
1432 /*
1433 * As for the guest kernel, when processing the record & report subcommands,
1434 * we arrange the module mmaps prior to the guest kernel mmap and trigger
1435 * a DSO preload, because default guest module symbols are loaded
1436 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
1437 * method is used to avoid missing symbols when the first address is
1438 * in a module instead of in the guest kernel.
1439 */
1440 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1441 machine);
1442 if (err < 0)
1443 pr_err("Couldn't record guest kernel [%d]'s reference"
1444 " relocation symbol.\n", machine->pid);
1445
1446 /*
1447 * We use _stext for the guest kernel because the guest kernel's
1448 * /proc/kallsyms sometimes has no _text.
1449 */
1450 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1451 machine);
1452 if (err < 0)
1453 pr_err("Couldn't record guest kernel [%d]'s reference"
1454 " relocation symbol.\n", machine->pid);
1455 }
1456
1457 static struct perf_event_header finished_round_event = {
1458 .size = sizeof(struct perf_event_header),
1459 .type = PERF_RECORD_FINISHED_ROUND,
1460 };
1461
1462 static struct perf_event_header finished_init_event = {
1463 .size = sizeof(struct perf_event_header),
1464 .type = PERF_RECORD_FINISHED_INIT,
1465 };
1466
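/*
 * With --affinity=node|cpu, switch the reading thread's CPU affinity to the
 * mask of the mmap buffer that is about to be read, so the reader runs close
 * to the memory holding the data.
 */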
1467 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1468 {
1469 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1470 !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1471 thread->mask->affinity.nbits)) {
1472 bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1473 bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1474 map->affinity_mask.bits, thread->mask->affinity.nbits);
1475 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1476 (cpu_set_t *)thread->mask->affinity.bits);
1477 if (verbose == 2) {
1478 pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1479 mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1480 }
1481 }
1482 }
1483
1484 static size_t process_comp_header(void *record, size_t increment)
1485 {
1486 struct perf_record_compressed *event = record;
1487 size_t size = sizeof(*event);
1488
1489 if (increment) {
1490 event->header.size += increment;
1491 return increment;
1492 }
1493
1494 event->header.type = PERF_RECORD_COMPRESSED;
1495 event->header.size = size;
1496
1497 return size;
1498 }
1499
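/*
 * Compress a chunk of trace data into PERF_RECORD_COMPRESSED records, using
 * the per-mmap zstd stream when writing to a per-thread file and the
 * session-wide stream otherwise, and account transferred vs. compressed bytes
 * in the matching counters.
 */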
1500 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1501 void *dst, size_t dst_size, void *src, size_t src_size)
1502 {
1503 size_t compressed;
1504 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1505 struct zstd_data *zstd_data = &session->zstd_data;
1506
1507 if (map && map->file)
1508 zstd_data = &map->zstd_data;
1509
1510 compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1511 max_record_size, process_comp_header);
1512
1513 if (map && map->file) {
1514 thread->bytes_transferred += src_size;
1515 thread->bytes_compressed += compressed;
1516 } else {
1517 session->bytes_transferred += src_size;
1518 session->bytes_compressed += compressed;
1519 }
1520
1521 return compressed;
1522 }
1523
1524 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1525 bool overwrite, bool synch)
1526 {
1527 u64 bytes_written = rec->bytes_written;
1528 int i;
1529 int rc = 0;
1530 int nr_mmaps;
1531 struct mmap **maps;
1532 int trace_fd = rec->data.file.fd;
1533 off_t off = 0;
1534
1535 if (!evlist)
1536 return 0;
1537
1538 nr_mmaps = thread->nr_mmaps;
1539 maps = overwrite ? thread->overwrite_maps : thread->maps;
1540
1541 if (!maps)
1542 return 0;
1543
1544 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1545 return 0;
1546
1547 if (record__aio_enabled(rec))
1548 off = record__aio_get_pos(trace_fd);
1549
1550 for (i = 0; i < nr_mmaps; i++) {
1551 u64 flush = 0;
1552 struct mmap *map = maps[i];
1553
1554 if (map->core.base) {
1555 record__adjust_affinity(rec, map);
1556 if (synch) {
1557 flush = map->core.flush;
1558 map->core.flush = 1;
1559 }
1560 if (!record__aio_enabled(rec)) {
1561 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1562 if (synch)
1563 map->core.flush = flush;
1564 rc = -1;
1565 goto out;
1566 }
1567 } else {
1568 if (record__aio_push(rec, map, &off) < 0) {
1569 record__aio_set_pos(trace_fd, off);
1570 if (synch)
1571 map->core.flush = flush;
1572 rc = -1;
1573 goto out;
1574 }
1575 }
1576 if (synch)
1577 map->core.flush = flush;
1578 }
1579
1580 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1581 !rec->opts.auxtrace_sample_mode &&
1582 record__auxtrace_mmap_read(rec, map) != 0) {
1583 rc = -1;
1584 goto out;
1585 }
1586 }
1587
1588 if (record__aio_enabled(rec))
1589 record__aio_set_pos(trace_fd, off);
1590
1591 /*
1592 * Mark the round finished in case we wrote
1593 * at least one event.
1594 *
1595 * No need for round events in directory mode,
1596 * because per-cpu maps and files have data
1597 * sorted by the kernel.
1598 */
1599 if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1600 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1601
1602 if (overwrite)
1603 evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1604 out:
1605 return rc;
1606 }
1607
1608 static int record__mmap_read_all(struct record *rec, bool synch)
1609 {
1610 int err;
1611
1612 err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1613 if (err)
1614 return err;
1615
1616 return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1617 }
1618
1619 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1620 void *arg __maybe_unused)
1621 {
1622 struct perf_mmap *map = fda->priv[fd].ptr;
1623
1624 if (map)
1625 perf_mmap__put(map);
1626 }
1627
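/*
 * Body of a reader thread: notify the main thread that it is ready, then loop
 * draining its mmaps, polling when no new data arrived, until the control pipe
 * is hung up; finally flush once more and send a termination notification.
 */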
1628 static void *record__thread(void *arg)
1629 {
1630 enum thread_msg msg = THREAD_MSG__READY;
1631 bool terminate = false;
1632 struct fdarray *pollfd;
1633 int err, ctlfd_pos;
1634
1635 thread = arg;
1636 thread->tid = gettid();
1637
1638 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1639 if (err == -1)
1640 pr_warning("threads[%d]: failed to notify on start: %s\n",
1641 thread->tid, strerror(errno));
1642
1643 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1644
1645 pollfd = &thread->pollfd;
1646 ctlfd_pos = thread->ctlfd_pos;
1647
1648 for (;;) {
1649 unsigned long long hits = thread->samples;
1650
1651 if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1652 break;
1653
1654 if (hits == thread->samples) {
1655
1656 err = fdarray__poll(pollfd, -1);
1657 /*
1658 * Propagate the error only if there is one. Ignore a positive
1659 * number of returned events and an interrupt error.
1660 */
1661 if (err > 0 || (err < 0 && errno == EINTR))
1662 err = 0;
1663 thread->waking++;
1664
1665 if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1666 record__thread_munmap_filtered, NULL) == 0)
1667 break;
1668 }
1669
1670 if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1671 terminate = true;
1672 close(thread->pipes.msg[0]);
1673 thread->pipes.msg[0] = -1;
1674 pollfd->entries[ctlfd_pos].fd = -1;
1675 pollfd->entries[ctlfd_pos].events = 0;
1676 }
1677
1678 pollfd->entries[ctlfd_pos].revents = 0;
1679 }
1680 record__mmap_read_all(thread->rec, true);
1681
1682 err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1683 if (err == -1)
1684 pr_warning("threads[%d]: failed to notify on termination: %s\n",
1685 thread->tid, strerror(errno));
1686
1687 return NULL;
1688 }
1689
1690 static void record__init_features(struct record *rec)
1691 {
1692 struct perf_session *session = rec->session;
1693 int feat;
1694
1695 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1696 perf_header__set_feat(&session->header, feat);
1697
1698 if (rec->no_buildid)
1699 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1700
1701 #ifdef HAVE_LIBTRACEEVENT
1702 if (!have_tracepoints(&rec->evlist->core.entries))
1703 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1704 #endif
1705
1706 if (!rec->opts.branch_stack)
1707 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1708
1709 if (!rec->opts.full_auxtrace)
1710 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1711
1712 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1713 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1714
1715 if (!rec->opts.use_clockid)
1716 perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1717
1718 if (!record__threads_enabled(rec))
1719 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1720
1721 if (!record__comp_enabled(rec))
1722 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1723
1724 perf_header__clear_feat(&session->header, HEADER_STAT);
1725 }
1726
1727 static void
1728 record__finish_output(struct record *rec)
1729 {
1730 int i;
1731 struct perf_data *data = &rec->data;
1732 int fd = perf_data__fd(data);
1733
1734 if (data->is_pipe)
1735 return;
1736
1737 rec->session->header.data_size += rec->bytes_written;
1738 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1739 if (record__threads_enabled(rec)) {
1740 for (i = 0; i < data->dir.nr; i++)
1741 data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1742 }
1743
1744 if (!rec->no_buildid) {
1745 process_buildids(rec);
1746
1747 if (rec->buildid_all)
1748 dsos__hit_all(rec->session);
1749 }
1750 perf_session__write_header(rec->session, rec->evlist, fd, true);
1751
1752 return;
1753 }
1754
1755 static int record__synthesize_workload(struct record *rec, bool tail)
1756 {
1757 int err;
1758 struct perf_thread_map *thread_map;
1759 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1760
1761 if (rec->opts.tail_synthesize != tail)
1762 return 0;
1763
1764 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1765 if (thread_map == NULL)
1766 return -1;
1767
1768 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1769 process_synthesized_event,
1770 &rec->session->machines.host,
1771 needs_mmap,
1772 rec->opts.sample_address);
1773 perf_thread_map__put(thread_map);
1774 return err;
1775 }
1776
1777 static int write_finished_init(struct record *rec, bool tail)
1778 {
1779 if (rec->opts.tail_synthesize != tail)
1780 return 0;
1781
1782 return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1783 }
1784
1785 static int record__synthesize(struct record *rec, bool tail);
1786
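/*
 * Finalize the current output file and switch to a new, timestamped one (used
 * by --switch-output). When a maximum number of files is configured, the
 * oldest file is removed so the outputs form a ring of snapshots.
 */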
1787 static int
1788 record__switch_output(struct record *rec, bool at_exit)
1789 {
1790 struct perf_data *data = &rec->data;
1791 char *new_filename = NULL;
1792 int fd, err;
1793
1794 /* Same size: "2015122520103046" */
1795 char timestamp[] = "InvalidTimestamp";
1796
1797 record__aio_mmap_read_sync(rec);
1798
1799 write_finished_init(rec, true);
1800
1801 record__synthesize(rec, true);
1802 if (target__none(&rec->opts.target))
1803 record__synthesize_workload(rec, true);
1804
1805 rec->samples = 0;
1806 record__finish_output(rec);
1807 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1808 if (err) {
1809 pr_err("Failed to get current timestamp\n");
1810 return -EINVAL;
1811 }
1812
1813 fd = perf_data__switch(data, timestamp,
1814 rec->session->header.data_offset,
1815 at_exit, &new_filename);
1816 if (fd >= 0 && !at_exit) {
1817 rec->bytes_written = 0;
1818 rec->session->header.data_size = 0;
1819 }
1820
1821 if (!quiet)
1822 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1823 data->path, timestamp);
1824
1825 if (rec->switch_output.num_files) {
1826 int n = rec->switch_output.cur_file + 1;
1827
1828 if (n >= rec->switch_output.num_files)
1829 n = 0;
1830 rec->switch_output.cur_file = n;
1831 if (rec->switch_output.filenames[n]) {
1832 remove(rec->switch_output.filenames[n]);
1833 zfree(&rec->switch_output.filenames[n]);
1834 }
1835 rec->switch_output.filenames[n] = new_filename;
1836 } else {
1837 free(new_filename);
1838 }
1839
1840 /* Output tracking events */
1841 if (!at_exit) {
1842 record__synthesize(rec, false);
1843
1844 /*
1845 * In 'perf record --switch-output' without -a,
1846 * record__synthesize() in record__switch_output() won't
1847 * generate tracking events because there's no thread_map
1848 * in the evlist, which causes the newly created perf.data to
1849 * lack map and comm information.
1850 * Create a fake thread_map and directly call
1851 * perf_event__synthesize_thread_map() for those events.
1852 */
1853 if (target__none(&rec->opts.target))
1854 record__synthesize_workload(rec, false);
1855 write_finished_init(rec, false);
1856 }
1857 return fd;
1858 }
1859
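/*
 * Build a PERF_RECORD_LOST_SAMPLES record for one counter instance
 * (cpu_idx/thread_idx), append an id sample so it can be matched to the
 * right evsel, and write it to the output.
 */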
1860 static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1861 struct perf_record_lost_samples *lost,
1862 int cpu_idx, int thread_idx, u64 lost_count,
1863 u16 misc_flag)
1864 {
1865 struct perf_sample_id *sid;
1866 struct perf_sample sample = {};
1867 int id_hdr_size;
1868
1869 lost->lost = lost_count;
1870 if (evsel->core.ids) {
1871 sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1872 sample.id = sid->id;
1873 }
1874
1875 id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1876 evsel->core.attr.sample_type, &sample);
1877 lost->header.size = sizeof(*lost) + id_hdr_size;
1878 lost->header.misc = misc_flag;
1879 record__write(rec, NULL, lost, lost->header.size);
1880 }
1881
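/*
 * Read the lost-sample counts of every counter (plus any BPF filter drops)
 * and emit PERF_RECORD_LOST_SAMPLES records for the ones that dropped data.
 */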
1882 static void record__read_lost_samples(struct record *rec)
1883 {
1884 struct perf_session *session = rec->session;
1885 struct perf_record_lost_samples *lost;
1886 struct evsel *evsel;
1887
1888 /* there was an error during record__open */
1889 if (session->evlist == NULL)
1890 return;
1891
1892 lost = zalloc(PERF_SAMPLE_MAX_SIZE);
1893 if (lost == NULL) {
1894 pr_debug("Memory allocation failed\n");
1895 return;
1896 }
1897
1898 lost->header.type = PERF_RECORD_LOST_SAMPLES;
1899
1900 evlist__for_each_entry(session->evlist, evsel) {
1901 struct xyarray *xy = evsel->core.sample_id;
1902 u64 lost_count;
1903
1904 if (xy == NULL || evsel->core.fd == NULL)
1905 continue;
1906 if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1907 xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1908 pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1909 continue;
1910 }
1911
1912 for (int x = 0; x < xyarray__max_x(xy); x++) {
1913 for (int y = 0; y < xyarray__max_y(xy); y++) {
1914 struct perf_counts_values count;
1915
1916 if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1917 pr_debug("read LOST count failed\n");
1918 goto out;
1919 }
1920
1921 if (count.lost) {
1922 __record__save_lost_samples(rec, evsel, lost,
1923 x, y, count.lost, 0);
1924 }
1925 }
1926 }
1927
1928 lost_count = perf_bpf_filter__lost_count(evsel);
1929 if (lost_count)
1930 __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
1931 PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1932 }
1933 out:
1934 free(lost);
1935 }
1936
1937 static volatile sig_atomic_t workload_exec_errno;
1938
1939 /*
1940 * evlist__prepare_workload will send a SIGUSR1
1941 * if the fork fails, since we asked by setting its
1942 * want_signal to true.
1943 */
1944 static void workload_exec_failed_signal(int signo __maybe_unused,
1945 siginfo_t *info,
1946 void *ucontext __maybe_unused)
1947 {
1948 workload_exec_errno = info->si_value.sival_int;
1949 done = 1;
1950 child_finished = 1;
1951 }
1952
1953 static void snapshot_sig_handler(int sig);
1954 static void alarm_sig_handler(int sig);
1955
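/*
 * Pick the control page of the first mapped ring buffer; it serves as the
 * reference for synthesizing the time conversion event below.
 */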
1956 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1957 {
1958 if (evlist) {
1959 if (evlist->mmap && evlist->mmap[0].core.base)
1960 return evlist->mmap[0].core.base;
1961 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1962 return evlist->overwrite_mmap[0].core.base;
1963 }
1964 return NULL;
1965 }
1966
1967 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1968 {
1969 const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1970 if (pc)
1971 return pc;
1972 return NULL;
1973 }
1974
1975 static int record__synthesize(struct record *rec, bool tail)
1976 {
1977 struct perf_session *session = rec->session;
1978 struct machine *machine = &session->machines.host;
1979 struct perf_data *data = &rec->data;
1980 struct record_opts *opts = &rec->opts;
1981 struct perf_tool *tool = &rec->tool;
1982 int err = 0;
1983 event_op f = process_synthesized_event;
1984
1985 if (rec->opts.tail_synthesize != tail)
1986 return 0;
1987
1988 if (data->is_pipe) {
1989 err = perf_event__synthesize_for_pipe(tool, session, data,
1990 process_synthesized_event);
1991 if (err < 0)
1992 goto out;
1993
1994 rec->bytes_written += err;
1995 }
1996
1997 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1998 process_synthesized_event, machine);
1999 if (err)
2000 goto out;
2001
2002 /* Synthesize id_index before auxtrace_info */
2003 err = perf_event__synthesize_id_index(tool,
2004 process_synthesized_event,
2005 session->evlist, machine);
2006 if (err)
2007 goto out;
2008
2009 if (rec->opts.full_auxtrace) {
2010 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2011 session, process_synthesized_event);
2012 if (err)
2013 goto out;
2014 }
2015
2016 if (!evlist__exclude_kernel(rec->evlist)) {
2017 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2018 machine);
2019 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2020 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2021 "Check /proc/kallsyms permission or run as root.\n");
2022
2023 err = perf_event__synthesize_modules(tool, process_synthesized_event,
2024 machine);
2025 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2026 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2027 "Check /proc/modules permission or run as root.\n");
2028 }
2029
2030 if (perf_guest) {
2031 machines__process_guests(&session->machines,
2032 perf_event__synthesize_guest_os, tool);
2033 }
2034
2035 err = perf_event__synthesize_extra_attr(&rec->tool,
2036 rec->evlist,
2037 process_synthesized_event,
2038 data->is_pipe);
2039 if (err)
2040 goto out;
2041
2042 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2043 process_synthesized_event,
2044 NULL);
2045 if (err < 0) {
2046 pr_err("Couldn't synthesize thread map.\n");
2047 return err;
2048 }
2049
2050 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2051 process_synthesized_event, NULL);
2052 if (err < 0) {
2053 pr_err("Couldn't synthesize cpu map.\n");
2054 return err;
2055 }
2056
2057 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2058 machine, opts);
2059 if (err < 0) {
2060 pr_warning("Couldn't synthesize bpf events.\n");
2061 err = 0;
2062 }
2063
2064 if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2065 err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2066 machine);
2067 if (err < 0) {
2068 pr_warning("Couldn't synthesize cgroup events.\n");
2069 err = 0;
2070 }
2071 }
2072
2073 if (rec->opts.nr_threads_synthesize > 1) {
2074 mutex_init(&synth_lock);
2075 perf_set_multithreaded();
2076 f = process_locked_synthesized_event;
2077 }
2078
2079 if (rec->opts.synth & PERF_SYNTH_TASK) {
2080 bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2081
2082 err = __machine__synthesize_threads(machine, tool, &opts->target,
2083 rec->evlist->core.threads,
2084 f, needs_mmap, opts->sample_address,
2085 rec->opts.nr_threads_synthesize);
2086 }
2087
2088 if (rec->opts.nr_threads_synthesize > 1) {
2089 perf_set_singlethreaded();
2090 mutex_destroy(&synth_lock);
2091 }
2092
2093 out:
2094 return err;
2095 }
2096
2097 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2098 {
2099 struct record *rec = data;
2100 pthread_kill(rec->thread_id, SIGUSR2);
2101 return 0;
2102 }
2103
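/*
 * Set up the side-band evlist: hook the --switch-output-event callback that
 * signals the main thread with SIGUSR2, optionally add the BPF side-band
 * event, and start the side-band thread.
 */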
2104 static int record__setup_sb_evlist(struct record *rec)
2105 {
2106 struct record_opts *opts = &rec->opts;
2107
2108 if (rec->sb_evlist != NULL) {
2109 /*
2110 * We get here if --switch-output-event populated the
2111 * sb_evlist, so associate a callback that will send a SIGUSR2
2112 * to the main thread.
2113 */
2114 evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2115 rec->thread_id = pthread_self();
2116 }
2117 #ifdef HAVE_LIBBPF_SUPPORT
2118 if (!opts->no_bpf_event) {
2119 if (rec->sb_evlist == NULL) {
2120 rec->sb_evlist = evlist__new();
2121
2122 if (rec->sb_evlist == NULL) {
2123 pr_err("Couldn't create side band evlist.\n.");
2124 return -1;
2125 }
2126 }
2127
2128 if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2129 pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
2130 return -1;
2131 }
2132 }
2133 #endif
2134 if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2135 pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2136 opts->no_bpf_event = true;
2137 }
2138
2139 return 0;
2140 }
2141
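/*
 * When -k/--clockid is used, store the clockid and a pair of reference
 * timestamps (time of day and the selected clock) in the header env so that
 * tools can convert perf clock values to wall-clock time.
 */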
2142 static int record__init_clock(struct record *rec)
2143 {
2144 struct perf_session *session = rec->session;
2145 struct timespec ref_clockid;
2146 struct timeval ref_tod;
2147 u64 ref;
2148
2149 if (!rec->opts.use_clockid)
2150 return 0;
2151
2152 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2153 session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2154
2155 session->header.env.clock.clockid = rec->opts.clockid;
2156
2157 if (gettimeofday(&ref_tod, NULL) != 0) {
2158 pr_err("gettimeofday failed, cannot set reference time.\n");
2159 return -1;
2160 }
2161
2162 if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2163 pr_err("clock_gettime failed, cannot set reference time.\n");
2164 return -1;
2165 }
2166
2167 ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2168 (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2169
2170 session->header.env.clock.tod_ns = ref;
2171
2172 ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2173 (u64) ref_clockid.tv_nsec;
2174
2175 session->header.env.clock.clockid_ns = ref;
2176 return 0;
2177 }
2178
2179 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2180 {
2181 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2182 trigger_hit(&auxtrace_snapshot_trigger);
2183 auxtrace_record__snapshot_started = 1;
2184 if (auxtrace_record__snapshot_start(rec->itr))
2185 trigger_error(&auxtrace_snapshot_trigger);
2186 }
2187 }
2188
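/*
 * Ask a reader thread to terminate by closing the write end of its message
 * pipe, then wait for its acknowledgement on the ack pipe.
 */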
2189 static int record__terminate_thread(struct record_thread *thread_data)
2190 {
2191 int err;
2192 enum thread_msg ack = THREAD_MSG__UNDEFINED;
2193 pid_t tid = thread_data->tid;
2194
2195 close(thread_data->pipes.msg[1]);
2196 thread_data->pipes.msg[1] = -1;
2197 err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2198 if (err > 0)
2199 pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2200 else
2201 pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2202 thread->tid, tid);
2203
2204 return 0;
2205 }
2206
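/*
 * Start the per-thread trace readers for parallel streaming mode: block
 * signals around pthread_create(), pin each thread to its affinity mask and
 * wait for its READY notification. thread_data[0] is serviced by the main
 * thread itself.
 */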
2207 static int record__start_threads(struct record *rec)
2208 {
2209 int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2210 struct record_thread *thread_data = rec->thread_data;
2211 sigset_t full, mask;
2212 pthread_t handle;
2213 pthread_attr_t attrs;
2214
2215 thread = &thread_data[0];
2216
2217 if (!record__threads_enabled(rec))
2218 return 0;
2219
2220 sigfillset(&full);
2221 if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2222 pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2223 return -1;
2224 }
2225
2226 pthread_attr_init(&attrs);
2227 pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2228
2229 for (t = 1; t < nr_threads; t++) {
2230 enum thread_msg msg = THREAD_MSG__UNDEFINED;
2231
2232 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2233 pthread_attr_setaffinity_np(&attrs,
2234 MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2235 (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2236 #endif
2237 if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2238 for (tt = 1; tt < t; tt++)
2239 record__terminate_thread(&thread_data[t]);
2240 pr_err("Failed to start threads: %s\n", strerror(errno));
2241 ret = -1;
2242 goto out_err;
2243 }
2244
2245 err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2246 if (err > 0)
2247 pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2248 thread_msg_tags[msg]);
2249 else
2250 pr_warning("threads[%d]: failed to receive start notification from %d\n",
2251 thread->tid, rec->thread_data[t].tid);
2252 }
2253
2254 sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2255 (cpu_set_t *)thread->mask->affinity.bits);
2256
2257 pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2258
2259 out_err:
2260 pthread_attr_destroy(&attrs);
2261
2262 if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2263 pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2264 ret = -1;
2265 }
2266
2267 return ret;
2268 }
2269
2270 static int record__stop_threads(struct record *rec)
2271 {
2272 int t;
2273 struct record_thread *thread_data = rec->thread_data;
2274
2275 for (t = 1; t < rec->nr_threads; t++)
2276 record__terminate_thread(&thread_data[t]);
2277
2278 for (t = 0; t < rec->nr_threads; t++) {
2279 rec->samples += thread_data[t].samples;
2280 if (!record__threads_enabled(rec))
2281 continue;
2282 rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2283 rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2284 pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2285 thread_data[t].samples, thread_data[t].waking);
2286 if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2287 pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2288 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2289 else
2290 pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2291 }
2292
2293 return 0;
2294 }
2295
2296 static unsigned long record__waking(struct record *rec)
2297 {
2298 int t;
2299 unsigned long waking = 0;
2300 struct record_thread *thread_data = rec->thread_data;
2301
2302 for (t = 0; t < rec->nr_threads; t++)
2303 waking += thread_data[t].waking;
2304
2305 return waking;
2306 }
2307
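/*
 * The main body of 'perf record': create the session, open and mmap the
 * events, optionally fork and start the workload, then loop reading the ring
 * buffers until done, finally writing the synthesized tail events and the
 * file header.
 */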
2308 static int __cmd_record(struct record *rec, int argc, const char **argv)
2309 {
2310 int err;
2311 int status = 0;
2312 const bool forks = argc > 0;
2313 struct perf_tool *tool = &rec->tool;
2314 struct record_opts *opts = &rec->opts;
2315 struct perf_data *data = &rec->data;
2316 struct perf_session *session;
2317 bool disabled = false, draining = false;
2318 int fd;
2319 float ratio = 0;
2320 enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2321
2322 atexit(record__sig_exit);
2323 signal(SIGCHLD, sig_handler);
2324 signal(SIGINT, sig_handler);
2325 signal(SIGTERM, sig_handler);
2326 signal(SIGSEGV, sigsegv_handler);
2327
2328 if (rec->opts.record_namespaces)
2329 tool->namespace_events = true;
2330
2331 if (rec->opts.record_cgroup) {
2332 #ifdef HAVE_FILE_HANDLE
2333 tool->cgroup_events = true;
2334 #else
2335 pr_err("cgroup tracking is not supported\n");
2336 return -1;
2337 #endif
2338 }
2339
2340 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2341 signal(SIGUSR2, snapshot_sig_handler);
2342 if (rec->opts.auxtrace_snapshot_mode)
2343 trigger_on(&auxtrace_snapshot_trigger);
2344 if (rec->switch_output.enabled)
2345 trigger_on(&switch_output_trigger);
2346 } else {
2347 signal(SIGUSR2, SIG_IGN);
2348 }
2349
2350 session = perf_session__new(data, tool);
2351 if (IS_ERR(session)) {
2352 pr_err("Perf session creation failed.\n");
2353 return PTR_ERR(session);
2354 }
2355
2356 if (record__threads_enabled(rec)) {
2357 if (perf_data__is_pipe(&rec->data)) {
2358 pr_err("Parallel trace streaming is not available in pipe mode.\n");
2359 return -1;
2360 }
2361 if (rec->opts.full_auxtrace) {
2362 pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2363 return -1;
2364 }
2365 }
2366
2367 fd = perf_data__fd(data);
2368 rec->session = session;
2369
2370 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2371 pr_err("Compression initialization failed.\n");
2372 return -1;
2373 }
2374 #ifdef HAVE_EVENTFD_SUPPORT
2375 done_fd = eventfd(0, EFD_NONBLOCK);
2376 if (done_fd < 0) {
2377 pr_err("Failed to create wakeup eventfd, error: %m\n");
2378 status = -1;
2379 goto out_delete_session;
2380 }
2381 err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2382 if (err < 0) {
2383 pr_err("Failed to add wakeup eventfd to poll list\n");
2384 status = err;
2385 goto out_delete_session;
2386 }
2387 #endif // HAVE_EVENTFD_SUPPORT
2388
2389 session->header.env.comp_type = PERF_COMP_ZSTD;
2390 session->header.env.comp_level = rec->opts.comp_level;
2391
2392 if (rec->opts.kcore &&
2393 !record__kcore_readable(&session->machines.host)) {
2394 pr_err("ERROR: kcore is not readable.\n");
2395 return -1;
2396 }
2397
2398 if (record__init_clock(rec))
2399 return -1;
2400
2401 record__init_features(rec);
2402
2403 if (forks) {
2404 err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2405 workload_exec_failed_signal);
2406 if (err < 0) {
2407 pr_err("Couldn't run the workload!\n");
2408 status = err;
2409 goto out_delete_session;
2410 }
2411 }
2412
2413 /*
2414 * If we have just a single event and are sending data
2415 * through a pipe, we need to force the ids allocation,
2416 * because we synthesize the event name through the pipe
2417 * and need the id for that.
2418 */
2419 if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2420 rec->opts.sample_id = true;
2421
2422 if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2423 rec->timestamp_filename = false;
2424 pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2425 }
2426
2427 evlist__uniquify_name(rec->evlist);
2428
2429 evlist__config(rec->evlist, opts, &callchain_param);
2430
2431 /* Debug message used by test scripts */
2432 pr_debug3("perf record opening and mmapping events\n");
2433 if (record__open(rec) != 0) {
2434 err = -1;
2435 goto out_free_threads;
2436 }
2437 /* Debug message used by test scripts */
2438 pr_debug3("perf record done opening and mmapping events\n");
2439 session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2440
2441 if (rec->opts.kcore) {
2442 err = record__kcore_copy(&session->machines.host, data);
2443 if (err) {
2444 pr_err("ERROR: Failed to copy kcore\n");
2445 goto out_free_threads;
2446 }
2447 }
2448
2449 /*
2450 * Normally perf_session__new would do this, but it doesn't have the
2451 * evlist.
2452 */
2453 if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2454 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2455 rec->tool.ordered_events = false;
2456 }
2457
2458 if (evlist__nr_groups(rec->evlist) == 0)
2459 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2460
2461 if (data->is_pipe) {
2462 err = perf_header__write_pipe(fd);
2463 if (err < 0)
2464 goto out_free_threads;
2465 } else {
2466 err = perf_session__write_header(session, rec->evlist, fd, false);
2467 if (err < 0)
2468 goto out_free_threads;
2469 }
2470
2471 err = -1;
2472 if (!rec->no_buildid
2473 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2474 pr_err("Couldn't generate buildids. "
2475 "Use --no-buildid to profile anyway.\n");
2476 goto out_free_threads;
2477 }
2478
2479 err = record__setup_sb_evlist(rec);
2480 if (err)
2481 goto out_free_threads;
2482
2483 err = record__synthesize(rec, false);
2484 if (err < 0)
2485 goto out_free_threads;
2486
2487 if (rec->realtime_prio) {
2488 struct sched_param param;
2489
2490 param.sched_priority = rec->realtime_prio;
2491 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2492 pr_err("Could not set realtime priority.\n");
2493 err = -1;
2494 goto out_free_threads;
2495 }
2496 }
2497
2498 if (record__start_threads(rec))
2499 goto out_free_threads;
2500
2501 /*
2502 * When perf is starting the traced process, all the events
2503 * (apart from group members) have enable_on_exec=1 set,
2504 * so don't spoil it by prematurely enabling them.
2505 */
2506 if (!target__none(&opts->target) && !opts->target.initial_delay)
2507 evlist__enable(rec->evlist);
2508
2509 /*
2510 * Let the child rip
2511 */
2512 if (forks) {
2513 struct machine *machine = &session->machines.host;
2514 union perf_event *event;
2515 pid_t tgid;
2516
2517 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2518 if (event == NULL) {
2519 err = -ENOMEM;
2520 goto out_child;
2521 }
2522
2523 /*
2524 * Some H/W events are generated before COMM event
2525 * which is emitted during exec(), so perf script
2526 * cannot see a correct process name for those events.
2527 * Synthesize COMM event to prevent it.
2528 */
2529 tgid = perf_event__synthesize_comm(tool, event,
2530 rec->evlist->workload.pid,
2531 process_synthesized_event,
2532 machine);
2533 free(event);
2534
2535 if (tgid == -1)
2536 goto out_child;
2537
2538 event = malloc(sizeof(event->namespaces) +
2539 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2540 machine->id_hdr_size);
2541 if (event == NULL) {
2542 err = -ENOMEM;
2543 goto out_child;
2544 }
2545
2546 /*
2547 * Synthesize NAMESPACES event for the command specified.
2548 */
2549 perf_event__synthesize_namespaces(tool, event,
2550 rec->evlist->workload.pid,
2551 tgid, process_synthesized_event,
2552 machine);
2553 free(event);
2554
2555 evlist__start_workload(rec->evlist);
2556 }
2557
2558 if (opts->target.initial_delay) {
2559 pr_info(EVLIST_DISABLED_MSG);
2560 if (opts->target.initial_delay > 0) {
2561 usleep(opts->target.initial_delay * USEC_PER_MSEC);
2562 evlist__enable(rec->evlist);
2563 pr_info(EVLIST_ENABLED_MSG);
2564 }
2565 }
2566
2567 err = event_enable_timer__start(rec->evlist->eet);
2568 if (err)
2569 goto out_child;
2570
2571 /* Debug message used by test scripts */
2572 pr_debug3("perf record has started\n");
2573 fflush(stderr);
2574
2575 trigger_ready(&auxtrace_snapshot_trigger);
2576 trigger_ready(&switch_output_trigger);
2577 perf_hooks__invoke_record_start();
2578
2579 /*
2580 * Must write FINISHED_INIT so it will be seen after all other
2581 * synthesized user events, but before any regular events.
2582 */
2583 err = write_finished_init(rec, false);
2584 if (err < 0)
2585 goto out_child;
2586
2587 for (;;) {
2588 unsigned long long hits = thread->samples;
2589
2590 /*
2591 * rec->evlist->bkw_mmap_state is possible to be
2592 * BKW_MMAP_EMPTY here: when done == true and
2593 * hits != rec->samples in previous round.
2594 *
2595 * evlist__toggle_bkw_mmap ensures we never
2596 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2597 */
2598 if (trigger_is_hit(&switch_output_trigger) || done || draining)
2599 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2600
2601 if (record__mmap_read_all(rec, false) < 0) {
2602 trigger_error(&auxtrace_snapshot_trigger);
2603 trigger_error(&switch_output_trigger);
2604 err = -1;
2605 goto out_child;
2606 }
2607
2608 if (auxtrace_record__snapshot_started) {
2609 auxtrace_record__snapshot_started = 0;
2610 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2611 record__read_auxtrace_snapshot(rec, false);
2612 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2613 pr_err("AUX area tracing snapshot failed\n");
2614 err = -1;
2615 goto out_child;
2616 }
2617 }
2618
2619 if (trigger_is_hit(&switch_output_trigger)) {
2620 /*
2621 * If switch_output_trigger is hit, the data in
2622 * overwritable ring buffer should have been collected,
2623 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2624 *
2625 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2626 * record__mmap_read_all() didn't collect data from the
2627 * overwritable ring buffer. Read again.
2628 */
2629 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2630 continue;
2631 trigger_ready(&switch_output_trigger);
2632
2633 /*
2634 * Reenable events in overwrite ring buffer after
2635 * record__mmap_read_all(): we should have collected
2636 * data from it.
2637 */
2638 evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2639
2640 if (!quiet)
2641 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2642 record__waking(rec));
2643 thread->waking = 0;
2644 fd = record__switch_output(rec, false);
2645 if (fd < 0) {
2646 pr_err("Failed to switch to new file\n");
2647 trigger_error(&switch_output_trigger);
2648 err = fd;
2649 goto out_child;
2650 }
2651
2652 /* re-arm the alarm */
2653 if (rec->switch_output.time)
2654 alarm(rec->switch_output.time);
2655 }
2656
2657 if (hits == thread->samples) {
2658 if (done || draining)
2659 break;
2660 err = fdarray__poll(&thread->pollfd, -1);
2661 /*
2662 * Propagate error, only if there's any. Ignore positive
2663 * number of returned events and interrupt error.
2664 */
2665 if (err > 0 || (err < 0 && errno == EINTR))
2666 err = 0;
2667 thread->waking++;
2668
2669 if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2670 record__thread_munmap_filtered, NULL) == 0)
2671 draining = true;
2672
2673 err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2674 if (err)
2675 goto out_child;
2676 }
2677
2678 if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2679 switch (cmd) {
2680 case EVLIST_CTL_CMD_SNAPSHOT:
2681 hit_auxtrace_snapshot_trigger(rec);
2682 evlist__ctlfd_ack(rec->evlist);
2683 break;
2684 case EVLIST_CTL_CMD_STOP:
2685 done = 1;
2686 break;
2687 case EVLIST_CTL_CMD_ACK:
2688 case EVLIST_CTL_CMD_UNSUPPORTED:
2689 case EVLIST_CTL_CMD_ENABLE:
2690 case EVLIST_CTL_CMD_DISABLE:
2691 case EVLIST_CTL_CMD_EVLIST:
2692 case EVLIST_CTL_CMD_PING:
2693 default:
2694 break;
2695 }
2696 }
2697
2698 err = event_enable_timer__process(rec->evlist->eet);
2699 if (err < 0)
2700 goto out_child;
2701 if (err) {
2702 err = 0;
2703 done = 1;
2704 }
2705
2706 /*
2707 * When perf is starting the traced process, at the end events
2708 * die with the process and we wait for that. Thus no need to
2709 * disable events in this case.
2710 */
2711 if (done && !disabled && !target__none(&opts->target)) {
2712 trigger_off(&auxtrace_snapshot_trigger);
2713 evlist__disable(rec->evlist);
2714 disabled = true;
2715 }
2716 }
2717
2718 trigger_off(&auxtrace_snapshot_trigger);
2719 trigger_off(&switch_output_trigger);
2720
2721 if (opts->auxtrace_snapshot_on_exit)
2722 record__auxtrace_snapshot_exit(rec);
2723
2724 if (forks && workload_exec_errno) {
2725 char msg[STRERR_BUFSIZE], strevsels[2048];
2726 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2727
2728 evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2729
2730 pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2731 strevsels, argv[0], emsg);
2732 err = -1;
2733 goto out_child;
2734 }
2735
2736 if (!quiet)
2737 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2738 record__waking(rec));
2739
2740 write_finished_init(rec, true);
2741
2742 if (target__none(&rec->opts.target))
2743 record__synthesize_workload(rec, true);
2744
2745 out_child:
2746 record__stop_threads(rec);
2747 record__mmap_read_all(rec, true);
2748 out_free_threads:
2749 record__free_thread_data(rec);
2750 evlist__finalize_ctlfd(rec->evlist);
2751 record__aio_mmap_read_sync(rec);
2752
2753 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2754 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2755 session->header.env.comp_ratio = ratio + 0.5;
2756 }
2757
2758 if (forks) {
2759 int exit_status;
2760
2761 if (!child_finished)
2762 kill(rec->evlist->workload.pid, SIGTERM);
2763
2764 wait(&exit_status);
2765
2766 if (err < 0)
2767 status = err;
2768 else if (WIFEXITED(exit_status))
2769 status = WEXITSTATUS(exit_status);
2770 else if (WIFSIGNALED(exit_status))
2771 signr = WTERMSIG(exit_status);
2772 } else
2773 status = err;
2774
2775 if (rec->off_cpu)
2776 rec->bytes_written += off_cpu_write(rec->session);
2777
2778 record__read_lost_samples(rec);
2779 record__synthesize(rec, true);
2780 /* this will be recalculated during process_buildids() */
2781 rec->samples = 0;
2782
2783 if (!err) {
2784 if (!rec->timestamp_filename) {
2785 record__finish_output(rec);
2786 } else {
2787 fd = record__switch_output(rec, true);
2788 if (fd < 0) {
2789 status = fd;
2790 goto out_delete_session;
2791 }
2792 }
2793 }
2794
2795 perf_hooks__invoke_record_end();
2796
2797 if (!err && !quiet) {
2798 char samples[128];
2799 const char *postfix = rec->timestamp_filename ?
2800 ".<timestamp>" : "";
2801
2802 if (rec->samples && !rec->opts.full_auxtrace)
2803 scnprintf(samples, sizeof(samples),
2804 " (%" PRIu64 " samples)", rec->samples);
2805 else
2806 samples[0] = '\0';
2807
2808 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2809 perf_data__size(data) / 1024.0 / 1024.0,
2810 data->path, postfix, samples);
2811 if (ratio) {
2812 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2813 rec->session->bytes_transferred / 1024.0 / 1024.0,
2814 ratio);
2815 }
2816 fprintf(stderr, " ]\n");
2817 }
2818
2819 out_delete_session:
2820 #ifdef HAVE_EVENTFD_SUPPORT
2821 if (done_fd >= 0) {
2822 fd = done_fd;
2823 done_fd = -1;
2824
2825 close(fd);
2826 }
2827 #endif
2828 zstd_fini(&session->zstd_data);
2829 if (!opts->no_bpf_event)
2830 evlist__stop_sb_thread(rec->sb_evlist);
2831
2832 perf_session__delete(session);
2833 return status;
2834 }
2835
2836 static void callchain_debug(struct callchain_param *callchain)
2837 {
2838 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2839
2840 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2841
2842 if (callchain->record_mode == CALLCHAIN_DWARF)
2843 pr_debug("callchain: stack dump size %d\n",
2844 callchain->dump_size);
2845 }
2846
2847 int record_opts__parse_callchain(struct record_opts *record,
2848 struct callchain_param *callchain,
2849 const char *arg, bool unset)
2850 {
2851 int ret;
2852 callchain->enabled = !unset;
2853
2854 /* --no-call-graph */
2855 if (unset) {
2856 callchain->record_mode = CALLCHAIN_NONE;
2857 pr_debug("callchain: disabled\n");
2858 return 0;
2859 }
2860
2861 ret = parse_callchain_record_opt(arg, callchain);
2862 if (!ret) {
2863 /* Enable data address sampling for DWARF unwind. */
2864 if (callchain->record_mode == CALLCHAIN_DWARF)
2865 record->sample_address = true;
2866 callchain_debug(callchain);
2867 }
2868
2869 return ret;
2870 }
2871
2872 int record_parse_callchain_opt(const struct option *opt,
2873 const char *arg,
2874 int unset)
2875 {
2876 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2877 }
2878
2879 int record_callchain_opt(const struct option *opt,
2880 const char *arg __maybe_unused,
2881 int unset __maybe_unused)
2882 {
2883 struct callchain_param *callchain = opt->value;
2884
2885 callchain->enabled = true;
2886
2887 if (callchain->record_mode == CALLCHAIN_NONE)
2888 callchain->record_mode = CALLCHAIN_FP;
2889
2890 callchain_debug(callchain);
2891 return 0;
2892 }
2893
2894 static int perf_record_config(const char *var, const char *value, void *cb)
2895 {
2896 struct record *rec = cb;
2897
2898 if (!strcmp(var, "record.build-id")) {
2899 if (!strcmp(value, "cache"))
2900 rec->no_buildid_cache = false;
2901 else if (!strcmp(value, "no-cache"))
2902 rec->no_buildid_cache = true;
2903 else if (!strcmp(value, "skip"))
2904 rec->no_buildid = true;
2905 else if (!strcmp(value, "mmap"))
2906 rec->buildid_mmap = true;
2907 else
2908 return -1;
2909 return 0;
2910 }
2911 if (!strcmp(var, "record.call-graph")) {
2912 var = "call-graph.record-mode";
2913 return perf_default_config(var, value, cb);
2914 }
2915 #ifdef HAVE_AIO_SUPPORT
2916 if (!strcmp(var, "record.aio")) {
2917 rec->opts.nr_cblocks = strtol(value, NULL, 0);
2918 if (!rec->opts.nr_cblocks)
2919 rec->opts.nr_cblocks = nr_cblocks_default;
2920 }
2921 #endif
2922 if (!strcmp(var, "record.debuginfod")) {
2923 rec->debuginfod.urls = strdup(value);
2924 if (!rec->debuginfod.urls)
2925 return -ENOMEM;
2926 rec->debuginfod.set = true;
2927 }
2928
2929 return 0;
2930 }
2931
2932 static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2933 {
2934 struct record *rec = (struct record *)opt->value;
2935
2936 return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2937 }
2938
2939 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2940 {
2941 struct record_opts *opts = (struct record_opts *)opt->value;
2942
2943 if (unset || !str)
2944 return 0;
2945
2946 if (!strcasecmp(str, "node"))
2947 opts->affinity = PERF_AFFINITY_NODE;
2948 else if (!strcasecmp(str, "cpu"))
2949 opts->affinity = PERF_AFFINITY_CPU;
2950
2951 return 0;
2952 }
2953
2954 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2955 {
2956 mask->nbits = nr_bits;
2957 mask->bits = bitmap_zalloc(mask->nbits);
2958 if (!mask->bits)
2959 return -ENOMEM;
2960
2961 return 0;
2962 }
2963
2964 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2965 {
2966 bitmap_free(mask->bits);
2967 mask->nbits = 0;
2968 }
2969
2970 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2971 {
2972 int ret;
2973
2974 ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2975 if (ret) {
2976 mask->affinity.bits = NULL;
2977 return ret;
2978 }
2979
2980 ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2981 if (ret) {
2982 record__mmap_cpu_mask_free(&mask->maps);
2983 mask->maps.bits = NULL;
2984 }
2985
2986 return ret;
2987 }
2988
2989 static void record__thread_mask_free(struct thread_mask *mask)
2990 {
2991 record__mmap_cpu_mask_free(&mask->maps);
2992 record__mmap_cpu_mask_free(&mask->affinity);
2993 }
2994
2995 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2996 {
2997 int s;
2998 struct record_opts *opts = opt->value;
2999
3000 if (unset || !str || !strlen(str)) {
3001 opts->threads_spec = THREAD_SPEC__CPU;
3002 } else {
3003 for (s = 1; s < THREAD_SPEC__MAX; s++) {
3004 if (s == THREAD_SPEC__USER) {
3005 opts->threads_user_spec = strdup(str);
3006 if (!opts->threads_user_spec)
3007 return -ENOMEM;
3008 opts->threads_spec = THREAD_SPEC__USER;
3009 break;
3010 }
3011 if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3012 opts->threads_spec = s;
3013 break;
3014 }
3015 }
3016 }
3017
3018 if (opts->threads_spec == THREAD_SPEC__USER)
3019 pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3020 else
3021 pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3022
3023 return 0;
3024 }
3025
3026 static int parse_output_max_size(const struct option *opt,
3027 const char *str, int unset)
3028 {
3029 unsigned long *s = (unsigned long *)opt->value;
3030 static struct parse_tag tags_size[] = {
3031 { .tag = 'B', .mult = 1 },
3032 { .tag = 'K', .mult = 1 << 10 },
3033 { .tag = 'M', .mult = 1 << 20 },
3034 { .tag = 'G', .mult = 1 << 30 },
3035 { .tag = 0 },
3036 };
3037 unsigned long val;
3038
3039 if (unset) {
3040 *s = 0;
3041 return 0;
3042 }
3043
3044 val = parse_tag_value(str, tags_size);
3045 if (val != (unsigned long) -1) {
3046 *s = val;
3047 return 0;
3048 }
3049
3050 return -1;
3051 }
3052
3053 static int record__parse_mmap_pages(const struct option *opt,
3054 const char *str,
3055 int unset __maybe_unused)
3056 {
3057 struct record_opts *opts = opt->value;
3058 char *s, *p;
3059 unsigned int mmap_pages;
3060 int ret;
3061
3062 if (!str)
3063 return -EINVAL;
3064
3065 s = strdup(str);
3066 if (!s)
3067 return -ENOMEM;
3068
3069 p = strchr(s, ',');
3070 if (p)
3071 *p = '\0';
3072
3073 if (*s) {
3074 ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3075 if (ret)
3076 goto out_free;
3077 opts->mmap_pages = mmap_pages;
3078 }
3079
3080 if (!p) {
3081 ret = 0;
3082 goto out_free;
3083 }
3084
3085 ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3086 if (ret)
3087 goto out_free;
3088
3089 opts->auxtrace_mmap_pages = mmap_pages;
3090
3091 out_free:
3092 free(s);
3093 return ret;
3094 }
3095
3096 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3097 {
3098 }
3099
3100 static int parse_control_option(const struct option *opt,
3101 const char *str,
3102 int unset __maybe_unused)
3103 {
3104 struct record_opts *opts = opt->value;
3105
3106 return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3107 }
3108
3109 static void switch_output_size_warn(struct record *rec)
3110 {
3111 u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3112 struct switch_output *s = &rec->switch_output;
3113
3114 wakeup_size /= 2;
3115
3116 if (s->size < wakeup_size) {
3117 char buf[100];
3118
3119 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3120 pr_warning("WARNING: switch-output data size lower than "
3121 "wakeup kernel buffer size (%s) "
3122 "expect bigger perf.data sizes\n", buf);
3123 }
3124 }
3125
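/*
 * Parse the --switch-output argument: "signal", a size threshold ([BKMG]) or
 * a time threshold ([smhd]). --switch-output-event implies signal mode.
 * Switching output is not available in parallel streaming mode.
 */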
3126 static int switch_output_setup(struct record *rec)
3127 {
3128 struct switch_output *s = &rec->switch_output;
3129 static struct parse_tag tags_size[] = {
3130 { .tag = 'B', .mult = 1 },
3131 { .tag = 'K', .mult = 1 << 10 },
3132 { .tag = 'M', .mult = 1 << 20 },
3133 { .tag = 'G', .mult = 1 << 30 },
3134 { .tag = 0 },
3135 };
3136 static struct parse_tag tags_time[] = {
3137 { .tag = 's', .mult = 1 },
3138 { .tag = 'm', .mult = 60 },
3139 { .tag = 'h', .mult = 60*60 },
3140 { .tag = 'd', .mult = 60*60*24 },
3141 { .tag = 0 },
3142 };
3143 unsigned long val;
3144
3145 /*
3146 * If we're using --switch-output-events, then we imply its
3147 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3148 * thread to its parent.
3149 */
3150 if (rec->switch_output_event_set) {
3151 if (record__threads_enabled(rec)) {
3152 pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3153 return 0;
3154 }
3155 goto do_signal;
3156 }
3157
3158 if (!s->set)
3159 return 0;
3160
3161 if (record__threads_enabled(rec)) {
3162 pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3163 return 0;
3164 }
3165
3166 if (!strcmp(s->str, "signal")) {
3167 do_signal:
3168 s->signal = true;
3169 pr_debug("switch-output with SIGUSR2 signal\n");
3170 goto enabled;
3171 }
3172
3173 val = parse_tag_value(s->str, tags_size);
3174 if (val != (unsigned long) -1) {
3175 s->size = val;
3176 pr_debug("switch-output with %s size threshold\n", s->str);
3177 goto enabled;
3178 }
3179
3180 val = parse_tag_value(s->str, tags_time);
3181 if (val != (unsigned long) -1) {
3182 s->time = val;
3183 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3184 s->str, s->time);
3185 goto enabled;
3186 }
3187
3188 return -1;
3189
3190 enabled:
3191 rec->timestamp_filename = true;
3192 s->enabled = true;
3193
3194 if (s->size && !rec->opts.no_buffering)
3195 switch_output_size_warn(rec);
3196
3197 return 0;
3198 }
3199
3200 static const char * const __record_usage[] = {
3201 "perf record [<options>] [<command>]",
3202 "perf record [<options>] -- <command> [<options>]",
3203 NULL
3204 };
3205 const char * const *record_usage = __record_usage;
3206
3207 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3208 struct perf_sample *sample, struct machine *machine)
3209 {
3210 /*
3211 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3212 * so there is no need to add them twice.
3213 */
3214 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3215 return 0;
3216 return perf_event__process_mmap(tool, event, sample, machine);
3217 }
3218
3219 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3220 struct perf_sample *sample, struct machine *machine)
3221 {
3222 /*
3223 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3224 * so there is no need to add them twice.
3225 */
3226 if (!(event->header.misc & PERF_RECORD_MISC_USER))
3227 return 0;
3228
3229 return perf_event__process_mmap2(tool, event, sample, machine);
3230 }
3231
3232 static int process_timestamp_boundary(struct perf_tool *tool,
3233 union perf_event *event __maybe_unused,
3234 struct perf_sample *sample,
3235 struct machine *machine __maybe_unused)
3236 {
3237 struct record *rec = container_of(tool, struct record, tool);
3238
3239 set_timestamp_boundary(rec, sample->time);
3240 return 0;
3241 }
3242
3243 static int parse_record_synth_option(const struct option *opt,
3244 const char *str,
3245 int unset __maybe_unused)
3246 {
3247 struct record_opts *opts = opt->value;
3248 char *p = strdup(str);
3249
3250 if (p == NULL)
3251 return -1;
3252
3253 opts->synth = parse_synth_opt(p);
3254 free(p);
3255
3256 if (opts->synth < 0) {
3257 pr_err("Invalid synth option: %s\n", str);
3258 return -1;
3259 }
3260 return 0;
3261 }
3262
3263 /*
3264 * XXX Ideally would be local to cmd_record() and passed to a record__new
3265 * because we need to have access to it in record__exit, that is called
3266 * after cmd_record() exits, but since record_options need to be accessible to
3267 * builtin-script, leave it here.
3268 *
3269 * At least we don't ouch it in all the other functions here directly.
3270 *
3271 * Just say no to tons of global variables, sigh.
3272 */
3273 static struct record record = {
3274 .opts = {
3275 .sample_time = true,
3276 .mmap_pages = UINT_MAX,
3277 .user_freq = UINT_MAX,
3278 .user_interval = ULLONG_MAX,
3279 .freq = 4000,
3280 .target = {
3281 .uses_mmap = true,
3282 .default_per_cpu = true,
3283 },
3284 .mmap_flush = MMAP_FLUSH_DEFAULT,
3285 .nr_threads_synthesize = 1,
3286 .ctl_fd = -1,
3287 .ctl_fd_ack = -1,
3288 .synth = PERF_SYNTH_ALL,
3289 },
3290 .tool = {
3291 .sample = process_sample_event,
3292 .fork = perf_event__process_fork,
3293 .exit = perf_event__process_exit,
3294 .comm = perf_event__process_comm,
3295 .namespaces = perf_event__process_namespaces,
3296 .mmap = build_id__process_mmap,
3297 .mmap2 = build_id__process_mmap2,
3298 .itrace_start = process_timestamp_boundary,
3299 .aux = process_timestamp_boundary,
3300 .ordered_events = true,
3301 },
3302 };
3303
3304 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3305 "\n\t\t\t\tDefault: fp";
3306
3307 static bool dry_run;
3308
3309 static struct parse_events_option_args parse_events_option_args = {
3310 .evlistp = &record.evlist,
3311 };
3312
3313 static struct parse_events_option_args switch_output_parse_events_option_args = {
3314 .evlistp = &record.sb_evlist,
3315 };
3316
3317 /*
3318 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3319 * with it and switch to use the library functions in perf_evlist that came
3320 * from builtin-record.c, i.e. use record_opts,
3321 * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3322 * using pipes, etc.
3323 */
3324 static struct option __record_options[] = {
3325 OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3326 "event selector. use 'perf list' to list available events",
3327 parse_events_option),
3328 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3329 "event filter", parse_filter),
3330 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3331 NULL, "don't record events from perf itself",
3332 exclude_perf),
3333 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3334 "record events on existing process id"),
3335 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3336 "record events on existing thread id"),
3337 OPT_INTEGER('r', "realtime", &record.realtime_prio,
3338 "collect data with this RT SCHED_FIFO priority"),
3339 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3340 "collect data without buffering"),
3341 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3342 "collect raw sample records from all opened counters"),
3343 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3344 "system-wide collection from all CPUs"),
3345 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3346 "list of cpus to monitor"),
3347 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3348 OPT_STRING('o', "output", &record.data.path, "file",
3349 "output file name"),
3350 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3351 &record.opts.no_inherit_set,
3352 "child tasks do not inherit counters"),
3353 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3354 "synthesize non-sample events at the end of output"),
3355 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3356 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3357 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3358 "Fail if the specified frequency can't be used"),
3359 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3360 "profile at this frequency",
3361 record__parse_freq),
3362 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3363 "number of mmap data pages and AUX area tracing mmap pages",
3364 record__parse_mmap_pages),
3365 OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3366 "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3367 record__mmap_flush_parse),
3368 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3369 NULL, "enables call-graph recording" ,
3370 &record_callchain_opt),
3371 OPT_CALLBACK(0, "call-graph", &record.opts,
3372 "record_mode[,record_size]", record_callchain_help,
3373 &record_parse_callchain_opt),
3374 OPT_INCR('v', "verbose", &verbose,
3375 "be more verbose (show counter open errors, etc)"),
3376 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3377 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3378 "per thread counts"),
3379 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3380 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3381 "Record the sample physical addresses"),
3382 OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3383 "Record the sampled data address data page size"),
3384 OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3385 "Record the sampled code address (ip) page size"),
3386 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3387 OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3388 "Record the sample identifier"),
3389 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3390 &record.opts.sample_time_set,
3391 "Record the sample timestamps"),
3392 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3393 "Record the sample period"),
3394 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3395 "don't sample"),
3396 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3397 &record.no_buildid_cache_set,
3398 "do not update the buildid cache"),
3399 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3400 &record.no_buildid_set,
3401 "do not collect buildids in perf.data"),
3402 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3403 "monitor event in cgroup name only",
3404 parse_cgroups),
3405 OPT_CALLBACK('D', "delay", &record, "ms",
3406 "ms to wait before starting measurement after program start (-1: start with events disabled), "
3407 "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3408 record__parse_event_enable_time),
3409 OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3410 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3411 "user to profile"),
3412
3413 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3414 "branch any", "sample any taken branches",
3415 parse_branch_stack),
3416
3417 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3418 "branch filter mask", "branch stack filter modes",
3419 parse_branch_stack),
3420 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3421 "sample by weight (on special events only)"),
3422 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3423 "sample transaction flags (special events only)"),
3424 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3425 "use per-thread mmaps"),
3426 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3427 "sample selected machine registers on interrupt,"
3428 " use '-I?' to list register names", parse_intr_regs),
3429 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3430 "sample selected machine registers on interrupt,"
3431 " use '--user-regs=?' to list register names", parse_user_regs),
3432 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3433 "Record running/enabled time of read (:S) events"),
3434 OPT_CALLBACK('k', "clockid", &record.opts,
3435 "clockid", "clockid to use for events, see clock_gettime()",
3436 parse_clockid),
3437 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3438 "opts", "AUX area tracing Snapshot Mode", ""),
3439 OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3440 "opts", "sample AUX area", ""),
3441 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3442 "per thread proc mmap processing timeout in ms"),
3443 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3444 "Record namespaces events"),
3445 OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3446 "Record cgroup events"),
3447 OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3448 &record.opts.record_switch_events_set,
3449 "Record context switch events"),
3450 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3451 "Configure all used events to run in kernel space.",
3452 PARSE_OPT_EXCLUSIVE),
3453 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3454 "Configure all used events to run in user space.",
3455 PARSE_OPT_EXCLUSIVE),
3456 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3457 "collect kernel callchains"),
3458 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3459 "collect user callchains"),
3460 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3461 "file", "vmlinux pathname"),
3462 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3463 "Record build-id of all DSOs regardless of hits"),
3464 OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3465 "Record build-id in map events"),
3466 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3467 "append timestamp to output filename"),
3468 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3469 "Record timestamp boundary (time of first/last samples)"),
3470 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3471 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3472 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3473 "signal"),
3474 OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3475 &record.switch_output_event_set, "switch output event",
3476 "switch output event selector. use 'perf list' to list available events",
3477 parse_events_option_new_evlist),
3478 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3479 "Limit number of switch output generated files"),
3480 OPT_BOOLEAN(0, "dry-run", &dry_run,
3481 "Parse options then exit"),
3482 #ifdef HAVE_AIO_SUPPORT
3483 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3484 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3485 record__aio_parse),
3486 #endif
3487 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3488 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3489 record__parse_affinity),
3490 #ifdef HAVE_ZSTD_SUPPORT
3491 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3492 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3493 record__parse_comp_level),
3494 #endif
3495 OPT_CALLBACK(0, "max-size", &record.output_max_size,
3496 "size", "Limit the maximum size of the output file", parse_output_max_size),
3497 OPT_UINTEGER(0, "num-thread-synthesize",
3498 &record.opts.nr_threads_synthesize,
3499 "number of threads to run for event synthesis"),
3500 #ifdef HAVE_LIBPFM
3501 OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3502 "libpfm4 event selector. use 'perf list' to list available events",
3503 parse_libpfm_events_option),
3504 #endif
3505 OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3506 "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3507 "\t\t\t 'snapshot': AUX area tracing snapshot).\n"
3508 "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3509 "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3510 parse_control_option),
3511 OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3512 "Fine-tune event synthesis: default=all", parse_record_synth_option),
3513 OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3514 &record.debuginfod.set, "debuginfod urls",
3515 "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3516 "system"),
3517 OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3518 "write collected trace data into several data files using parallel threads",
3519 record__parse_threads),
3520 OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3521 OPT_END()
3522 };
3523
3524 struct option *record_options = __record_options;
3525
3526 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3527 {
3528 struct perf_cpu cpu;
3529 int idx;
3530
3531 if (cpu_map__is_dummy(cpus))
3532 return 0;
3533
3534 perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3535 if (cpu.cpu == -1)
3536 continue;
3537 /* Return ENODEV if the input cpu is greater than max cpu */
3538 if ((unsigned long)cpu.cpu > mask->nbits)
3539 return -ENODEV;
3540 __set_bit(cpu.cpu, mask->bits);
3541 }
3542
3543 return 0;
3544 }
3545
3546 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3547 {
3548 struct perf_cpu_map *cpus;
3549
3550 cpus = perf_cpu_map__new(mask_spec);
3551 if (!cpus)
3552 return -ENOMEM;
3553
3554 bitmap_zero(mask->bits, mask->nbits);
3555 if (record__mmap_cpu_mask_init(mask, cpus))
3556 return -ENODEV;
3557
3558 perf_cpu_map__put(cpus);
3559
3560 return 0;
3561 }
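
/*
 * record__mmap_cpu_mask_init_spec() accepts the usual perf CPU list
 * syntax understood by perf_cpu_map__new(), e.g. (illustration only):
 *
 *   record__mmap_cpu_mask_init_spec(&mask, "0-2,4");  // sets bits 0,1,2,4
 */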
3562
3563 static void record__free_thread_masks(struct record *rec, int nr_threads)
3564 {
3565 int t;
3566
3567 if (rec->thread_masks)
3568 for (t = 0; t < nr_threads; t++)
3569 record__thread_mask_free(&rec->thread_masks[t]);
3570
3571 zfree(&rec->thread_masks);
3572 }
3573
3574 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3575 {
3576 int t, ret;
3577
3578 rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3579 if (!rec->thread_masks) {
3580 pr_err("Failed to allocate thread masks\n");
3581 return -ENOMEM;
3582 }
3583
3584 for (t = 0; t < nr_threads; t++) {
3585 ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3586 if (ret) {
3587 pr_err("Failed to allocate thread masks[%d]\n", t);
3588 goto out_free;
3589 }
3590 }
3591
3592 return 0;
3593
3594 out_free:
3595 record__free_thread_masks(rec, nr_threads);
3596
3597 return ret;
3598 }
3599
3600 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3601 {
3602 int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3603
3604 ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3605 if (ret)
3606 return ret;
3607
3608 rec->nr_threads = nr_cpus;
3609 pr_debug("nr_threads: %d\n", rec->nr_threads);
3610
3611 for (t = 0; t < rec->nr_threads; t++) {
3612 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3613 __set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3614 if (verbose > 0) {
3615 pr_debug("thread_masks[%d]: ", t);
3616 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3617 pr_debug("thread_masks[%d]: ", t);
3618 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3619 }
3620 }
3621
3622 return 0;
3623 }
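
/*
 * Example of the per-CPU layout built by record__init_thread_cpu_masks()
 * (a sketch): with --threads=cpu and an evlist covering CPUs 0-3, four
 * writer threads are created and both masks of thread t contain CPU t only:
 *
 *   thread_masks[0].maps = thread_masks[0].affinity = { 0 }
 *   thread_masks[1].maps = thread_masks[1].affinity = { 1 }
 *   ...
 *   thread_masks[3].maps = thread_masks[3].affinity = { 3 }
 */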
3624
3625 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3626 const char **maps_spec, const char **affinity_spec,
3627 u32 nr_spec)
3628 {
3629 u32 s;
3630 int ret = 0, t = 0;
3631 struct mmap_cpu_mask cpus_mask;
3632 struct thread_mask thread_mask, full_mask, *thread_masks;
3633
3634 ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3635 if (ret) {
3636 pr_err("Failed to allocate CPUs mask\n");
3637 return ret;
3638 }
3639
3640 ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3641 if (ret) {
3642 pr_err("Failed to init cpu mask\n");
3643 goto out_free_cpu_mask;
3644 }
3645
3646 ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3647 if (ret) {
3648 pr_err("Failed to allocate full mask\n");
3649 goto out_free_cpu_mask;
3650 }
3651
3652 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3653 if (ret) {
3654 pr_err("Failed to allocate thread mask\n");
3655 goto out_free_full_and_cpu_masks;
3656 }
3657
3658 for (s = 0; s < nr_spec; s++) {
3659 ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3660 if (ret) {
3661 pr_err("Failed to initialize maps thread mask\n");
3662 goto out_free;
3663 }
3664 ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3665 if (ret) {
3666 pr_err("Failed to initialize affinity thread mask\n");
3667 goto out_free;
3668 }
3669
3670 /* ignore invalid CPUs but do not allow empty masks */
3671 if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3672 cpus_mask.bits, thread_mask.maps.nbits)) {
3673 pr_err("Empty maps mask: %s\n", maps_spec[s]);
3674 ret = -EINVAL;
3675 goto out_free;
3676 }
3677 if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3678 cpus_mask.bits, thread_mask.affinity.nbits)) {
3679 pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3680 ret = -EINVAL;
3681 goto out_free;
3682 }
3683
3684 /* do not allow intersection with other masks (full_mask) */
3685 if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3686 thread_mask.maps.nbits)) {
3687 pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3688 ret = -EINVAL;
3689 goto out_free;
3690 }
3691 if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3692 thread_mask.affinity.nbits)) {
3693 pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3694 ret = -EINVAL;
3695 goto out_free;
3696 }
3697
3698 bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3699 thread_mask.maps.bits, full_mask.maps.nbits);
3700 bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3701 thread_mask.affinity.bits, full_mask.affinity.nbits);
3702
3703 thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3704 if (!thread_masks) {
3705 pr_err("Failed to reallocate thread masks\n");
3706 ret = -ENOMEM;
3707 goto out_free;
3708 }
3709 rec->thread_masks = thread_masks;
3710 rec->thread_masks[t] = thread_mask;
3711 if (verbose > 0) {
3712 pr_debug("thread_masks[%d]: ", t);
3713 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3714 pr_debug("thread_masks[%d]: ", t);
3715 mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3716 }
3717 t++;
3718 ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3719 if (ret) {
3720 pr_err("Failed to allocate thread mask\n");
3721 goto out_free_full_and_cpu_masks;
3722 }
3723 }
3724 rec->nr_threads = t;
3725 pr_debug("nr_threads: %d\n", rec->nr_threads);
3726 if (!rec->nr_threads)
3727 ret = -EINVAL;
3728
3729 out_free:
3730 record__thread_mask_free(&thread_mask);
3731 out_free_full_and_cpu_masks:
3732 record__thread_mask_free(&full_mask);
3733 out_free_cpu_mask:
3734 record__mmap_cpu_mask_free(&cpus_mask);
3735
3736 return ret;
3737 }
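
/*
 * record__init_thread_masks_spec() enforces two properties, sketched here
 * with hypothetical specs while recording on CPUs 0-3: every mask must
 * intersect the recorded CPUs, and masks of different threads must not
 * overlap each other.
 *
 *   maps_spec = { "0-1", "2-3" }  ->  accepted, two threads
 *   maps_spec = { "0-2", "2-3" }  ->  rejected: "Intersecting maps mask: 2-3"
 *   maps_spec = { "4-7" }         ->  rejected: "Empty maps mask: 4-7"
 */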
3738
3739 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3740 {
3741 int ret;
3742 struct cpu_topology *topo;
3743
3744 topo = cpu_topology__new();
3745 if (!topo) {
3746 pr_err("Failed to allocate CPU topology\n");
3747 return -ENOMEM;
3748 }
3749
3750 ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3751 topo->core_cpus_list, topo->core_cpus_lists);
3752 cpu_topology__delete(topo);
3753
3754 return ret;
3755 }
3756
3757 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3758 {
3759 int ret;
3760 struct cpu_topology *topo;
3761
3762 topo = cpu_topology__new();
3763 if (!topo) {
3764 pr_err("Failed to allocate CPU topology\n");
3765 return -ENOMEM;
3766 }
3767
3768 ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3769 topo->package_cpus_list, topo->package_cpus_lists);
3770 cpu_topology__delete(topo);
3771
3772 return ret;
3773 }
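
/*
 * Both topology helpers above reuse record__init_thread_masks_spec() with
 * CPU lists taken from the cached CPU topology, using the same list for
 * the maps and the affinity mask. For example (hypothetical 4-CPU, 2-core
 * SMT system), --threads=core might yield the specs "0,2" and "1,3",
 * while --threads=package collapses them into a single "0-3" spec.
 */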
3774
3775 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3776 {
3777 u32 s;
3778 int ret;
3779 const char **spec;
3780 struct numa_topology *topo;
3781
3782 topo = numa_topology__new();
3783 if (!topo) {
3784 pr_err("Failed to allocate NUMA topology\n");
3785 return -ENOMEM;
3786 }
3787
3788 spec = zalloc(topo->nr * sizeof(char *));
3789 if (!spec) {
3790 pr_err("Failed to allocate NUMA spec\n");
3791 ret = -ENOMEM;
3792 goto out_delete_topo;
3793 }
3794 for (s = 0; s < topo->nr; s++)
3795 spec[s] = topo->nodes[s].cpus;
3796
3797 ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3798
3799 zfree(&spec);
3800
3801 out_delete_topo:
3802 numa_topology__delete(topo);
3803
3804 return ret;
3805 }
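
/*
 * Example (sketch): on a two-node machine where node 0 spans CPUs 0-31
 * and node 1 spans CPUs 32-63, --threads=numa ends up calling
 * record__init_thread_masks_spec() with spec = { "0-31", "32-63" }, i.e.
 * one writer thread per NUMA node, pinned to that node's CPUs.
 */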
3806
3807 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3808 {
3809 int t, ret;
3810 u32 s, nr_spec = 0;
3811 char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3812 char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3813
3814 for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3815 spec = strtok_r(user_spec, ":", &spec_ptr);
3816 if (spec == NULL)
3817 break;
3818 pr_debug2("threads_spec[%d]: %s\n", t, spec);
3819 mask = strtok_r(spec, "/", &mask_ptr);
3820 if (mask == NULL)
3821 break;
3822 pr_debug2(" maps mask: %s\n", mask);
3823 tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3824 if (!tmp_spec) {
3825 pr_err("Failed to reallocate maps spec\n");
3826 ret = -ENOMEM;
3827 goto out_free;
3828 }
3829 maps_spec = tmp_spec;
3830 maps_spec[nr_spec] = dup_mask = strdup(mask);
3831 if (!maps_spec[nr_spec]) {
3832 pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3833 ret = -ENOMEM;
3834 goto out_free;
3835 }
3836 mask = strtok_r(NULL, "/", &mask_ptr);
3837 if (mask == NULL) {
3838 pr_err("Invalid thread maps or affinity specs\n");
3839 ret = -EINVAL;
3840 goto out_free;
3841 }
3842 pr_debug2(" affinity mask: %s\n", mask);
3843 tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3844 if (!tmp_spec) {
3845 pr_err("Failed to reallocate affinity spec\n");
3846 ret = -ENOMEM;
3847 goto out_free;
3848 }
3849 affinity_spec = tmp_spec;
3850 affinity_spec[nr_spec] = strdup(mask);
3851 if (!affinity_spec[nr_spec]) {
3852 pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3853 ret = -ENOMEM;
3854 goto out_free;
3855 }
3856 dup_mask = NULL;
3857 nr_spec++;
3858 }
3859
3860 ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3861 (const char **)affinity_spec, nr_spec);
3862
3863 out_free:
3864 free(dup_mask);
3865 for (s = 0; s < nr_spec; s++) {
3866 if (maps_spec)
3867 free(maps_spec[s]);
3868 if (affinity_spec)
3869 free(affinity_spec[s]);
3870 }
3871 free(affinity_spec);
3872 free(maps_spec);
3873
3874 return ret;
3875 }
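
/*
 * The parser above splits --threads=<spec> on ':' into per-thread entries
 * and each entry on '/' into a maps mask and an affinity mask. For
 * example (hypothetical masks):
 *
 *   --threads=0-3/0:4-7/4
 *
 * yields maps_spec     = { "0-3", "4-7" }
 *        affinity_spec = { "0",   "4"   }
 *
 * i.e. two threads, one reading the mmaps of CPUs 0-3 while pinned to
 * CPU 0, the other reading CPUs 4-7 while pinned to CPU 4.
 */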
3876
3877 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3878 {
3879 int ret;
3880
3881 ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3882 if (ret)
3883 return ret;
3884
3885 if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3886 return -ENODEV;
3887
3888 rec->nr_threads = 1;
3889
3890 return 0;
3891 }
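
/*
 * Without --threads a single "thread 0" is set up whose maps mask simply
 * mirrors the CPUs the evlist maps, e.g. recording with -C 2,3 leaves
 * only bits 2 and 3 set in rec->thread_masks->maps.
 */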
3892
3893 static int record__init_thread_masks(struct record *rec)
3894 {
3895 int ret = 0;
3896 struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3897
3898 if (!record__threads_enabled(rec))
3899 return record__init_thread_default_masks(rec, cpus);
3900
3901 if (evlist__per_thread(rec->evlist)) {
3902 pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3903 return -EINVAL;
3904 }
3905
3906 switch (rec->opts.threads_spec) {
3907 case THREAD_SPEC__CPU:
3908 ret = record__init_thread_cpu_masks(rec, cpus);
3909 break;
3910 case THREAD_SPEC__CORE:
3911 ret = record__init_thread_core_masks(rec, cpus);
3912 break;
3913 case THREAD_SPEC__PACKAGE:
3914 ret = record__init_thread_package_masks(rec, cpus);
3915 break;
3916 case THREAD_SPEC__NUMA:
3917 ret = record__init_thread_numa_masks(rec, cpus);
3918 break;
3919 case THREAD_SPEC__USER:
3920 ret = record__init_thread_user_masks(rec, cpus);
3921 break;
3922 default:
3923 break;
3924 }
3925
3926 return ret;
3927 }
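
/*
 * Dispatch summary for the switch above; threads_spec is filled in by
 * record__parse_threads() from the --threads argument (cf. thread_spec_tags):
 *
 *   --threads=cpu              -> record__init_thread_cpu_masks()
 *   --threads=core             -> record__init_thread_core_masks()
 *   --threads=package          -> record__init_thread_package_masks()
 *   --threads=numa             -> record__init_thread_numa_masks()
 *   --threads=<maps>/<aff>:... -> record__init_thread_user_masks()
 */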
3928
3929 int cmd_record(int argc, const char **argv)
3930 {
3931 int err;
3932 struct record *rec = &record;
3933 char errbuf[BUFSIZ];
3934
3935 setlocale(LC_ALL, "");
3936
3937 #ifndef HAVE_BPF_SKEL
3938 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3939 set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3940 # undef set_nobuild
3941 #endif
3942
3943 /* Disable eager loading of kernel symbols that adds overhead to perf record. */
3944 symbol_conf.lazy_load_kernel_maps = true;
3945 rec->opts.affinity = PERF_AFFINITY_SYS;
3946
3947 rec->evlist = evlist__new();
3948 if (rec->evlist == NULL)
3949 return -ENOMEM;
3950
3951 err = perf_config(perf_record_config, rec);
3952 if (err)
3953 return err;
3954
3955 argc = parse_options(argc, argv, record_options, record_usage,
3956 PARSE_OPT_STOP_AT_NON_OPTION);
3957 if (quiet)
3958 perf_quiet_option();
3959
3960 err = symbol__validate_sym_arguments();
3961 if (err)
3962 return err;
3963
3964 perf_debuginfod_setup(&record.debuginfod);
3965
3966 /* Make system wide (-a) the default target. */
3967 if (!argc && target__none(&rec->opts.target))
3968 rec->opts.target.system_wide = true;
3969
3970 if (nr_cgroups && !rec->opts.target.system_wide) {
3971 usage_with_options_msg(record_usage, record_options,
3972 "cgroup monitoring only available in system-wide mode");
3973
3974 }
3975
3976 if (rec->buildid_mmap) {
3977 if (!perf_can_record_build_id()) {
3978 pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3979 err = -EINVAL;
3980 goto out_opts;
3981 }
3982 pr_debug("Enabling build id in mmap2 events.\n");
3983 /* Enable mmap build id synthesizing. */
3984 symbol_conf.buildid_mmap2 = true;
3985 /* Enable perf_event_attr::build_id bit. */
3986 rec->opts.build_id = true;
3987 /* Disable build id cache. */
3988 rec->no_buildid = true;
3989 }
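
/*
 * Example: "perf record --buildid-mmap -a -- sleep 1" takes the branch
 * above, so build ids travel in the MMAP2 events themselves instead of
 * being collected into the build id cache when the session ends.
 */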
3990
3991 if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3992 pr_err("Kernel has no cgroup sampling support.\n");
3993 err = -EINVAL;
3994 goto out_opts;
3995 }
3996
3997 if (rec->opts.kcore)
3998 rec->opts.text_poke = true;
3999
4000 if (rec->opts.kcore || record__threads_enabled(rec))
4001 rec->data.is_dir = true;
4002
4003 if (record__threads_enabled(rec)) {
4004 if (rec->opts.affinity != PERF_AFFINITY_SYS) {
4005 pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
4006 goto out_opts;
4007 }
4008 if (record__aio_enabled(rec)) {
4009 pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
4010 goto out_opts;
4011 }
4012 }
4013
4014 if (rec->opts.comp_level != 0) {
4015 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
4016 rec->no_buildid = true;
4017 }
4018
4019 if (rec->opts.record_switch_events &&
4020 !perf_can_record_switch_events()) {
4021 ui__error("kernel does not support recording context switch events\n");
4022 parse_options_usage(record_usage, record_options, "switch-events", 0);
4023 err = -EINVAL;
4024 goto out_opts;
4025 }
4026
4027 if (switch_output_setup(rec)) {
4028 parse_options_usage(record_usage, record_options, "switch-output", 0);
4029 err = -EINVAL;
4030 goto out_opts;
4031 }
4032
4033 if (rec->switch_output.time) {
4034 signal(SIGALRM, alarm_sig_handler);
4035 alarm(rec->switch_output.time);
4036 }
4037
4038 if (rec->switch_output.num_files) {
4039 rec->switch_output.filenames = calloc(rec->switch_output.num_files,
4040 sizeof(char *));
4041 if (!rec->switch_output.filenames) {
4042 err = -EINVAL;
4043 goto out_opts;
4044 }
4045 }
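
/*
 * Example of the two switch-output mechanisms set up above (illustrative):
 *
 *   perf record --switch-output=10s --switch-max-files=3 -a
 *
 * arms a 10 second alarm() feeding alarm_sig_handler() and keeps only the
 * three newest perf.data.<timestamp> files around.
 */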
4046
4047 if (rec->timestamp_filename && record__threads_enabled(rec)) {
4048 rec->timestamp_filename = false;
4049 pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
4050 }
4051
4052 /*
4053 * Allow aliases to facilitate the lookup of symbols for address
4054 * filters. Refer to auxtrace_parse_filters().
4055 */
4056 symbol_conf.allow_aliases = true;
4057
4058 symbol__init(NULL);
4059
4060 err = record__auxtrace_init(rec);
4061 if (err)
4062 goto out;
4063
4064 if (dry_run)
4065 goto out;
4066
4067 err = -ENOMEM;
4068
4069 if (rec->no_buildid_cache || rec->no_buildid) {
4070 disable_buildid_cache();
4071 } else if (rec->switch_output.enabled) {
4072 /*
4073 * In 'perf record --switch-output', disable buildid
4074 * generation by default to reduce data file switching
4075 * overhead. Still generate buildids if they are explicitly
4076 * required using
4077 *
4078 * perf record --switch-output --no-no-buildid \
4079 * --no-no-buildid-cache
4080 *
4081 * The following code is equivalent to:
4082 *
4083 * if ((rec->no_buildid || !rec->no_buildid_set) &&
4084 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
4085 * disable_buildid_cache();
4086 */
4087 bool disable = true;
4088
4089 if (rec->no_buildid_set && !rec->no_buildid)
4090 disable = false;
4091 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
4092 disable = false;
4093 if (disable) {
4094 rec->no_buildid = true;
4095 rec->no_buildid_cache = true;
4096 disable_buildid_cache();
4097 }
4098 }
4099
4100 if (record.opts.overwrite)
4101 record.opts.tail_synthesize = true;
4102
4103 if (rec->evlist->core.nr_entries == 0) {
4104 bool can_profile_kernel = perf_event_paranoid_check(1);
4105
4106 err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
4107 if (err)
4108 goto out;
4109 }
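
/*
 * Example: a bare "perf record -- sleep 1" reaches this point with an
 * empty evlist and falls back to "cycles:P", or to "cycles:Pu" when
 * perf_event_paranoid does not allow this user to profile the kernel.
 */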
4110
4111 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
4112 rec->opts.no_inherit = true;
4113
4114 err = target__validate(&rec->opts.target);
4115 if (err) {
4116 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4117 ui__warning("%s\n", errbuf);
4118 }
4119
4120 err = target__parse_uid(&rec->opts.target);
4121 if (err) {
4122 int saved_errno = errno;
4123
4124 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
4125 ui__error("%s", errbuf);
4126
4127 err = -saved_errno;
4128 goto out;
4129 }
4130
4131 /* Enable ignoring missing threads when -u/-p option is defined. */
4132 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4133
4134 evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);
4135
4136 if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4137 arch__add_leaf_frame_record_opts(&rec->opts);
4138
4139 err = -ENOMEM;
4140 if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4141 if (rec->opts.target.pid != NULL) {
4142 pr_err("Couldn't create thread/CPU maps: %s\n",
4143 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4144 goto out;
4145 }
4146 else
4147 usage_with_options(record_usage, record_options);
4148 }
4149
4150 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4151 if (err)
4152 goto out;
4153
4154 /*
4155 * We take all buildids when the file contains AUX area
4156 * tracing data, because we do not decode the trace:
4157 * decoding it would take too long.
4158 */
4159 if (rec->opts.full_auxtrace)
4160 rec->buildid_all = true;
4161
4162 if (rec->opts.text_poke) {
4163 err = record__config_text_poke(rec->evlist);
4164 if (err) {
4165 pr_err("record__config_text_poke failed, error %d\n", err);
4166 goto out;
4167 }
4168 }
4169
4170 if (rec->off_cpu) {
4171 err = record__config_off_cpu(rec);
4172 if (err) {
4173 pr_err("record__config_off_cpu failed, error %d\n", err);
4174 goto out;
4175 }
4176 }
4177
4178 if (record_opts__config(&rec->opts)) {
4179 err = -EINVAL;
4180 goto out;
4181 }
4182
4183 err = record__config_tracking_events(rec);
4184 if (err) {
4185 pr_err("record__config_tracking_events failed, error %d\n", err);
4186 goto out;
4187 }
4188
4189 err = record__init_thread_masks(rec);
4190 if (err) {
4191 pr_err("Failed to initialize parallel data streaming masks\n");
4192 goto out;
4193 }
4194
4195 if (rec->opts.nr_cblocks > nr_cblocks_max)
4196 rec->opts.nr_cblocks = nr_cblocks_max;
4197 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4198
4199 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4200 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4201
4202 if (rec->opts.comp_level > comp_level_max)
4203 rec->opts.comp_level = comp_level_max;
4204 pr_debug("comp level: %d\n", rec->opts.comp_level);
4205
4206 err = __cmd_record(&record, argc, argv);
4207 out:
4208 evlist__delete(rec->evlist);
4209 symbol__exit();
4210 auxtrace_record__free(rec->itr);
4211 out_opts:
4212 record__free_thread_masks(rec, rec->nr_threads);
4213 rec->nr_threads = 0;
4214 evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4215 return err;
4216 }
4217
4218 static void snapshot_sig_handler(int sig __maybe_unused)
4219 {
4220 struct record *rec = &record;
4221
4222 hit_auxtrace_snapshot_trigger(rec);
4223
4224 if (switch_output_signal(rec))
4225 trigger_hit(&switch_output_trigger);
4226 }
4227
4228 static void alarm_sig_handler(int sig __maybe_unused)
4229 {
4230 struct record *rec = &record;
4231
4232 if (switch_output_time(rec))
4233 trigger_hit(&switch_output_trigger);
4234 }
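
/*
 * Both handlers above only request work; the main record loop acts on the
 * raised trigger the next time it wakes up. For example, with
 * "perf record --switch-output -a" running, a plain
 *
 *   kill -USR2 $(pgrep -x perf)
 *
 * makes snapshot_sig_handler() fire and the current output file is rotated.
 */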
4235