xref: /openbmc/linux/tools/perf/builtin-record.c (revision 6246ed09111fbb17168619006b4380103c6673c3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "util/pmu-hybrid.h"
51 #include "util/evlist-hybrid.h"
52 #include "util/off_cpu.h"
53 #include "asm/bug.h"
54 #include "perf.h"
55 #include "cputopo.h"
56 
57 #include <errno.h>
58 #include <inttypes.h>
59 #include <locale.h>
60 #include <poll.h>
61 #include <pthread.h>
62 #include <unistd.h>
63 #ifndef HAVE_GETTID
64 #include <syscall.h>
65 #endif
66 #include <sched.h>
67 #include <signal.h>
68 #ifdef HAVE_EVENTFD_SUPPORT
69 #include <sys/eventfd.h>
70 #endif
71 #include <sys/mman.h>
72 #include <sys/wait.h>
73 #include <sys/types.h>
74 #include <sys/stat.h>
75 #include <fcntl.h>
76 #include <linux/err.h>
77 #include <linux/string.h>
78 #include <linux/time64.h>
79 #include <linux/zalloc.h>
80 #include <linux/bitmap.h>
81 #include <sys/time.h>
82 
/*
 * State of the output-switching feature.
 *
 * The signal, size and time members gate switch_output_signal(),
 * switch_output_size() and switch_output_time(): each helper reports
 * false while its member is zero/false.
 */
struct switch_output {
	bool enabled;
	bool signal;
	unsigned long size;	/* threshold compared with record::bytes_written */
	unsigned long time;
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};
94 
95 struct thread_mask {
96 	struct mmap_cpu_mask	maps;
97 	struct mmap_cpu_mask	affinity;
98 };
99 
100 struct record_thread {
101 	pid_t			tid;
102 	struct thread_mask	*mask;
103 	struct {
104 		int		msg[2];
105 		int		ack[2];
106 	} pipes;
107 	struct fdarray		pollfd;
108 	int			ctlfd_pos;
109 	int			nr_mmaps;
110 	struct mmap		**maps;
111 	struct mmap		**overwrite_maps;
112 	struct record		*rec;
113 	unsigned long long	samples;
114 	unsigned long		waking;
115 	u64			bytes_written;
116 	u64			bytes_transferred;
117 	u64			bytes_compressed;
118 };
119 
120 static __thread struct record_thread *thread;
121 
/* Message identifiers; THREAD_MSG__MAX is the number of valid values. */
enum thread_msg {
	THREAD_MSG__UNDEFINED,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};

/* Printable tag for each enum thread_msg value, indexed by the value. */
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	[THREAD_MSG__UNDEFINED]	= "UNDEFINED",
	[THREAD_MSG__READY]	= "READY",
};
131 
/* Thread specification kinds; THREAD_SPEC__MAX is the number of valid values. */
enum thread_spec {
	THREAD_SPEC__UNDEFINED,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};

/* Lower-case tag for each enum thread_spec value, indexed by the value. */
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	[THREAD_SPEC__UNDEFINED]	= "undefined",
	[THREAD_SPEC__CPU]		= "cpu",
	[THREAD_SPEC__CORE]		= "core",
	[THREAD_SPEC__PACKAGE]		= "package",
	[THREAD_SPEC__NUMA]		= "numa",
	[THREAD_SPEC__USER]		= "user",
};
145 
146 struct record {
147 	struct perf_tool	tool;
148 	struct record_opts	opts;
149 	u64			bytes_written;
150 	struct perf_data	data;
151 	struct auxtrace_record	*itr;
152 	struct evlist	*evlist;
153 	struct perf_session	*session;
154 	struct evlist		*sb_evlist;
155 	pthread_t		thread_id;
156 	int			realtime_prio;
157 	bool			switch_output_event_set;
158 	bool			no_buildid;
159 	bool			no_buildid_set;
160 	bool			no_buildid_cache;
161 	bool			no_buildid_cache_set;
162 	bool			buildid_all;
163 	bool			buildid_mmap;
164 	bool			timestamp_filename;
165 	bool			timestamp_boundary;
166 	bool			off_cpu;
167 	struct switch_output	switch_output;
168 	unsigned long long	samples;
169 	unsigned long		output_max_size;	/* = 0: unlimited */
170 	struct perf_debuginfod	debuginfod;
171 	int			nr_threads;
172 	struct thread_mask	*thread_masks;
173 	struct record_thread	*thread_data;
174 };
175 
/* Set to 1 by sig_handler() and by record__write() once the size limit is hit. */
static volatile int done;

/* Consulted by record__auxtrace_snapshot_exit() before starting a snapshot. */
static volatile int auxtrace_record__snapshot_started;

static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
181 
182 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
183 	"SYS", "NODE", "CPU"
184 };
185 
#ifndef HAVE_GETTID
/* Fallback for C libraries without a gettid() wrapper: issue the raw system call. */
static inline pid_t gettid(void)
{
	long tid = syscall(__NR_gettid);

	return (pid_t)tid;
}
#endif
192 
193 static int record__threads_enabled(struct record *rec)
194 {
195 	return rec->opts.threads_spec;
196 }
197 
198 static bool switch_output_signal(struct record *rec)
199 {
200 	return rec->switch_output.signal &&
201 	       trigger_is_ready(&switch_output_trigger);
202 }
203 
204 static bool switch_output_size(struct record *rec)
205 {
206 	return rec->switch_output.size &&
207 	       trigger_is_ready(&switch_output_trigger) &&
208 	       (rec->bytes_written >= rec->switch_output.size);
209 }
210 
211 static bool switch_output_time(struct record *rec)
212 {
213 	return rec->switch_output.time &&
214 	       trigger_is_ready(&switch_output_trigger);
215 }
216 
217 static u64 record__bytes_written(struct record *rec)
218 {
219 	int t;
220 	u64 bytes_written = rec->bytes_written;
221 	struct record_thread *thread_data = rec->thread_data;
222 
223 	for (t = 0; t < rec->nr_threads; t++)
224 		bytes_written += thread_data[t].bytes_written;
225 
226 	return bytes_written;
227 }
228 
229 static bool record__output_max_size_exceeded(struct record *rec)
230 {
231 	return rec->output_max_size &&
232 	       (record__bytes_written(rec) >= rec->output_max_size);
233 }
234 
235 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
236 			 void *bf, size_t size)
237 {
238 	struct perf_data_file *file = &rec->session->data->file;
239 
240 	if (map && map->file)
241 		file = map->file;
242 
243 	if (perf_data_file__write(file, bf, size) < 0) {
244 		pr_err("failed to write perf data, error: %m\n");
245 		return -1;
246 	}
247 
248 	if (map && map->file)
249 		thread->bytes_written += size;
250 	else
251 		rec->bytes_written += size;
252 
253 	if (record__output_max_size_exceeded(rec) && !done) {
254 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
255 				" stopping session ]\n",
256 				record__bytes_written(rec) >> 10);
257 		done = 1;
258 	}
259 
260 	if (switch_output_size(rec))
261 		trigger_hit(&switch_output_trigger);
262 
263 	return 0;
264 }
265 
/* Forward declarations for helpers that are used before their definitions. */
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size,
			    void *src, size_t src_size);
270 
271 #ifdef HAVE_AIO_SUPPORT
/*
 * Fill in 'cblock' and queue an asynchronous write of 'size' bytes from
 * 'buf' to 'trace_fd' at offset 'off', retrying while aio_write() fails
 * with EAGAIN.
 *
 * Returns the last aio_write() result: 0 once queued, non-zero on any
 * other failure, in which case cblock->aio_fildes is reset to -1.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0 || errno != EAGAIN)
			break;
	}

	if (rc != 0) {
		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
	}

	return rc;
}
296 
297 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
298 {
299 	void *rem_buf;
300 	off_t rem_off;
301 	size_t rem_size;
302 	int rc, aio_errno;
303 	ssize_t aio_ret, written;
304 
305 	aio_errno = aio_error(cblock);
306 	if (aio_errno == EINPROGRESS)
307 		return 0;
308 
309 	written = aio_ret = aio_return(cblock);
310 	if (aio_ret < 0) {
311 		if (aio_errno != EINTR)
312 			pr_err("failed to write perf data, error: %m\n");
313 		written = 0;
314 	}
315 
316 	rem_size = cblock->aio_nbytes - written;
317 
318 	if (rem_size == 0) {
319 		cblock->aio_fildes = -1;
320 		/*
321 		 * md->refcount is incremented in record__aio_pushfn() for
322 		 * every aio write request started in record__aio_push() so
323 		 * decrement it because the request is now complete.
324 		 */
325 		perf_mmap__put(&md->core);
326 		rc = 1;
327 	} else {
328 		/*
329 		 * aio write request may require restart with the
330 		 * reminder if the kernel didn't write whole
331 		 * chunk at once.
332 		 */
333 		rem_off = cblock->aio_offset + written;
334 		rem_buf = (void *)(cblock->aio_buf + written);
335 		record__aio_write(cblock, cblock->aio_fildes,
336 				rem_buf, rem_size, rem_off);
337 		rc = 0;
338 	}
339 
340 	return rc;
341 }
342 
343 static int record__aio_sync(struct mmap *md, bool sync_all)
344 {
345 	struct aiocb **aiocb = md->aio.aiocb;
346 	struct aiocb *cblocks = md->aio.cblocks;
347 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
348 	int i, do_suspend;
349 
350 	do {
351 		do_suspend = 0;
352 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
353 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
354 				if (sync_all)
355 					aiocb[i] = NULL;
356 				else
357 					return i;
358 			} else {
359 				/*
360 				 * Started aio write is not complete yet
361 				 * so it has to be waited before the
362 				 * next allocation.
363 				 */
364 				aiocb[i] = &cblocks[i];
365 				do_suspend = 1;
366 			}
367 		}
368 		if (!do_suspend)
369 			return -1;
370 
371 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
372 			if (!(errno == EAGAIN || errno == EINTR))
373 				pr_err("failed to sync perf data, error: %m\n");
374 		}
375 	} while (1);
376 }
377 
/* Context handed through perf_mmap__push() to record__aio_pushfn(). */
struct record_aio {
	struct record *rec;
	void *data;	/* destination buffer for the copied/compressed chunk */
	size_t size;	/* bytes accumulated in 'data' so far */
};
383 
384 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
385 {
386 	struct record_aio *aio = to;
387 
388 	/*
389 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
390 	 * to release space in the kernel buffer as fast as possible, calling
391 	 * perf_mmap__consume() from perf_mmap__push() function.
392 	 *
393 	 * That lets the kernel to proceed with storing more profiling data into
394 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
395 	 *
396 	 * Coping can be done in two steps in case the chunk of profiling data
397 	 * crosses the upper bound of the kernel buffer. In this case we first move
398 	 * part of data from map->start till the upper bound and then the reminder
399 	 * from the beginning of the kernel buffer till the end of the data chunk.
400 	 */
401 
402 	if (record__comp_enabled(aio->rec)) {
403 		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
404 				     mmap__mmap_len(map) - aio->size,
405 				     buf, size);
406 	} else {
407 		memcpy(aio->data + aio->size, buf, size);
408 	}
409 
410 	if (!aio->size) {
411 		/*
412 		 * Increment map->refcount to guard map->aio.data[] buffer
413 		 * from premature deallocation because map object can be
414 		 * released earlier than aio write request started on
415 		 * map->aio.data[] buffer is complete.
416 		 *
417 		 * perf_mmap__put() is done at record__aio_complete()
418 		 * after started aio request completion or at record__aio_push()
419 		 * if the request failed to start.
420 		 */
421 		perf_mmap__get(&map->core);
422 	}
423 
424 	aio->size += size;
425 
426 	return size;
427 }
428 
429 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
430 {
431 	int ret, idx;
432 	int trace_fd = rec->session->data->file.fd;
433 	struct record_aio aio = { .rec = rec, .size = 0 };
434 
435 	/*
436 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
437 	 * becomes available after previous aio write operation.
438 	 */
439 
440 	idx = record__aio_sync(map, false);
441 	aio.data = map->aio.data[idx];
442 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
443 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
444 		return ret;
445 
446 	rec->samples++;
447 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
448 	if (!ret) {
449 		*off += aio.size;
450 		rec->bytes_written += aio.size;
451 		if (switch_output_size(rec))
452 			trigger_hit(&switch_output_trigger);
453 	} else {
454 		/*
455 		 * Decrement map->refcount incremented in record__aio_pushfn()
456 		 * back if record__aio_write() operation failed to start, otherwise
457 		 * map->refcount is decremented in record__aio_complete() after
458 		 * aio write operation finishes successfully.
459 		 */
460 		perf_mmap__put(&map->core);
461 	}
462 
463 	return ret;
464 }
465 
/* Current file offset of 'trace_fd' as reported by lseek(), or -1 on failure. */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
470 
/* Move the file offset of 'trace_fd' to the absolute position 'pos'; the lseek() result is not checked. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
475 
476 static void record__aio_mmap_read_sync(struct record *rec)
477 {
478 	int i;
479 	struct evlist *evlist = rec->evlist;
480 	struct mmap *maps = evlist->mmap;
481 
482 	if (!record__aio_enabled(rec))
483 		return;
484 
485 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
486 		struct mmap *map = &maps[i];
487 
488 		if (map->core.base)
489 			record__aio_sync(map, true);
490 	}
491 }
492 
/* Control-block count applied by record__aio_parse() when none (or zero) is given. */
static int nr_cblocks_default = 1;
/* Upper bound for the control-block count when AIO support is built in. */
static int nr_cblocks_max = 4;
495 
496 static int record__aio_parse(const struct option *opt,
497 			     const char *str,
498 			     int unset)
499 {
500 	struct record_opts *opts = (struct record_opts *)opt->value;
501 
502 	if (unset) {
503 		opts->nr_cblocks = 0;
504 	} else {
505 		if (str)
506 			opts->nr_cblocks = strtol(str, NULL, 0);
507 		if (!opts->nr_cblocks)
508 			opts->nr_cblocks = nr_cblocks_default;
509 	}
510 
511 	return 0;
512 }
513 #else /* HAVE_AIO_SUPPORT */
/* Without AIO support no control blocks are allowed (zero-initialized). */
static int nr_cblocks_max;
515 
516 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
517 			    off_t *off __maybe_unused)
518 {
519 	return -1;
520 }
521 
522 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
523 {
524 	return -1;
525 }
526 
527 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
528 {
529 }
530 
531 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
532 {
533 }
534 #endif
535 
536 static int record__aio_enabled(struct record *rec)
537 {
538 	return rec->opts.nr_cblocks > 0;
539 }
540 
541 #define MMAP_FLUSH_DEFAULT 1
542 static int record__mmap_flush_parse(const struct option *opt,
543 				    const char *str,
544 				    int unset)
545 {
546 	int flush_max;
547 	struct record_opts *opts = (struct record_opts *)opt->value;
548 	static struct parse_tag tags[] = {
549 			{ .tag  = 'B', .mult = 1       },
550 			{ .tag  = 'K', .mult = 1 << 10 },
551 			{ .tag  = 'M', .mult = 1 << 20 },
552 			{ .tag  = 'G', .mult = 1 << 30 },
553 			{ .tag  = 0 },
554 	};
555 
556 	if (unset)
557 		return 0;
558 
559 	if (str) {
560 		opts->mmap_flush = parse_tag_value(str, tags);
561 		if (opts->mmap_flush == (int)-1)
562 			opts->mmap_flush = strtol(str, NULL, 0);
563 	}
564 
565 	if (!opts->mmap_flush)
566 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
567 
568 	flush_max = evlist__mmap_size(opts->mmap_pages);
569 	flush_max /= 4;
570 	if (opts->mmap_flush > flush_max)
571 		opts->mmap_flush = flush_max;
572 
573 	return 0;
574 }
575 
#ifdef HAVE_ZSTD_SUPPORT
/* Compression level applied when the option is given without (or with a zero) value. */
static unsigned int comp_level_default = 1;

/*
 * Option callback for the compression level.
 *
 * 'unset' stores 0. Otherwise the level is parsed from 'str' when one is
 * given, and a resulting value of 0 is replaced by comp_level_default.
 * Always returns 0.
 */
static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = opt->value;

	if (unset) {
		opts->comp_level = 0;
		return 0;
	}

	if (str != NULL)
		opts->comp_level = strtol(str, NULL, 0);

	if (opts->comp_level == 0)
		opts->comp_level = comp_level_default;

	return 0;
}
#endif
/* Highest compression level value; defined regardless of HAVE_ZSTD_SUPPORT. */
static unsigned int comp_level_max = 22;
596 
597 static int record__comp_enabled(struct record *rec)
598 {
599 	return rec->opts.comp_level > 0;
600 }
601 
602 static int process_synthesized_event(struct perf_tool *tool,
603 				     union perf_event *event,
604 				     struct perf_sample *sample __maybe_unused,
605 				     struct machine *machine __maybe_unused)
606 {
607 	struct record *rec = container_of(tool, struct record, tool);
608 	return record__write(rec, NULL, event, event->header.size);
609 }
610 
611 static int process_locked_synthesized_event(struct perf_tool *tool,
612 				     union perf_event *event,
613 				     struct perf_sample *sample __maybe_unused,
614 				     struct machine *machine __maybe_unused)
615 {
616 	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
617 	int ret;
618 
619 	pthread_mutex_lock(&synth_lock);
620 	ret = process_synthesized_event(tool, event, sample, machine);
621 	pthread_mutex_unlock(&synth_lock);
622 	return ret;
623 }
624 
625 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
626 {
627 	struct record *rec = to;
628 
629 	if (record__comp_enabled(rec)) {
630 		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
631 		bf   = map->data;
632 	}
633 
634 	thread->samples++;
635 	return record__write(rec, map, bf, size);
636 }
637 
/* Number of the terminating signal seen by sig_handler(), or -1 if none. */
static volatile int signr = -1;
/* Set to 1 by sig_handler() on SIGCHLD. */
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
/* Descriptor sig_handler() writes to in order to wake up poll(); -1 until set up. */
static int done_fd = -1;
#endif
643 
644 static void sig_handler(int sig)
645 {
646 	if (sig == SIGCHLD)
647 		child_finished = 1;
648 	else
649 		signr = sig;
650 
651 	done = 1;
652 #ifdef HAVE_EVENTFD_SUPPORT
653 {
654 	u64 tmp = 1;
655 	/*
656 	 * It is possible for this signal handler to run after done is checked
657 	 * in the main loop, but before the perf counter fds are polled. If this
658 	 * happens, the poll() will continue to wait even though done is set,
659 	 * and will only break out if either another signal is received, or the
660 	 * counters are ready for read. To ensure the poll() doesn't sleep when
661 	 * done is set, use an eventfd (done_fd) to wake up the poll().
662 	 */
663 	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
664 		pr_err("failed to signal wakeup fd, error: %m\n");
665 }
666 #endif // HAVE_EVENTFD_SUPPORT
667 }
668 
/* SIGSEGV handler: run perf_hooks__recover() first, then sighandler_dump_stack(). */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
674 
675 static void record__sig_exit(void)
676 {
677 	if (signr == -1)
678 		return;
679 
680 	signal(signr, SIG_DFL);
681 	raise(signr);
682 }
683 
684 #ifdef HAVE_AUXTRACE_SUPPORT
685 
686 static int record__process_auxtrace(struct perf_tool *tool,
687 				    struct mmap *map,
688 				    union perf_event *event, void *data1,
689 				    size_t len1, void *data2, size_t len2)
690 {
691 	struct record *rec = container_of(tool, struct record, tool);
692 	struct perf_data *data = &rec->data;
693 	size_t padding;
694 	u8 pad[8] = {0};
695 
696 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
697 		off_t file_offset;
698 		int fd = perf_data__fd(data);
699 		int err;
700 
701 		file_offset = lseek(fd, 0, SEEK_CUR);
702 		if (file_offset == -1)
703 			return -1;
704 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
705 						     event, file_offset);
706 		if (err)
707 			return err;
708 	}
709 
710 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
711 	padding = (len1 + len2) & 7;
712 	if (padding)
713 		padding = 8 - padding;
714 
715 	record__write(rec, map, event, event->header.size);
716 	record__write(rec, map, data1, len1);
717 	if (len2)
718 		record__write(rec, map, data2, len2);
719 	record__write(rec, map, &pad, padding);
720 
721 	return 0;
722 }
723 
724 static int record__auxtrace_mmap_read(struct record *rec,
725 				      struct mmap *map)
726 {
727 	int ret;
728 
729 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
730 				  record__process_auxtrace);
731 	if (ret < 0)
732 		return ret;
733 
734 	if (ret)
735 		rec->samples++;
736 
737 	return 0;
738 }
739 
740 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
741 					       struct mmap *map)
742 {
743 	int ret;
744 
745 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
746 					   record__process_auxtrace,
747 					   rec->opts.auxtrace_snapshot_size);
748 	if (ret < 0)
749 		return ret;
750 
751 	if (ret)
752 		rec->samples++;
753 
754 	return 0;
755 }
756 
757 static int record__auxtrace_read_snapshot_all(struct record *rec)
758 {
759 	int i;
760 	int rc = 0;
761 
762 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
763 		struct mmap *map = &rec->evlist->mmap[i];
764 
765 		if (!map->auxtrace_mmap.base)
766 			continue;
767 
768 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
769 			rc = -1;
770 			goto out;
771 		}
772 	}
773 out:
774 	return rc;
775 }
776 
777 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
778 {
779 	pr_debug("Recording AUX area tracing snapshot\n");
780 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
781 		trigger_error(&auxtrace_snapshot_trigger);
782 	} else {
783 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
784 			trigger_error(&auxtrace_snapshot_trigger);
785 		else
786 			trigger_ready(&auxtrace_snapshot_trigger);
787 	}
788 }
789 
790 static int record__auxtrace_snapshot_exit(struct record *rec)
791 {
792 	if (trigger_is_error(&auxtrace_snapshot_trigger))
793 		return 0;
794 
795 	if (!auxtrace_record__snapshot_started &&
796 	    auxtrace_record__snapshot_start(rec->itr))
797 		return -1;
798 
799 	record__read_auxtrace_snapshot(rec, true);
800 	if (trigger_is_error(&auxtrace_snapshot_trigger))
801 		return -1;
802 
803 	return 0;
804 }
805 
806 static int record__auxtrace_init(struct record *rec)
807 {
808 	int err;
809 
810 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
811 	    && record__threads_enabled(rec)) {
812 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
813 		return -EINVAL;
814 	}
815 
816 	if (!rec->itr) {
817 		rec->itr = auxtrace_record__init(rec->evlist, &err);
818 		if (err)
819 			return err;
820 	}
821 
822 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
823 					      rec->opts.auxtrace_snapshot_opts);
824 	if (err)
825 		return err;
826 
827 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
828 					    rec->opts.auxtrace_sample_opts);
829 	if (err)
830 		return err;
831 
832 	auxtrace_regroup_aux_output(rec->evlist);
833 
834 	return auxtrace_parse_filters(rec->evlist);
835 }
836 
837 #else
838 
839 static inline
840 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
841 			       struct mmap *map __maybe_unused)
842 {
843 	return 0;
844 }
845 
846 static inline
847 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
848 				    bool on_exit __maybe_unused)
849 {
850 }
851 
852 static inline
853 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
854 {
855 	return 0;
856 }
857 
858 static inline
859 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
860 {
861 	return 0;
862 }
863 
864 static int record__auxtrace_init(struct record *rec __maybe_unused)
865 {
866 	return 0;
867 }
868 
869 #endif
870 
871 static int record__config_text_poke(struct evlist *evlist)
872 {
873 	struct evsel *evsel;
874 
875 	/* Nothing to do if text poke is already configured */
876 	evlist__for_each_entry(evlist, evsel) {
877 		if (evsel->core.attr.text_poke)
878 			return 0;
879 	}
880 
881 	evsel = evlist__add_dummy_on_all_cpus(evlist);
882 	if (!evsel)
883 		return -ENOMEM;
884 
885 	evsel->core.attr.text_poke = 1;
886 	evsel->core.attr.ksymbol = 1;
887 	evsel->immediate = true;
888 	evsel__set_sample_bit(evsel, TIME);
889 
890 	return 0;
891 }
892 
893 static int record__config_off_cpu(struct record *rec)
894 {
895 	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
896 }
897 
898 static bool record__kcore_readable(struct machine *machine)
899 {
900 	char kcore[PATH_MAX];
901 	int fd;
902 
903 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
904 
905 	fd = open(kcore, O_RDONLY);
906 	if (fd < 0)
907 		return false;
908 
909 	close(fd);
910 
911 	return true;
912 }
913 
914 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
915 {
916 	char from_dir[PATH_MAX];
917 	char kcore_dir[PATH_MAX];
918 	int ret;
919 
920 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
921 
922 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
923 	if (ret)
924 		return ret;
925 
926 	return kcore_copy(from_dir, kcore_dir);
927 }
928 
929 static void record__thread_data_init_pipes(struct record_thread *thread_data)
930 {
931 	thread_data->pipes.msg[0] = -1;
932 	thread_data->pipes.msg[1] = -1;
933 	thread_data->pipes.ack[0] = -1;
934 	thread_data->pipes.ack[1] = -1;
935 }
936 
937 static int record__thread_data_open_pipes(struct record_thread *thread_data)
938 {
939 	if (pipe(thread_data->pipes.msg))
940 		return -EINVAL;
941 
942 	if (pipe(thread_data->pipes.ack)) {
943 		close(thread_data->pipes.msg[0]);
944 		thread_data->pipes.msg[0] = -1;
945 		close(thread_data->pipes.msg[1]);
946 		thread_data->pipes.msg[1] = -1;
947 		return -EINVAL;
948 	}
949 
950 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
951 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
952 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
953 
954 	return 0;
955 }
956 
957 static void record__thread_data_close_pipes(struct record_thread *thread_data)
958 {
959 	if (thread_data->pipes.msg[0] != -1) {
960 		close(thread_data->pipes.msg[0]);
961 		thread_data->pipes.msg[0] = -1;
962 	}
963 	if (thread_data->pipes.msg[1] != -1) {
964 		close(thread_data->pipes.msg[1]);
965 		thread_data->pipes.msg[1] = -1;
966 	}
967 	if (thread_data->pipes.ack[0] != -1) {
968 		close(thread_data->pipes.ack[0]);
969 		thread_data->pipes.ack[0] = -1;
970 	}
971 	if (thread_data->pipes.ack[1] != -1) {
972 		close(thread_data->pipes.ack[1]);
973 		thread_data->pipes.ack[1] = -1;
974 	}
975 }
976 
977 static bool evlist__per_thread(struct evlist *evlist)
978 {
979 	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
980 }
981 
982 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
983 {
984 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
985 	struct mmap *mmap = evlist->mmap;
986 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
987 	struct perf_cpu_map *cpus = evlist->core.all_cpus;
988 	bool per_thread = evlist__per_thread(evlist);
989 
990 	if (per_thread)
991 		thread_data->nr_mmaps = nr_mmaps;
992 	else
993 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
994 						      thread_data->mask->maps.nbits);
995 	if (mmap) {
996 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
997 		if (!thread_data->maps)
998 			return -ENOMEM;
999 	}
1000 	if (overwrite_mmap) {
1001 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1002 		if (!thread_data->overwrite_maps) {
1003 			zfree(&thread_data->maps);
1004 			return -ENOMEM;
1005 		}
1006 	}
1007 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1008 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1009 
1010 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1011 		if (per_thread ||
1012 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1013 			if (thread_data->maps) {
1014 				thread_data->maps[tm] = &mmap[m];
1015 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1016 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1017 			}
1018 			if (thread_data->overwrite_maps) {
1019 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1020 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1021 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1022 			}
1023 			tm++;
1024 		}
1025 	}
1026 
1027 	return 0;
1028 }
1029 
1030 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1031 {
1032 	int f, tm, pos;
1033 	struct mmap *map, *overwrite_map;
1034 
1035 	fdarray__init(&thread_data->pollfd, 64);
1036 
1037 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1038 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1039 		overwrite_map = thread_data->overwrite_maps ?
1040 				thread_data->overwrite_maps[tm] : NULL;
1041 
1042 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1043 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1044 
1045 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1046 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1047 							      &evlist->core.pollfd);
1048 				if (pos < 0)
1049 					return pos;
1050 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1051 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1052 			}
1053 		}
1054 	}
1055 
1056 	return 0;
1057 }
1058 
1059 static void record__free_thread_data(struct record *rec)
1060 {
1061 	int t;
1062 	struct record_thread *thread_data = rec->thread_data;
1063 
1064 	if (thread_data == NULL)
1065 		return;
1066 
1067 	for (t = 0; t < rec->nr_threads; t++) {
1068 		record__thread_data_close_pipes(&thread_data[t]);
1069 		zfree(&thread_data[t].maps);
1070 		zfree(&thread_data[t].overwrite_maps);
1071 		fdarray__exit(&thread_data[t].pollfd);
1072 	}
1073 
1074 	zfree(&rec->thread_data);
1075 }
1076 
1077 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1078 {
1079 	int t, ret;
1080 	struct record_thread *thread_data;
1081 
1082 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1083 	if (!rec->thread_data) {
1084 		pr_err("Failed to allocate thread data\n");
1085 		return -ENOMEM;
1086 	}
1087 	thread_data = rec->thread_data;
1088 
1089 	for (t = 0; t < rec->nr_threads; t++)
1090 		record__thread_data_init_pipes(&thread_data[t]);
1091 
1092 	for (t = 0; t < rec->nr_threads; t++) {
1093 		thread_data[t].rec = rec;
1094 		thread_data[t].mask = &rec->thread_masks[t];
1095 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1096 		if (ret) {
1097 			pr_err("Failed to initialize thread[%d] maps\n", t);
1098 			goto out_free;
1099 		}
1100 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1101 		if (ret) {
1102 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1103 			goto out_free;
1104 		}
1105 		if (t) {
1106 			thread_data[t].tid = -1;
1107 			ret = record__thread_data_open_pipes(&thread_data[t]);
1108 			if (ret) {
1109 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1110 				goto out_free;
1111 			}
1112 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1113 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1114 			if (ret < 0) {
1115 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1116 				goto out_free;
1117 			}
1118 			thread_data[t].ctlfd_pos = ret;
1119 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1120 				 thread_data, thread_data[t].ctlfd_pos,
1121 				 thread_data[t].pipes.msg[0]);
1122 		} else {
1123 			thread_data[t].tid = gettid();
1124 			if (evlist->ctl_fd.pos == -1)
1125 				continue;
1126 			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1127 						      &evlist->core.pollfd);
1128 			if (ret < 0) {
1129 				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1130 				goto out_free;
1131 			}
1132 			thread_data[t].ctlfd_pos = ret;
1133 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1134 				 thread_data, thread_data[t].ctlfd_pos,
1135 				 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1136 		}
1137 	}
1138 
1139 	return 0;
1140 
1141 out_free:
1142 	record__free_thread_data(rec);
1143 
1144 	return ret;
1145 }
1146 
1147 static int record__mmap_evlist(struct record *rec,
1148 			       struct evlist *evlist)
1149 {
1150 	int i, ret;
1151 	struct record_opts *opts = &rec->opts;
1152 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1153 				  opts->auxtrace_sample_mode;
1154 	char msg[512];
1155 
1156 	if (opts->affinity != PERF_AFFINITY_SYS)
1157 		cpu__setup_cpunode_map();
1158 
1159 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1160 				 opts->auxtrace_mmap_pages,
1161 				 auxtrace_overwrite,
1162 				 opts->nr_cblocks, opts->affinity,
1163 				 opts->mmap_flush, opts->comp_level) < 0) {
1164 		if (errno == EPERM) {
1165 			pr_err("Permission error mapping pages.\n"
1166 			       "Consider increasing "
1167 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1168 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1169 			       "(current value: %u,%u)\n",
1170 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1171 			return -errno;
1172 		} else {
1173 			pr_err("failed to mmap with %d (%s)\n", errno,
1174 				str_error_r(errno, msg, sizeof(msg)));
1175 			if (errno)
1176 				return -errno;
1177 			else
1178 				return -EINVAL;
1179 		}
1180 	}
1181 
1182 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1183 		return -1;
1184 
1185 	ret = record__alloc_thread_data(rec, evlist);
1186 	if (ret)
1187 		return ret;
1188 
1189 	if (record__threads_enabled(rec)) {
1190 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1191 		if (ret) {
1192 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1193 			return ret;
1194 		}
1195 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1196 			if (evlist->mmap)
1197 				evlist->mmap[i].file = &rec->data.dir.files[i];
1198 			if (evlist->overwrite_mmap)
1199 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1200 		}
1201 	}
1202 
1203 	return 0;
1204 }
1205 
1206 static int record__mmap(struct record *rec)
1207 {
1208 	return record__mmap_evlist(rec, rec->evlist);
1209 }
1210 
1211 static int record__open(struct record *rec)
1212 {
1213 	char msg[BUFSIZ];
1214 	struct evsel *pos;
1215 	struct evlist *evlist = rec->evlist;
1216 	struct perf_session *session = rec->session;
1217 	struct record_opts *opts = &rec->opts;
1218 	int rc = 0;
1219 
1220 	/*
1221 	 * For initial_delay, system wide or a hybrid system, we need to add a
1222 	 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1223 	 * of waiting or event synthesis.
1224 	 */
1225 	if (opts->initial_delay || target__has_cpu(&opts->target) ||
1226 	    perf_pmu__has_hybrid()) {
1227 		pos = evlist__get_tracking_event(evlist);
1228 		if (!evsel__is_dummy_event(pos)) {
1229 			/* Set up dummy event. */
1230 			if (evlist__add_dummy(evlist))
1231 				return -ENOMEM;
1232 			pos = evlist__last(evlist);
1233 			evlist__set_tracking_event(evlist, pos);
1234 		}
1235 
1236 		/*
1237 		 * Enable the dummy event when the process is forked for
1238 		 * initial_delay, immediately for system wide.
1239 		 */
1240 		if (opts->initial_delay && !pos->immediate &&
1241 		    !target__has_cpu(&opts->target))
1242 			pos->core.attr.enable_on_exec = 1;
1243 		else
1244 			pos->immediate = 1;
1245 	}
1246 
1247 	evlist__config(evlist, opts, &callchain_param);
1248 
1249 	evlist__for_each_entry(evlist, pos) {
1250 try_again:
1251 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1252 			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1253 				if (verbose > 0)
1254 					ui__warning("%s\n", msg);
1255 				goto try_again;
1256 			}
1257 			if ((errno == EINVAL || errno == EBADF) &&
1258 			    pos->core.leader != &pos->core &&
1259 			    pos->weak_group) {
1260 			        pos = evlist__reset_weak_group(evlist, pos, true);
1261 				goto try_again;
1262 			}
1263 			rc = -errno;
1264 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1265 			ui__error("%s\n", msg);
1266 			goto out;
1267 		}
1268 
1269 		pos->supported = true;
1270 	}
1271 
1272 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1273 		pr_warning(
1274 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1275 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1276 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1277 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1278 "Samples in kernel modules won't be resolved at all.\n\n"
1279 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1280 "even with a suitable vmlinux or kallsyms file.\n\n");
1281 	}
1282 
1283 	if (evlist__apply_filters(evlist, &pos)) {
1284 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1285 			pos->filter, evsel__name(pos), errno,
1286 			str_error_r(errno, msg, sizeof(msg)));
1287 		rc = -1;
1288 		goto out;
1289 	}
1290 
1291 	rc = record__mmap(rec);
1292 	if (rc)
1293 		goto out;
1294 
1295 	session->evlist = evlist;
1296 	perf_session__set_id_hdr_size(session);
1297 out:
1298 	return rc;
1299 }
1300 
1301 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1302 {
1303 	if (rec->evlist->first_sample_time == 0)
1304 		rec->evlist->first_sample_time = sample_time;
1305 
1306 	if (sample_time)
1307 		rec->evlist->last_sample_time = sample_time;
1308 }
1309 
1310 static int process_sample_event(struct perf_tool *tool,
1311 				union perf_event *event,
1312 				struct perf_sample *sample,
1313 				struct evsel *evsel,
1314 				struct machine *machine)
1315 {
1316 	struct record *rec = container_of(tool, struct record, tool);
1317 
1318 	set_timestamp_boundary(rec, sample->time);
1319 
1320 	if (rec->buildid_all)
1321 		return 0;
1322 
1323 	rec->samples++;
1324 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1325 }
1326 
1327 static int process_buildids(struct record *rec)
1328 {
1329 	struct perf_session *session = rec->session;
1330 
1331 	if (perf_data__size(&rec->data) == 0)
1332 		return 0;
1333 
1334 	/*
1335 	 * During this process, it'll load kernel map and replace the
1336 	 * dso->long_name to a real pathname it found.  In this case
1337 	 * we prefer the vmlinux path like
1338 	 *   /lib/modules/3.16.4/build/vmlinux
1339 	 *
1340 	 * rather than build-id path (in debug directory).
1341 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1342 	 */
1343 	symbol_conf.ignore_vmlinux_buildid = true;
1344 
1345 	/*
1346 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1347 	 * so no need to process samples. But if timestamp_boundary is enabled,
1348 	 * it still needs to walk on all samples to get the timestamps of
1349 	 * first/last samples.
1350 	 */
1351 	if (rec->buildid_all && !rec->timestamp_boundary)
1352 		rec->tool.sample = NULL;
1353 
1354 	return perf_session__process_events(session);
1355 }
1356 
1357 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1358 {
1359 	int err;
1360 	struct perf_tool *tool = data;
1361 	/*
1362 	 *As for guest kernel when processing subcommand record&report,
1363 	 *we arrange module mmap prior to guest kernel mmap and trigger
1364 	 *a preload dso because default guest module symbols are loaded
1365 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1366 	 *method is used to avoid symbol missing when the first addr is
1367 	 *in module instead of in guest kernel.
1368 	 */
1369 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1370 					     machine);
1371 	if (err < 0)
1372 		pr_err("Couldn't record guest kernel [%d]'s reference"
1373 		       " relocation symbol.\n", machine->pid);
1374 
1375 	/*
1376 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1377 	 * have no _text sometimes.
1378 	 */
1379 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1380 						 machine);
1381 	if (err < 0)
1382 		pr_err("Couldn't record guest kernel [%d]'s reference"
1383 		       " relocation symbol.\n", machine->pid);
1384 }
1385 
1386 static struct perf_event_header finished_round_event = {
1387 	.size = sizeof(struct perf_event_header),
1388 	.type = PERF_RECORD_FINISHED_ROUND,
1389 };
1390 
1391 static struct perf_event_header finished_init_event = {
1392 	.size = sizeof(struct perf_event_header),
1393 	.type = PERF_RECORD_FINISHED_INIT,
1394 };
1395 
1396 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1397 {
1398 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1399 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1400 			  thread->mask->affinity.nbits)) {
1401 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1402 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1403 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1404 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1405 					(cpu_set_t *)thread->mask->affinity.bits);
1406 		if (verbose == 2) {
1407 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1408 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1409 		}
1410 	}
1411 }
1412 
1413 static size_t process_comp_header(void *record, size_t increment)
1414 {
1415 	struct perf_record_compressed *event = record;
1416 	size_t size = sizeof(*event);
1417 
1418 	if (increment) {
1419 		event->header.size += increment;
1420 		return increment;
1421 	}
1422 
1423 	event->header.type = PERF_RECORD_COMPRESSED;
1424 	event->header.size = size;
1425 
1426 	return size;
1427 }
1428 
1429 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1430 			    void *dst, size_t dst_size, void *src, size_t src_size)
1431 {
1432 	size_t compressed;
1433 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1434 	struct zstd_data *zstd_data = &session->zstd_data;
1435 
1436 	if (map && map->file)
1437 		zstd_data = &map->zstd_data;
1438 
1439 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1440 						     max_record_size, process_comp_header);
1441 
1442 	if (map && map->file) {
1443 		thread->bytes_transferred += src_size;
1444 		thread->bytes_compressed  += compressed;
1445 	} else {
1446 		session->bytes_transferred += src_size;
1447 		session->bytes_compressed  += compressed;
1448 	}
1449 
1450 	return compressed;
1451 }
1452 
1453 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1454 				    bool overwrite, bool synch)
1455 {
1456 	u64 bytes_written = rec->bytes_written;
1457 	int i;
1458 	int rc = 0;
1459 	int nr_mmaps;
1460 	struct mmap **maps;
1461 	int trace_fd = rec->data.file.fd;
1462 	off_t off = 0;
1463 
1464 	if (!evlist)
1465 		return 0;
1466 
1467 	nr_mmaps = thread->nr_mmaps;
1468 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1469 
1470 	if (!maps)
1471 		return 0;
1472 
1473 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1474 		return 0;
1475 
1476 	if (record__aio_enabled(rec))
1477 		off = record__aio_get_pos(trace_fd);
1478 
1479 	for (i = 0; i < nr_mmaps; i++) {
1480 		u64 flush = 0;
1481 		struct mmap *map = maps[i];
1482 
1483 		if (map->core.base) {
1484 			record__adjust_affinity(rec, map);
1485 			if (synch) {
1486 				flush = map->core.flush;
1487 				map->core.flush = 1;
1488 			}
1489 			if (!record__aio_enabled(rec)) {
1490 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1491 					if (synch)
1492 						map->core.flush = flush;
1493 					rc = -1;
1494 					goto out;
1495 				}
1496 			} else {
1497 				if (record__aio_push(rec, map, &off) < 0) {
1498 					record__aio_set_pos(trace_fd, off);
1499 					if (synch)
1500 						map->core.flush = flush;
1501 					rc = -1;
1502 					goto out;
1503 				}
1504 			}
1505 			if (synch)
1506 				map->core.flush = flush;
1507 		}
1508 
1509 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1510 		    !rec->opts.auxtrace_sample_mode &&
1511 		    record__auxtrace_mmap_read(rec, map) != 0) {
1512 			rc = -1;
1513 			goto out;
1514 		}
1515 	}
1516 
1517 	if (record__aio_enabled(rec))
1518 		record__aio_set_pos(trace_fd, off);
1519 
1520 	/*
1521 	 * Mark the round finished in case we wrote
1522 	 * at least one event.
1523 	 *
1524 	 * No need for round events in directory mode,
1525 	 * because per-cpu maps and files have data
1526 	 * sorted by kernel.
1527 	 */
1528 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1529 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1530 
1531 	if (overwrite)
1532 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1533 out:
1534 	return rc;
1535 }
1536 
1537 static int record__mmap_read_all(struct record *rec, bool synch)
1538 {
1539 	int err;
1540 
1541 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1542 	if (err)
1543 		return err;
1544 
1545 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1546 }
1547 
1548 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1549 					   void *arg __maybe_unused)
1550 {
1551 	struct perf_mmap *map = fda->priv[fd].ptr;
1552 
1553 	if (map)
1554 		perf_mmap__put(map);
1555 }
1556 
1557 static void *record__thread(void *arg)
1558 {
1559 	enum thread_msg msg = THREAD_MSG__READY;
1560 	bool terminate = false;
1561 	struct fdarray *pollfd;
1562 	int err, ctlfd_pos;
1563 
1564 	thread = arg;
1565 	thread->tid = gettid();
1566 
1567 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1568 	if (err == -1)
1569 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1570 			   thread->tid, strerror(errno));
1571 
1572 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1573 
1574 	pollfd = &thread->pollfd;
1575 	ctlfd_pos = thread->ctlfd_pos;
1576 
1577 	for (;;) {
1578 		unsigned long long hits = thread->samples;
1579 
1580 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1581 			break;
1582 
1583 		if (hits == thread->samples) {
1584 
1585 			err = fdarray__poll(pollfd, -1);
1586 			/*
1587 			 * Propagate error, only if there's any. Ignore positive
1588 			 * number of returned events and interrupt error.
1589 			 */
1590 			if (err > 0 || (err < 0 && errno == EINTR))
1591 				err = 0;
1592 			thread->waking++;
1593 
1594 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1595 					    record__thread_munmap_filtered, NULL) == 0)
1596 				break;
1597 		}
1598 
1599 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1600 			terminate = true;
1601 			close(thread->pipes.msg[0]);
1602 			thread->pipes.msg[0] = -1;
1603 			pollfd->entries[ctlfd_pos].fd = -1;
1604 			pollfd->entries[ctlfd_pos].events = 0;
1605 		}
1606 
1607 		pollfd->entries[ctlfd_pos].revents = 0;
1608 	}
1609 	record__mmap_read_all(thread->rec, true);
1610 
1611 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1612 	if (err == -1)
1613 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1614 			   thread->tid, strerror(errno));
1615 
1616 	return NULL;
1617 }
1618 
1619 static void record__init_features(struct record *rec)
1620 {
1621 	struct perf_session *session = rec->session;
1622 	int feat;
1623 
1624 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1625 		perf_header__set_feat(&session->header, feat);
1626 
1627 	if (rec->no_buildid)
1628 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1629 
1630 	if (!have_tracepoints(&rec->evlist->core.entries))
1631 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1632 
1633 	if (!rec->opts.branch_stack)
1634 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1635 
1636 	if (!rec->opts.full_auxtrace)
1637 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1638 
1639 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1640 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1641 
1642 	if (!rec->opts.use_clockid)
1643 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1644 
1645 	if (!record__threads_enabled(rec))
1646 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1647 
1648 	if (!record__comp_enabled(rec))
1649 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1650 
1651 	perf_header__clear_feat(&session->header, HEADER_STAT);
1652 }
1653 
1654 static void
1655 record__finish_output(struct record *rec)
1656 {
1657 	int i;
1658 	struct perf_data *data = &rec->data;
1659 	int fd = perf_data__fd(data);
1660 
1661 	if (data->is_pipe)
1662 		return;
1663 
1664 	rec->session->header.data_size += rec->bytes_written;
1665 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1666 	if (record__threads_enabled(rec)) {
1667 		for (i = 0; i < data->dir.nr; i++)
1668 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1669 	}
1670 
1671 	if (!rec->no_buildid) {
1672 		process_buildids(rec);
1673 
1674 		if (rec->buildid_all)
1675 			dsos__hit_all(rec->session);
1676 	}
1677 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1678 
1679 	return;
1680 }
1681 
1682 static int record__synthesize_workload(struct record *rec, bool tail)
1683 {
1684 	int err;
1685 	struct perf_thread_map *thread_map;
1686 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1687 
1688 	if (rec->opts.tail_synthesize != tail)
1689 		return 0;
1690 
1691 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1692 	if (thread_map == NULL)
1693 		return -1;
1694 
1695 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1696 						 process_synthesized_event,
1697 						 &rec->session->machines.host,
1698 						 needs_mmap,
1699 						 rec->opts.sample_address);
1700 	perf_thread_map__put(thread_map);
1701 	return err;
1702 }
1703 
1704 static int write_finished_init(struct record *rec, bool tail)
1705 {
1706 	if (rec->opts.tail_synthesize != tail)
1707 		return 0;
1708 
1709 	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1710 }
1711 
1712 static int record__synthesize(struct record *rec, bool tail);
1713 
1714 static int
1715 record__switch_output(struct record *rec, bool at_exit)
1716 {
1717 	struct perf_data *data = &rec->data;
1718 	int fd, err;
1719 	char *new_filename;
1720 
1721 	/* Same Size:      "2015122520103046"*/
1722 	char timestamp[] = "InvalidTimestamp";
1723 
1724 	record__aio_mmap_read_sync(rec);
1725 
1726 	write_finished_init(rec, true);
1727 
1728 	record__synthesize(rec, true);
1729 	if (target__none(&rec->opts.target))
1730 		record__synthesize_workload(rec, true);
1731 
1732 	rec->samples = 0;
1733 	record__finish_output(rec);
1734 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1735 	if (err) {
1736 		pr_err("Failed to get current timestamp\n");
1737 		return -EINVAL;
1738 	}
1739 
1740 	fd = perf_data__switch(data, timestamp,
1741 				    rec->session->header.data_offset,
1742 				    at_exit, &new_filename);
1743 	if (fd >= 0 && !at_exit) {
1744 		rec->bytes_written = 0;
1745 		rec->session->header.data_size = 0;
1746 	}
1747 
1748 	if (!quiet)
1749 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1750 			data->path, timestamp);
1751 
1752 	if (rec->switch_output.num_files) {
1753 		int n = rec->switch_output.cur_file + 1;
1754 
1755 		if (n >= rec->switch_output.num_files)
1756 			n = 0;
1757 		rec->switch_output.cur_file = n;
1758 		if (rec->switch_output.filenames[n]) {
1759 			remove(rec->switch_output.filenames[n]);
1760 			zfree(&rec->switch_output.filenames[n]);
1761 		}
1762 		rec->switch_output.filenames[n] = new_filename;
1763 	} else {
1764 		free(new_filename);
1765 	}
1766 
1767 	/* Output tracking events */
1768 	if (!at_exit) {
1769 		record__synthesize(rec, false);
1770 
1771 		/*
1772 		 * In 'perf record --switch-output' without -a,
1773 		 * record__synthesize() in record__switch_output() won't
1774 		 * generate tracking events because there's no thread_map
1775 		 * in evlist. Which causes newly created perf.data doesn't
1776 		 * contain map and comm information.
1777 		 * Create a fake thread_map and directly call
1778 		 * perf_event__synthesize_thread_map() for those events.
1779 		 */
1780 		if (target__none(&rec->opts.target))
1781 			record__synthesize_workload(rec, false);
1782 		write_finished_init(rec, false);
1783 	}
1784 	return fd;
1785 }
1786 
1787 static volatile int workload_exec_errno;
1788 
1789 /*
1790  * evlist__prepare_workload will send a SIGUSR1
1791  * if the fork fails, since we asked by setting its
1792  * want_signal to true.
1793  */
1794 static void workload_exec_failed_signal(int signo __maybe_unused,
1795 					siginfo_t *info,
1796 					void *ucontext __maybe_unused)
1797 {
1798 	workload_exec_errno = info->si_value.sival_int;
1799 	done = 1;
1800 	child_finished = 1;
1801 }
1802 
1803 static void snapshot_sig_handler(int sig);
1804 static void alarm_sig_handler(int sig);
1805 
1806 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1807 {
1808 	if (evlist) {
1809 		if (evlist->mmap && evlist->mmap[0].core.base)
1810 			return evlist->mmap[0].core.base;
1811 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1812 			return evlist->overwrite_mmap[0].core.base;
1813 	}
1814 	return NULL;
1815 }
1816 
1817 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1818 {
1819 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1820 	if (pc)
1821 		return pc;
1822 	return NULL;
1823 }
1824 
1825 static int record__synthesize(struct record *rec, bool tail)
1826 {
1827 	struct perf_session *session = rec->session;
1828 	struct machine *machine = &session->machines.host;
1829 	struct perf_data *data = &rec->data;
1830 	struct record_opts *opts = &rec->opts;
1831 	struct perf_tool *tool = &rec->tool;
1832 	int err = 0;
1833 	event_op f = process_synthesized_event;
1834 
1835 	if (rec->opts.tail_synthesize != tail)
1836 		return 0;
1837 
1838 	if (data->is_pipe) {
1839 		err = perf_event__synthesize_for_pipe(tool, session, data,
1840 						      process_synthesized_event);
1841 		if (err < 0)
1842 			goto out;
1843 
1844 		rec->bytes_written += err;
1845 	}
1846 
1847 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1848 					  process_synthesized_event, machine);
1849 	if (err)
1850 		goto out;
1851 
1852 	/* Synthesize id_index before auxtrace_info */
1853 	err = perf_event__synthesize_id_index(tool,
1854 					      process_synthesized_event,
1855 					      session->evlist, machine);
1856 	if (err)
1857 		goto out;
1858 
1859 	if (rec->opts.full_auxtrace) {
1860 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1861 					session, process_synthesized_event);
1862 		if (err)
1863 			goto out;
1864 	}
1865 
1866 	if (!evlist__exclude_kernel(rec->evlist)) {
1867 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1868 							 machine);
1869 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1870 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1871 				   "Check /proc/kallsyms permission or run as root.\n");
1872 
1873 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1874 						     machine);
1875 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1876 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1877 				   "Check /proc/modules permission or run as root.\n");
1878 	}
1879 
1880 	if (perf_guest) {
1881 		machines__process_guests(&session->machines,
1882 					 perf_event__synthesize_guest_os, tool);
1883 	}
1884 
1885 	err = perf_event__synthesize_extra_attr(&rec->tool,
1886 						rec->evlist,
1887 						process_synthesized_event,
1888 						data->is_pipe);
1889 	if (err)
1890 		goto out;
1891 
1892 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1893 						 process_synthesized_event,
1894 						NULL);
1895 	if (err < 0) {
1896 		pr_err("Couldn't synthesize thread map.\n");
1897 		return err;
1898 	}
1899 
1900 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
1901 					     process_synthesized_event, NULL);
1902 	if (err < 0) {
1903 		pr_err("Couldn't synthesize cpu map.\n");
1904 		return err;
1905 	}
1906 
1907 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1908 						machine, opts);
1909 	if (err < 0) {
1910 		pr_warning("Couldn't synthesize bpf events.\n");
1911 		err = 0;
1912 	}
1913 
1914 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1915 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1916 						     machine);
1917 		if (err < 0) {
1918 			pr_warning("Couldn't synthesize cgroup events.\n");
1919 			err = 0;
1920 		}
1921 	}
1922 
1923 	if (rec->opts.nr_threads_synthesize > 1) {
1924 		perf_set_multithreaded();
1925 		f = process_locked_synthesized_event;
1926 	}
1927 
1928 	if (rec->opts.synth & PERF_SYNTH_TASK) {
1929 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1930 
1931 		err = __machine__synthesize_threads(machine, tool, &opts->target,
1932 						    rec->evlist->core.threads,
1933 						    f, needs_mmap, opts->sample_address,
1934 						    rec->opts.nr_threads_synthesize);
1935 	}
1936 
1937 	if (rec->opts.nr_threads_synthesize > 1)
1938 		perf_set_singlethreaded();
1939 
1940 out:
1941 	return err;
1942 }
1943 
1944 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1945 {
1946 	struct record *rec = data;
1947 	pthread_kill(rec->thread_id, SIGUSR2);
1948 	return 0;
1949 }
1950 
1951 static int record__setup_sb_evlist(struct record *rec)
1952 {
1953 	struct record_opts *opts = &rec->opts;
1954 
1955 	if (rec->sb_evlist != NULL) {
1956 		/*
1957 		 * We get here if --switch-output-event populated the
1958 		 * sb_evlist, so associate a callback that will send a SIGUSR2
1959 		 * to the main thread.
1960 		 */
1961 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1962 		rec->thread_id = pthread_self();
1963 	}
1964 #ifdef HAVE_LIBBPF_SUPPORT
1965 	if (!opts->no_bpf_event) {
1966 		if (rec->sb_evlist == NULL) {
1967 			rec->sb_evlist = evlist__new();
1968 
1969 			if (rec->sb_evlist == NULL) {
1970 				pr_err("Couldn't create side band evlist.\n.");
1971 				return -1;
1972 			}
1973 		}
1974 
1975 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1976 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
1977 			return -1;
1978 		}
1979 	}
1980 #endif
1981 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1982 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1983 		opts->no_bpf_event = true;
1984 	}
1985 
1986 	return 0;
1987 }
1988 
1989 static int record__init_clock(struct record *rec)
1990 {
1991 	struct perf_session *session = rec->session;
1992 	struct timespec ref_clockid;
1993 	struct timeval ref_tod;
1994 	u64 ref;
1995 
1996 	if (!rec->opts.use_clockid)
1997 		return 0;
1998 
1999 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2000 		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2001 
2002 	session->header.env.clock.clockid = rec->opts.clockid;
2003 
2004 	if (gettimeofday(&ref_tod, NULL) != 0) {
2005 		pr_err("gettimeofday failed, cannot set reference time.\n");
2006 		return -1;
2007 	}
2008 
2009 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2010 		pr_err("clock_gettime failed, cannot set reference time.\n");
2011 		return -1;
2012 	}
2013 
2014 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2015 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2016 
2017 	session->header.env.clock.tod_ns = ref;
2018 
2019 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2020 	      (u64) ref_clockid.tv_nsec;
2021 
2022 	session->header.env.clock.clockid_ns = ref;
2023 	return 0;
2024 }
2025 
2026 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2027 {
2028 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2029 		trigger_hit(&auxtrace_snapshot_trigger);
2030 		auxtrace_record__snapshot_started = 1;
2031 		if (auxtrace_record__snapshot_start(rec->itr))
2032 			trigger_error(&auxtrace_snapshot_trigger);
2033 	}
2034 }
2035 
2036 static void record__uniquify_name(struct record *rec)
2037 {
2038 	struct evsel *pos;
2039 	struct evlist *evlist = rec->evlist;
2040 	char *new_name;
2041 	int ret;
2042 
2043 	if (!perf_pmu__has_hybrid())
2044 		return;
2045 
2046 	evlist__for_each_entry(evlist, pos) {
2047 		if (!evsel__is_hybrid(pos))
2048 			continue;
2049 
2050 		if (strchr(pos->name, '/'))
2051 			continue;
2052 
2053 		ret = asprintf(&new_name, "%s/%s/",
2054 			       pos->pmu_name, pos->name);
2055 		if (ret) {
2056 			free(pos->name);
2057 			pos->name = new_name;
2058 		}
2059 	}
2060 }
2061 
2062 static int record__terminate_thread(struct record_thread *thread_data)
2063 {
2064 	int err;
2065 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2066 	pid_t tid = thread_data->tid;
2067 
2068 	close(thread_data->pipes.msg[1]);
2069 	thread_data->pipes.msg[1] = -1;
2070 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2071 	if (err > 0)
2072 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2073 	else
2074 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2075 			   thread->tid, tid);
2076 
2077 	return 0;
2078 }
2079 
2080 static int record__start_threads(struct record *rec)
2081 {
2082 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2083 	struct record_thread *thread_data = rec->thread_data;
2084 	sigset_t full, mask;
2085 	pthread_t handle;
2086 	pthread_attr_t attrs;
2087 
2088 	thread = &thread_data[0];
2089 
2090 	if (!record__threads_enabled(rec))
2091 		return 0;
2092 
2093 	sigfillset(&full);
2094 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2095 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2096 		return -1;
2097 	}
2098 
2099 	pthread_attr_init(&attrs);
2100 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2101 
2102 	for (t = 1; t < nr_threads; t++) {
2103 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2104 
2105 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2106 		pthread_attr_setaffinity_np(&attrs,
2107 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2108 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2109 #endif
2110 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2111 			for (tt = 1; tt < t; tt++)
2112 				record__terminate_thread(&thread_data[t]);
2113 			pr_err("Failed to start threads: %s\n", strerror(errno));
2114 			ret = -1;
2115 			goto out_err;
2116 		}
2117 
2118 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2119 		if (err > 0)
2120 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2121 				  thread_msg_tags[msg]);
2122 		else
2123 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2124 				   thread->tid, rec->thread_data[t].tid);
2125 	}
2126 
2127 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2128 			(cpu_set_t *)thread->mask->affinity.bits);
2129 
2130 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2131 
2132 out_err:
2133 	pthread_attr_destroy(&attrs);
2134 
2135 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2136 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2137 		ret = -1;
2138 	}
2139 
2140 	return ret;
2141 }
2142 
2143 static int record__stop_threads(struct record *rec)
2144 {
2145 	int t;
2146 	struct record_thread *thread_data = rec->thread_data;
2147 
2148 	for (t = 1; t < rec->nr_threads; t++)
2149 		record__terminate_thread(&thread_data[t]);
2150 
2151 	for (t = 0; t < rec->nr_threads; t++) {
2152 		rec->samples += thread_data[t].samples;
2153 		if (!record__threads_enabled(rec))
2154 			continue;
2155 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2156 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2157 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2158 			 thread_data[t].samples, thread_data[t].waking);
2159 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2160 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2161 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2162 		else
2163 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2164 	}
2165 
2166 	return 0;
2167 }
2168 
2169 static unsigned long record__waking(struct record *rec)
2170 {
2171 	int t;
2172 	unsigned long waking = 0;
2173 	struct record_thread *thread_data = rec->thread_data;
2174 
2175 	for (t = 0; t < rec->nr_threads; t++)
2176 		waking += thread_data[t].waking;
2177 
2178 	return waking;
2179 }
2180 
2181 static int __cmd_record(struct record *rec, int argc, const char **argv)
2182 {
2183 	int err;
2184 	int status = 0;
2185 	const bool forks = argc > 0;
2186 	struct perf_tool *tool = &rec->tool;
2187 	struct record_opts *opts = &rec->opts;
2188 	struct perf_data *data = &rec->data;
2189 	struct perf_session *session;
2190 	bool disabled = false, draining = false;
2191 	int fd;
2192 	float ratio = 0;
2193 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2194 
2195 	atexit(record__sig_exit);
2196 	signal(SIGCHLD, sig_handler);
2197 	signal(SIGINT, sig_handler);
2198 	signal(SIGTERM, sig_handler);
2199 	signal(SIGSEGV, sigsegv_handler);
2200 
2201 	if (rec->opts.record_namespaces)
2202 		tool->namespace_events = true;
2203 
2204 	if (rec->opts.record_cgroup) {
2205 #ifdef HAVE_FILE_HANDLE
2206 		tool->cgroup_events = true;
2207 #else
2208 		pr_err("cgroup tracking is not supported\n");
2209 		return -1;
2210 #endif
2211 	}
2212 
2213 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2214 		signal(SIGUSR2, snapshot_sig_handler);
2215 		if (rec->opts.auxtrace_snapshot_mode)
2216 			trigger_on(&auxtrace_snapshot_trigger);
2217 		if (rec->switch_output.enabled)
2218 			trigger_on(&switch_output_trigger);
2219 	} else {
2220 		signal(SIGUSR2, SIG_IGN);
2221 	}
2222 
2223 	session = perf_session__new(data, tool);
2224 	if (IS_ERR(session)) {
2225 		pr_err("Perf session creation failed.\n");
2226 		return PTR_ERR(session);
2227 	}
2228 
2229 	if (record__threads_enabled(rec)) {
2230 		if (perf_data__is_pipe(&rec->data)) {
2231 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2232 			return -1;
2233 		}
2234 		if (rec->opts.full_auxtrace) {
2235 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2236 			return -1;
2237 		}
2238 	}
2239 
2240 	fd = perf_data__fd(data);
2241 	rec->session = session;
2242 
2243 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2244 		pr_err("Compression initialization failed.\n");
2245 		return -1;
2246 	}
2247 #ifdef HAVE_EVENTFD_SUPPORT
2248 	done_fd = eventfd(0, EFD_NONBLOCK);
2249 	if (done_fd < 0) {
2250 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2251 		status = -1;
2252 		goto out_delete_session;
2253 	}
2254 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2255 	if (err < 0) {
2256 		pr_err("Failed to add wakeup eventfd to poll list\n");
2257 		status = err;
2258 		goto out_delete_session;
2259 	}
2260 #endif // HAVE_EVENTFD_SUPPORT
2261 
2262 	session->header.env.comp_type  = PERF_COMP_ZSTD;
2263 	session->header.env.comp_level = rec->opts.comp_level;
2264 
2265 	if (rec->opts.kcore &&
2266 	    !record__kcore_readable(&session->machines.host)) {
2267 		pr_err("ERROR: kcore is not readable.\n");
2268 		return -1;
2269 	}
2270 
2271 	if (record__init_clock(rec))
2272 		return -1;
2273 
2274 	record__init_features(rec);
2275 
2276 	if (forks) {
2277 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2278 					       workload_exec_failed_signal);
2279 		if (err < 0) {
2280 			pr_err("Couldn't run the workload!\n");
2281 			status = err;
2282 			goto out_delete_session;
2283 		}
2284 	}
2285 
2286 	/*
2287 	 * If we have just single event and are sending data
2288 	 * through pipe, we need to force the ids allocation,
2289 	 * because we synthesize event name through the pipe
2290 	 * and need the id for that.
2291 	 */
2292 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2293 		rec->opts.sample_id = true;
2294 
2295 	record__uniquify_name(rec);
2296 
2297 	if (record__open(rec) != 0) {
2298 		err = -1;
2299 		goto out_free_threads;
2300 	}
2301 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2302 
2303 	if (rec->opts.kcore) {
2304 		err = record__kcore_copy(&session->machines.host, data);
2305 		if (err) {
2306 			pr_err("ERROR: Failed to copy kcore\n");
2307 			goto out_free_threads;
2308 		}
2309 	}
2310 
2311 	err = bpf__apply_obj_config();
2312 	if (err) {
2313 		char errbuf[BUFSIZ];
2314 
2315 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2316 		pr_err("ERROR: Apply config to BPF failed: %s\n",
2317 			 errbuf);
2318 		goto out_free_threads;
2319 	}
2320 
2321 	/*
2322 	 * Normally perf_session__new would do this, but it doesn't have the
2323 	 * evlist.
2324 	 */
2325 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2326 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2327 		rec->tool.ordered_events = false;
2328 	}
2329 
2330 	if (!rec->evlist->core.nr_groups)
2331 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2332 
2333 	if (data->is_pipe) {
2334 		err = perf_header__write_pipe(fd);
2335 		if (err < 0)
2336 			goto out_free_threads;
2337 	} else {
2338 		err = perf_session__write_header(session, rec->evlist, fd, false);
2339 		if (err < 0)
2340 			goto out_free_threads;
2341 	}
2342 
2343 	err = -1;
2344 	if (!rec->no_buildid
2345 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2346 		pr_err("Couldn't generate buildids. "
2347 		       "Use --no-buildid to profile anyway.\n");
2348 		goto out_free_threads;
2349 	}
2350 
2351 	err = record__setup_sb_evlist(rec);
2352 	if (err)
2353 		goto out_free_threads;
2354 
2355 	err = record__synthesize(rec, false);
2356 	if (err < 0)
2357 		goto out_free_threads;
2358 
2359 	if (rec->realtime_prio) {
2360 		struct sched_param param;
2361 
2362 		param.sched_priority = rec->realtime_prio;
2363 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2364 			pr_err("Could not set realtime priority.\n");
2365 			err = -1;
2366 			goto out_free_threads;
2367 		}
2368 	}
2369 
2370 	if (record__start_threads(rec))
2371 		goto out_free_threads;
2372 
2373 	/*
2374 	 * When perf is starting the traced process, all the events
2375 	 * (apart from group members) have enable_on_exec=1 set,
2376 	 * so don't spoil it by prematurely enabling them.
2377 	 */
2378 	if (!target__none(&opts->target) && !opts->initial_delay)
2379 		evlist__enable(rec->evlist);
2380 
2381 	/*
2382 	 * Let the child rip
2383 	 */
2384 	if (forks) {
2385 		struct machine *machine = &session->machines.host;
2386 		union perf_event *event;
2387 		pid_t tgid;
2388 
2389 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2390 		if (event == NULL) {
2391 			err = -ENOMEM;
2392 			goto out_child;
2393 		}
2394 
2395 		/*
2396 		 * Some H/W events are generated before COMM event
2397 		 * which is emitted during exec(), so perf script
2398 		 * cannot see a correct process name for those events.
2399 		 * Synthesize COMM event to prevent it.
2400 		 */
2401 		tgid = perf_event__synthesize_comm(tool, event,
2402 						   rec->evlist->workload.pid,
2403 						   process_synthesized_event,
2404 						   machine);
2405 		free(event);
2406 
2407 		if (tgid == -1)
2408 			goto out_child;
2409 
2410 		event = malloc(sizeof(event->namespaces) +
2411 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2412 			       machine->id_hdr_size);
2413 		if (event == NULL) {
2414 			err = -ENOMEM;
2415 			goto out_child;
2416 		}
2417 
2418 		/*
2419 		 * Synthesize NAMESPACES event for the command specified.
2420 		 */
2421 		perf_event__synthesize_namespaces(tool, event,
2422 						  rec->evlist->workload.pid,
2423 						  tgid, process_synthesized_event,
2424 						  machine);
2425 		free(event);
2426 
2427 		evlist__start_workload(rec->evlist);
2428 	}
2429 
2430 	if (opts->initial_delay) {
2431 		pr_info(EVLIST_DISABLED_MSG);
2432 		if (opts->initial_delay > 0) {
2433 			usleep(opts->initial_delay * USEC_PER_MSEC);
2434 			evlist__enable(rec->evlist);
2435 			pr_info(EVLIST_ENABLED_MSG);
2436 		}
2437 	}
2438 
2439 	trigger_ready(&auxtrace_snapshot_trigger);
2440 	trigger_ready(&switch_output_trigger);
2441 	perf_hooks__invoke_record_start();
2442 
2443 	/*
2444 	 * Must write FINISHED_INIT so it will be seen after all other
2445 	 * synthesized user events, but before any regular events.
2446 	 */
2447 	err = write_finished_init(rec, false);
2448 	if (err < 0)
2449 		goto out_child;
2450 
2451 	for (;;) {
2452 		unsigned long long hits = thread->samples;
2453 
2454 		/*
2455 		 * rec->evlist->bkw_mmap_state is possible to be
2456 		 * BKW_MMAP_EMPTY here: when done == true and
2457 		 * hits != rec->samples in previous round.
2458 		 *
2459 		 * evlist__toggle_bkw_mmap ensure we never
2460 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2461 		 */
2462 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2463 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2464 
2465 		if (record__mmap_read_all(rec, false) < 0) {
2466 			trigger_error(&auxtrace_snapshot_trigger);
2467 			trigger_error(&switch_output_trigger);
2468 			err = -1;
2469 			goto out_child;
2470 		}
2471 
2472 		if (auxtrace_record__snapshot_started) {
2473 			auxtrace_record__snapshot_started = 0;
2474 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2475 				record__read_auxtrace_snapshot(rec, false);
2476 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2477 				pr_err("AUX area tracing snapshot failed\n");
2478 				err = -1;
2479 				goto out_child;
2480 			}
2481 		}
2482 
2483 		if (trigger_is_hit(&switch_output_trigger)) {
2484 			/*
2485 			 * If switch_output_trigger is hit, the data in
2486 			 * overwritable ring buffer should have been collected,
2487 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2488 			 *
2489 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
2490 			 * record__mmap_read_all() didn't collect data from
2491 			 * overwritable ring buffer. Read again.
2492 			 */
2493 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2494 				continue;
2495 			trigger_ready(&switch_output_trigger);
2496 
2497 			/*
2498 			 * Reenable events in overwrite ring buffer after
2499 			 * record__mmap_read_all(): we should have collected
2500 			 * data from it.
2501 			 */
2502 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2503 
2504 			if (!quiet)
2505 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2506 					record__waking(rec));
2507 			thread->waking = 0;
2508 			fd = record__switch_output(rec, false);
2509 			if (fd < 0) {
2510 				pr_err("Failed to switch to new file\n");
2511 				trigger_error(&switch_output_trigger);
2512 				err = fd;
2513 				goto out_child;
2514 			}
2515 
2516 			/* re-arm the alarm */
2517 			if (rec->switch_output.time)
2518 				alarm(rec->switch_output.time);
2519 		}
2520 
2521 		if (hits == thread->samples) {
2522 			if (done || draining)
2523 				break;
2524 			err = fdarray__poll(&thread->pollfd, -1);
2525 			/*
2526 			 * Propagate error, only if there's any. Ignore positive
2527 			 * number of returned events and interrupt error.
2528 			 */
2529 			if (err > 0 || (err < 0 && errno == EINTR))
2530 				err = 0;
2531 			thread->waking++;
2532 
2533 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2534 					    record__thread_munmap_filtered, NULL) == 0)
2535 				draining = true;
2536 
2537 			evlist__ctlfd_update(rec->evlist,
2538 				&thread->pollfd.entries[thread->ctlfd_pos]);
2539 		}
2540 
2541 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2542 			switch (cmd) {
2543 			case EVLIST_CTL_CMD_SNAPSHOT:
2544 				hit_auxtrace_snapshot_trigger(rec);
2545 				evlist__ctlfd_ack(rec->evlist);
2546 				break;
2547 			case EVLIST_CTL_CMD_STOP:
2548 				done = 1;
2549 				break;
2550 			case EVLIST_CTL_CMD_ACK:
2551 			case EVLIST_CTL_CMD_UNSUPPORTED:
2552 			case EVLIST_CTL_CMD_ENABLE:
2553 			case EVLIST_CTL_CMD_DISABLE:
2554 			case EVLIST_CTL_CMD_EVLIST:
2555 			case EVLIST_CTL_CMD_PING:
2556 			default:
2557 				break;
2558 			}
2559 		}
2560 
2561 		/*
2562 		 * When perf is starting the traced process, at the end events
2563 		 * die with the process and we wait for that. Thus no need to
2564 		 * disable events in this case.
2565 		 */
2566 		if (done && !disabled && !target__none(&opts->target)) {
2567 			trigger_off(&auxtrace_snapshot_trigger);
2568 			evlist__disable(rec->evlist);
2569 			disabled = true;
2570 		}
2571 	}
2572 
2573 	trigger_off(&auxtrace_snapshot_trigger);
2574 	trigger_off(&switch_output_trigger);
2575 
2576 	if (opts->auxtrace_snapshot_on_exit)
2577 		record__auxtrace_snapshot_exit(rec);
2578 
2579 	if (forks && workload_exec_errno) {
2580 		char msg[STRERR_BUFSIZE], strevsels[2048];
2581 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2582 
2583 		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2584 
2585 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2586 			strevsels, argv[0], emsg);
2587 		err = -1;
2588 		goto out_child;
2589 	}
2590 
2591 	if (!quiet)
2592 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2593 			record__waking(rec));
2594 
2595 	write_finished_init(rec, true);
2596 
2597 	if (target__none(&rec->opts.target))
2598 		record__synthesize_workload(rec, true);
2599 
2600 out_child:
2601 	record__stop_threads(rec);
2602 	record__mmap_read_all(rec, true);
2603 out_free_threads:
2604 	record__free_thread_data(rec);
2605 	evlist__finalize_ctlfd(rec->evlist);
2606 	record__aio_mmap_read_sync(rec);
2607 
2608 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2609 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2610 		session->header.env.comp_ratio = ratio + 0.5;
2611 	}
2612 
2613 	if (forks) {
2614 		int exit_status;
2615 
2616 		if (!child_finished)
2617 			kill(rec->evlist->workload.pid, SIGTERM);
2618 
2619 		wait(&exit_status);
2620 
2621 		if (err < 0)
2622 			status = err;
2623 		else if (WIFEXITED(exit_status))
2624 			status = WEXITSTATUS(exit_status);
2625 		else if (WIFSIGNALED(exit_status))
2626 			signr = WTERMSIG(exit_status);
2627 	} else
2628 		status = err;
2629 
2630 	if (rec->off_cpu)
2631 		rec->bytes_written += off_cpu_write(rec->session);
2632 
2633 	record__synthesize(rec, true);
2634 	/* this will be recalculated during process_buildids() */
2635 	rec->samples = 0;
2636 
2637 	if (!err) {
2638 		if (!rec->timestamp_filename) {
2639 			record__finish_output(rec);
2640 		} else {
2641 			fd = record__switch_output(rec, true);
2642 			if (fd < 0) {
2643 				status = fd;
2644 				goto out_delete_session;
2645 			}
2646 		}
2647 	}
2648 
2649 	perf_hooks__invoke_record_end();
2650 
2651 	if (!err && !quiet) {
2652 		char samples[128];
2653 		const char *postfix = rec->timestamp_filename ?
2654 					".<timestamp>" : "";
2655 
2656 		if (rec->samples && !rec->opts.full_auxtrace)
2657 			scnprintf(samples, sizeof(samples),
2658 				  " (%" PRIu64 " samples)", rec->samples);
2659 		else
2660 			samples[0] = '\0';
2661 
2662 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2663 			perf_data__size(data) / 1024.0 / 1024.0,
2664 			data->path, postfix, samples);
2665 		if (ratio) {
2666 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2667 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2668 					ratio);
2669 		}
2670 		fprintf(stderr, " ]\n");
2671 	}
2672 
2673 out_delete_session:
2674 #ifdef HAVE_EVENTFD_SUPPORT
2675 	if (done_fd >= 0)
2676 		close(done_fd);
2677 #endif
2678 	zstd_fini(&session->zstd_data);
2679 	perf_session__delete(session);
2680 
2681 	if (!opts->no_bpf_event)
2682 		evlist__stop_sb_thread(rec->sb_evlist);
2683 	return status;
2684 }
2685 
2686 static void callchain_debug(struct callchain_param *callchain)
2687 {
2688 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2689 
2690 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2691 
2692 	if (callchain->record_mode == CALLCHAIN_DWARF)
2693 		pr_debug("callchain: stack dump size %d\n",
2694 			 callchain->dump_size);
2695 }
2696 
2697 int record_opts__parse_callchain(struct record_opts *record,
2698 				 struct callchain_param *callchain,
2699 				 const char *arg, bool unset)
2700 {
2701 	int ret;
2702 	callchain->enabled = !unset;
2703 
2704 	/* --no-call-graph */
2705 	if (unset) {
2706 		callchain->record_mode = CALLCHAIN_NONE;
2707 		pr_debug("callchain: disabled\n");
2708 		return 0;
2709 	}
2710 
2711 	ret = parse_callchain_record_opt(arg, callchain);
2712 	if (!ret) {
2713 		/* Enable data address sampling for DWARF unwind. */
2714 		if (callchain->record_mode == CALLCHAIN_DWARF)
2715 			record->sample_address = true;
2716 		callchain_debug(callchain);
2717 	}
2718 
2719 	return ret;
2720 }
2721 
2722 int record_parse_callchain_opt(const struct option *opt,
2723 			       const char *arg,
2724 			       int unset)
2725 {
2726 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2727 }
2728 
2729 int record_callchain_opt(const struct option *opt,
2730 			 const char *arg __maybe_unused,
2731 			 int unset __maybe_unused)
2732 {
2733 	struct callchain_param *callchain = opt->value;
2734 
2735 	callchain->enabled = true;
2736 
2737 	if (callchain->record_mode == CALLCHAIN_NONE)
2738 		callchain->record_mode = CALLCHAIN_FP;
2739 
2740 	callchain_debug(callchain);
2741 	return 0;
2742 }
2743 
2744 static int perf_record_config(const char *var, const char *value, void *cb)
2745 {
2746 	struct record *rec = cb;
2747 
2748 	if (!strcmp(var, "record.build-id")) {
2749 		if (!strcmp(value, "cache"))
2750 			rec->no_buildid_cache = false;
2751 		else if (!strcmp(value, "no-cache"))
2752 			rec->no_buildid_cache = true;
2753 		else if (!strcmp(value, "skip"))
2754 			rec->no_buildid = true;
2755 		else if (!strcmp(value, "mmap"))
2756 			rec->buildid_mmap = true;
2757 		else
2758 			return -1;
2759 		return 0;
2760 	}
2761 	if (!strcmp(var, "record.call-graph")) {
2762 		var = "call-graph.record-mode";
2763 		return perf_default_config(var, value, cb);
2764 	}
2765 #ifdef HAVE_AIO_SUPPORT
2766 	if (!strcmp(var, "record.aio")) {
2767 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2768 		if (!rec->opts.nr_cblocks)
2769 			rec->opts.nr_cblocks = nr_cblocks_default;
2770 	}
2771 #endif
2772 	if (!strcmp(var, "record.debuginfod")) {
2773 		rec->debuginfod.urls = strdup(value);
2774 		if (!rec->debuginfod.urls)
2775 			return -ENOMEM;
2776 		rec->debuginfod.set = true;
2777 	}
2778 
2779 	return 0;
2780 }
2781 
2782 
2783 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2784 {
2785 	struct record_opts *opts = (struct record_opts *)opt->value;
2786 
2787 	if (unset || !str)
2788 		return 0;
2789 
2790 	if (!strcasecmp(str, "node"))
2791 		opts->affinity = PERF_AFFINITY_NODE;
2792 	else if (!strcasecmp(str, "cpu"))
2793 		opts->affinity = PERF_AFFINITY_CPU;
2794 
2795 	return 0;
2796 }
2797 
2798 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2799 {
2800 	mask->nbits = nr_bits;
2801 	mask->bits = bitmap_zalloc(mask->nbits);
2802 	if (!mask->bits)
2803 		return -ENOMEM;
2804 
2805 	return 0;
2806 }
2807 
2808 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2809 {
2810 	bitmap_free(mask->bits);
2811 	mask->nbits = 0;
2812 }
2813 
2814 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2815 {
2816 	int ret;
2817 
2818 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2819 	if (ret) {
2820 		mask->affinity.bits = NULL;
2821 		return ret;
2822 	}
2823 
2824 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2825 	if (ret) {
2826 		record__mmap_cpu_mask_free(&mask->maps);
2827 		mask->maps.bits = NULL;
2828 	}
2829 
2830 	return ret;
2831 }
2832 
2833 static void record__thread_mask_free(struct thread_mask *mask)
2834 {
2835 	record__mmap_cpu_mask_free(&mask->maps);
2836 	record__mmap_cpu_mask_free(&mask->affinity);
2837 }
2838 
2839 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2840 {
2841 	int s;
2842 	struct record_opts *opts = opt->value;
2843 
2844 	if (unset || !str || !strlen(str)) {
2845 		opts->threads_spec = THREAD_SPEC__CPU;
2846 	} else {
2847 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
2848 			if (s == THREAD_SPEC__USER) {
2849 				opts->threads_user_spec = strdup(str);
2850 				if (!opts->threads_user_spec)
2851 					return -ENOMEM;
2852 				opts->threads_spec = THREAD_SPEC__USER;
2853 				break;
2854 			}
2855 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2856 				opts->threads_spec = s;
2857 				break;
2858 			}
2859 		}
2860 	}
2861 
2862 	if (opts->threads_spec == THREAD_SPEC__USER)
2863 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2864 	else
2865 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2866 
2867 	return 0;
2868 }
2869 
2870 static int parse_output_max_size(const struct option *opt,
2871 				 const char *str, int unset)
2872 {
2873 	unsigned long *s = (unsigned long *)opt->value;
2874 	static struct parse_tag tags_size[] = {
2875 		{ .tag  = 'B', .mult = 1       },
2876 		{ .tag  = 'K', .mult = 1 << 10 },
2877 		{ .tag  = 'M', .mult = 1 << 20 },
2878 		{ .tag  = 'G', .mult = 1 << 30 },
2879 		{ .tag  = 0 },
2880 	};
2881 	unsigned long val;
2882 
2883 	if (unset) {
2884 		*s = 0;
2885 		return 0;
2886 	}
2887 
2888 	val = parse_tag_value(str, tags_size);
2889 	if (val != (unsigned long) -1) {
2890 		*s = val;
2891 		return 0;
2892 	}
2893 
2894 	return -1;
2895 }
2896 
2897 static int record__parse_mmap_pages(const struct option *opt,
2898 				    const char *str,
2899 				    int unset __maybe_unused)
2900 {
2901 	struct record_opts *opts = opt->value;
2902 	char *s, *p;
2903 	unsigned int mmap_pages;
2904 	int ret;
2905 
2906 	if (!str)
2907 		return -EINVAL;
2908 
2909 	s = strdup(str);
2910 	if (!s)
2911 		return -ENOMEM;
2912 
2913 	p = strchr(s, ',');
2914 	if (p)
2915 		*p = '\0';
2916 
2917 	if (*s) {
2918 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2919 		if (ret)
2920 			goto out_free;
2921 		opts->mmap_pages = mmap_pages;
2922 	}
2923 
2924 	if (!p) {
2925 		ret = 0;
2926 		goto out_free;
2927 	}
2928 
2929 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2930 	if (ret)
2931 		goto out_free;
2932 
2933 	opts->auxtrace_mmap_pages = mmap_pages;
2934 
2935 out_free:
2936 	free(s);
2937 	return ret;
2938 }
2939 
2940 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2941 {
2942 }
2943 
2944 static int parse_control_option(const struct option *opt,
2945 				const char *str,
2946 				int unset __maybe_unused)
2947 {
2948 	struct record_opts *opts = opt->value;
2949 
2950 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2951 }
2952 
2953 static void switch_output_size_warn(struct record *rec)
2954 {
2955 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2956 	struct switch_output *s = &rec->switch_output;
2957 
2958 	wakeup_size /= 2;
2959 
2960 	if (s->size < wakeup_size) {
2961 		char buf[100];
2962 
2963 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2964 		pr_warning("WARNING: switch-output data size lower than "
2965 			   "wakeup kernel buffer size (%s) "
2966 			   "expect bigger perf.data sizes\n", buf);
2967 	}
2968 }
2969 
2970 static int switch_output_setup(struct record *rec)
2971 {
2972 	struct switch_output *s = &rec->switch_output;
2973 	static struct parse_tag tags_size[] = {
2974 		{ .tag  = 'B', .mult = 1       },
2975 		{ .tag  = 'K', .mult = 1 << 10 },
2976 		{ .tag  = 'M', .mult = 1 << 20 },
2977 		{ .tag  = 'G', .mult = 1 << 30 },
2978 		{ .tag  = 0 },
2979 	};
2980 	static struct parse_tag tags_time[] = {
2981 		{ .tag  = 's', .mult = 1        },
2982 		{ .tag  = 'm', .mult = 60       },
2983 		{ .tag  = 'h', .mult = 60*60    },
2984 		{ .tag  = 'd', .mult = 60*60*24 },
2985 		{ .tag  = 0 },
2986 	};
2987 	unsigned long val;
2988 
2989 	/*
2990 	 * If we're using --switch-output-events, then we imply its
2991 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2992 	 *  thread to its parent.
2993 	 */
2994 	if (rec->switch_output_event_set) {
2995 		if (record__threads_enabled(rec)) {
2996 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2997 			return 0;
2998 		}
2999 		goto do_signal;
3000 	}
3001 
3002 	if (!s->set)
3003 		return 0;
3004 
3005 	if (record__threads_enabled(rec)) {
3006 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3007 		return 0;
3008 	}
3009 
3010 	if (!strcmp(s->str, "signal")) {
3011 do_signal:
3012 		s->signal = true;
3013 		pr_debug("switch-output with SIGUSR2 signal\n");
3014 		goto enabled;
3015 	}
3016 
3017 	val = parse_tag_value(s->str, tags_size);
3018 	if (val != (unsigned long) -1) {
3019 		s->size = val;
3020 		pr_debug("switch-output with %s size threshold\n", s->str);
3021 		goto enabled;
3022 	}
3023 
3024 	val = parse_tag_value(s->str, tags_time);
3025 	if (val != (unsigned long) -1) {
3026 		s->time = val;
3027 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3028 			 s->str, s->time);
3029 		goto enabled;
3030 	}
3031 
3032 	return -1;
3033 
3034 enabled:
3035 	rec->timestamp_filename = true;
3036 	s->enabled              = true;
3037 
3038 	if (s->size && !rec->opts.no_buffering)
3039 		switch_output_size_warn(rec);
3040 
3041 	return 0;
3042 }
3043 
/* Usage synopsis lines for 'perf record'; the list is NULL terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};

/* Non-static handle to the usage lines above. */
const char * const *record_usage = __record_usage;
3050 
3051 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3052 				  struct perf_sample *sample, struct machine *machine)
3053 {
3054 	/*
3055 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3056 	 * no need to add them twice.
3057 	 */
3058 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3059 		return 0;
3060 	return perf_event__process_mmap(tool, event, sample, machine);
3061 }
3062 
3063 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3064 				   struct perf_sample *sample, struct machine *machine)
3065 {
3066 	/*
3067 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3068 	 * no need to add them twice.
3069 	 */
3070 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3071 		return 0;
3072 
3073 	return perf_event__process_mmap2(tool, event, sample, machine);
3074 }
3075 
3076 static int process_timestamp_boundary(struct perf_tool *tool,
3077 				      union perf_event *event __maybe_unused,
3078 				      struct perf_sample *sample,
3079 				      struct machine *machine __maybe_unused)
3080 {
3081 	struct record *rec = container_of(tool, struct record, tool);
3082 
3083 	set_timestamp_boundary(rec, sample->time);
3084 	return 0;
3085 }
3086 
3087 static int parse_record_synth_option(const struct option *opt,
3088 				     const char *str,
3089 				     int unset __maybe_unused)
3090 {
3091 	struct record_opts *opts = opt->value;
3092 	char *p = strdup(str);
3093 
3094 	if (p == NULL)
3095 		return -1;
3096 
3097 	opts->synth = parse_synth_opt(p);
3098 	free(p);
3099 
3100 	if (opts->synth < 0) {
3101 		pr_err("Invalid synth option: %s\n", str);
3102 		return -1;
3103 	}
3104 	return 0;
3105 }
3106 
3107 /*
3108  * XXX Ideally would be local to cmd_record() and passed to a record__new
3109  * because we need to have access to it in record__exit, that is called
3110  * after cmd_record() exits, but since record_options need to be accessible to
3111  * builtin-script, leave it here.
3112  *
 * At least we don't touch it in all the other functions here directly.
3114  *
3115  * Just say no to tons of global variables, sigh.
3116  */
3117 static struct record record = {
3118 	.opts = {
3119 		.sample_time	     = true,
3120 		.mmap_pages	     = UINT_MAX,
3121 		.user_freq	     = UINT_MAX,
3122 		.user_interval	     = ULLONG_MAX,
3123 		.freq		     = 4000,
3124 		.target		     = {
3125 			.uses_mmap   = true,
3126 			.default_per_cpu = true,
3127 		},
3128 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3129 		.nr_threads_synthesize = 1,
3130 		.ctl_fd              = -1,
3131 		.ctl_fd_ack          = -1,
3132 		.synth               = PERF_SYNTH_ALL,
3133 	},
3134 	.tool = {
3135 		.sample		= process_sample_event,
3136 		.fork		= perf_event__process_fork,
3137 		.exit		= perf_event__process_exit,
3138 		.comm		= perf_event__process_comm,
3139 		.namespaces	= perf_event__process_namespaces,
3140 		.mmap		= build_id__process_mmap,
3141 		.mmap2		= build_id__process_mmap2,
3142 		.itrace_start	= process_timestamp_boundary,
3143 		.aux		= process_timestamp_boundary,
3144 		.ordered_events	= true,
3145 	},
3146 };
3147 
3148 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3149 	"\n\t\t\t\tDefault: fp";
3150 
3151 static bool dry_run;
3152 
3153 /*
3154  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3155  * with it and switch to use the library functions in perf_evlist that came
3156  * from builtin-record.c, i.e. use record_opts,
3157  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3158  * using pipes, etc.
3159  */
3160 static struct option __record_options[] = {
3161 	OPT_CALLBACK('e', "event", &record.evlist, "event",
3162 		     "event selector. use 'perf list' to list available events",
3163 		     parse_events_option),
3164 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3165 		     "event filter", parse_filter),
3166 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3167 			   NULL, "don't record events from perf itself",
3168 			   exclude_perf),
3169 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3170 		    "record events on existing process id"),
3171 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3172 		    "record events on existing thread id"),
3173 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3174 		    "collect data with this RT SCHED_FIFO priority"),
3175 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3176 		    "collect data without buffering"),
3177 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3178 		    "collect raw sample records from all opened counters"),
3179 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3180 			    "system-wide collection from all CPUs"),
3181 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3182 		    "list of cpus to monitor"),
3183 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3184 	OPT_STRING('o', "output", &record.data.path, "file",
3185 		    "output file name"),
3186 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3187 			&record.opts.no_inherit_set,
3188 			"child tasks do not inherit counters"),
3189 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3190 		    "synthesize non-sample events at the end of output"),
3191 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3192 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3193 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3194 		    "Fail if the specified frequency can't be used"),
3195 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3196 		     "profile at this frequency",
3197 		      record__parse_freq),
3198 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3199 		     "number of mmap data pages and AUX area tracing mmap pages",
3200 		     record__parse_mmap_pages),
3201 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3202 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3203 		     record__mmap_flush_parse),
3204 	OPT_BOOLEAN(0, "group", &record.opts.group,
3205 		    "put the counters into a counter group"),
3206 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3207 			   NULL, "enables call-graph recording" ,
3208 			   &record_callchain_opt),
3209 	OPT_CALLBACK(0, "call-graph", &record.opts,
3210 		     "record_mode[,record_size]", record_callchain_help,
3211 		     &record_parse_callchain_opt),
3212 	OPT_INCR('v', "verbose", &verbose,
3213 		    "be more verbose (show counter open errors, etc)"),
3214 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3215 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3216 		    "per thread counts"),
3217 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3218 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3219 		    "Record the sample physical addresses"),
3220 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3221 		    "Record the sampled data address data page size"),
3222 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3223 		    "Record the sampled code address (ip) page size"),
3224 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3225 	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3226 		    "Record the sample identifier"),
3227 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3228 			&record.opts.sample_time_set,
3229 			"Record the sample timestamps"),
3230 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3231 			"Record the sample period"),
3232 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3233 		    "don't sample"),
3234 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3235 			&record.no_buildid_cache_set,
3236 			"do not update the buildid cache"),
3237 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3238 			&record.no_buildid_set,
3239 			"do not collect buildids in perf.data"),
3240 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3241 		     "monitor event in cgroup name only",
3242 		     parse_cgroups),
3243 	OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3244 		  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3245 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3246 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3247 		   "user to profile"),
3248 
3249 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3250 		     "branch any", "sample any taken branches",
3251 		     parse_branch_stack),
3252 
3253 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3254 		     "branch filter mask", "branch stack filter modes",
3255 		     parse_branch_stack),
3256 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3257 		    "sample by weight (on special events only)"),
3258 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3259 		    "sample transaction flags (special events only)"),
3260 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3261 		    "use per-thread mmaps"),
3262 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3263 		    "sample selected machine registers on interrupt,"
3264 		    " use '-I?' to list register names", parse_intr_regs),
3265 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3266 		    "sample selected machine registers on interrupt,"
3267 		    " use '--user-regs=?' to list register names", parse_user_regs),
3268 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3269 		    "Record running/enabled time of read (:S) events"),
3270 	OPT_CALLBACK('k', "clockid", &record.opts,
3271 	"clockid", "clockid to use for events, see clock_gettime()",
3272 	parse_clockid),
3273 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3274 			  "opts", "AUX area tracing Snapshot Mode", ""),
3275 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3276 			  "opts", "sample AUX area", ""),
3277 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3278 			"per thread proc mmap processing timeout in ms"),
3279 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3280 		    "Record namespaces events"),
3281 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3282 		    "Record cgroup events"),
3283 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3284 			&record.opts.record_switch_events_set,
3285 			"Record context switch events"),
3286 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3287 			 "Configure all used events to run in kernel space.",
3288 			 PARSE_OPT_EXCLUSIVE),
3289 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3290 			 "Configure all used events to run in user space.",
3291 			 PARSE_OPT_EXCLUSIVE),
3292 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3293 		    "collect kernel callchains"),
3294 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3295 		    "collect user callchains"),
3296 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3297 		   "clang binary to use for compiling BPF scriptlets"),
3298 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3299 		   "options passed to clang when compiling BPF scriptlets"),
3300 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3301 		   "file", "vmlinux pathname"),
3302 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3303 		    "Record build-id of all DSOs regardless of hits"),
3304 	OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3305 		    "Record build-id in map events"),
3306 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3307 		    "append timestamp to output filename"),
3308 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3309 		    "Record timestamp boundary (time of first/last samples)"),
3310 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3311 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3312 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3313 			  "signal"),
3314 	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3315 			 "switch output event selector. use 'perf list' to list available events",
3316 			 parse_events_option_new_evlist),
3317 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3318 		   "Limit number of switch output generated files"),
3319 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3320 		    "Parse options then exit"),
3321 #ifdef HAVE_AIO_SUPPORT
3322 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3323 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3324 		     record__aio_parse),
3325 #endif
3326 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3327 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3328 		     record__parse_affinity),
3329 #ifdef HAVE_ZSTD_SUPPORT
3330 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3331 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3332 			    record__parse_comp_level),
3333 #endif
3334 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3335 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3336 	OPT_UINTEGER(0, "num-thread-synthesize",
3337 		     &record.opts.nr_threads_synthesize,
3338 		     "number of threads to run for event synthesis"),
3339 #ifdef HAVE_LIBPFM
3340 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3341 		"libpfm4 event selector. use 'perf list' to list available events",
3342 		parse_libpfm_events_option),
3343 #endif
3344 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3345 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3346 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3347 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3348 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3349 		      parse_control_option),
3350 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3351 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3352 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3353 			  &record.debuginfod.set, "debuginfod urls",
3354 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3355 			  "system"),
3356 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3357 			    "write collected trace data into several data files using parallel threads",
3358 			    record__parse_threads),
3359 	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3360 	OPT_END()
3361 };
3362 
3363 struct option *record_options = __record_options;
3364 
3365 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3366 {
3367 	struct perf_cpu cpu;
3368 	int idx;
3369 
3370 	if (cpu_map__is_dummy(cpus))
3371 		return 0;
3372 
3373 	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3374 		if (cpu.cpu == -1)
3375 			continue;
3376 		/* Return ENODEV is input cpu is greater than max cpu */
3377 		if ((unsigned long)cpu.cpu > mask->nbits)
3378 			return -ENODEV;
3379 		set_bit(cpu.cpu, mask->bits);
3380 	}
3381 
3382 	return 0;
3383 }
3384 
3385 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3386 {
3387 	struct perf_cpu_map *cpus;
3388 
3389 	cpus = perf_cpu_map__new(mask_spec);
3390 	if (!cpus)
3391 		return -ENOMEM;
3392 
3393 	bitmap_zero(mask->bits, mask->nbits);
3394 	if (record__mmap_cpu_mask_init(mask, cpus))
3395 		return -ENODEV;
3396 
3397 	perf_cpu_map__put(cpus);
3398 
3399 	return 0;
3400 }
3401 
3402 static void record__free_thread_masks(struct record *rec, int nr_threads)
3403 {
3404 	int t;
3405 
3406 	if (rec->thread_masks)
3407 		for (t = 0; t < nr_threads; t++)
3408 			record__thread_mask_free(&rec->thread_masks[t]);
3409 
3410 	zfree(&rec->thread_masks);
3411 }
3412 
3413 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3414 {
3415 	int t, ret;
3416 
3417 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3418 	if (!rec->thread_masks) {
3419 		pr_err("Failed to allocate thread masks\n");
3420 		return -ENOMEM;
3421 	}
3422 
3423 	for (t = 0; t < nr_threads; t++) {
3424 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3425 		if (ret) {
3426 			pr_err("Failed to allocate thread masks[%d]\n", t);
3427 			goto out_free;
3428 		}
3429 	}
3430 
3431 	return 0;
3432 
3433 out_free:
3434 	record__free_thread_masks(rec, nr_threads);
3435 
3436 	return ret;
3437 }
3438 
3439 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3440 {
3441 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3442 
3443 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3444 	if (ret)
3445 		return ret;
3446 
3447 	rec->nr_threads = nr_cpus;
3448 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3449 
3450 	for (t = 0; t < rec->nr_threads; t++) {
3451 		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3452 		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3453 		if (verbose) {
3454 			pr_debug("thread_masks[%d]: ", t);
3455 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3456 			pr_debug("thread_masks[%d]: ", t);
3457 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3458 		}
3459 	}
3460 
3461 	return 0;
3462 }
3463 
3464 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3465 					  const char **maps_spec, const char **affinity_spec,
3466 					  u32 nr_spec)
3467 {
3468 	u32 s;
3469 	int ret = 0, t = 0;
3470 	struct mmap_cpu_mask cpus_mask;
3471 	struct thread_mask thread_mask, full_mask, *thread_masks;
3472 
3473 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3474 	if (ret) {
3475 		pr_err("Failed to allocate CPUs mask\n");
3476 		return ret;
3477 	}
3478 
3479 	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3480 	if (ret) {
3481 		pr_err("Failed to init cpu mask\n");
3482 		goto out_free_cpu_mask;
3483 	}
3484 
3485 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3486 	if (ret) {
3487 		pr_err("Failed to allocate full mask\n");
3488 		goto out_free_cpu_mask;
3489 	}
3490 
3491 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3492 	if (ret) {
3493 		pr_err("Failed to allocate thread mask\n");
3494 		goto out_free_full_and_cpu_masks;
3495 	}
3496 
3497 	for (s = 0; s < nr_spec; s++) {
3498 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3499 		if (ret) {
3500 			pr_err("Failed to initialize maps thread mask\n");
3501 			goto out_free;
3502 		}
3503 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3504 		if (ret) {
3505 			pr_err("Failed to initialize affinity thread mask\n");
3506 			goto out_free;
3507 		}
3508 
3509 		/* ignore invalid CPUs but do not allow empty masks */
3510 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3511 				cpus_mask.bits, thread_mask.maps.nbits)) {
3512 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3513 			ret = -EINVAL;
3514 			goto out_free;
3515 		}
3516 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3517 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3518 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3519 			ret = -EINVAL;
3520 			goto out_free;
3521 		}
3522 
3523 		/* do not allow intersection with other masks (full_mask) */
3524 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3525 				      thread_mask.maps.nbits)) {
3526 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3527 			ret = -EINVAL;
3528 			goto out_free;
3529 		}
3530 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3531 				      thread_mask.affinity.nbits)) {
3532 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3533 			ret = -EINVAL;
3534 			goto out_free;
3535 		}
3536 
3537 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3538 			  thread_mask.maps.bits, full_mask.maps.nbits);
3539 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3540 			  thread_mask.affinity.bits, full_mask.maps.nbits);
3541 
3542 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3543 		if (!thread_masks) {
3544 			pr_err("Failed to reallocate thread masks\n");
3545 			ret = -ENOMEM;
3546 			goto out_free;
3547 		}
3548 		rec->thread_masks = thread_masks;
3549 		rec->thread_masks[t] = thread_mask;
3550 		if (verbose) {
3551 			pr_debug("thread_masks[%d]: ", t);
3552 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3553 			pr_debug("thread_masks[%d]: ", t);
3554 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3555 		}
3556 		t++;
3557 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3558 		if (ret) {
3559 			pr_err("Failed to allocate thread mask\n");
3560 			goto out_free_full_and_cpu_masks;
3561 		}
3562 	}
3563 	rec->nr_threads = t;
3564 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3565 	if (!rec->nr_threads)
3566 		ret = -EINVAL;
3567 
3568 out_free:
3569 	record__thread_mask_free(&thread_mask);
3570 out_free_full_and_cpu_masks:
3571 	record__thread_mask_free(&full_mask);
3572 out_free_cpu_mask:
3573 	record__mmap_cpu_mask_free(&cpus_mask);
3574 
3575 	return ret;
3576 }
3577 
3578 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3579 {
3580 	int ret;
3581 	struct cpu_topology *topo;
3582 
3583 	topo = cpu_topology__new();
3584 	if (!topo) {
3585 		pr_err("Failed to allocate CPU topology\n");
3586 		return -ENOMEM;
3587 	}
3588 
3589 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3590 					     topo->core_cpus_list, topo->core_cpus_lists);
3591 	cpu_topology__delete(topo);
3592 
3593 	return ret;
3594 }
3595 
3596 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3597 {
3598 	int ret;
3599 	struct cpu_topology *topo;
3600 
3601 	topo = cpu_topology__new();
3602 	if (!topo) {
3603 		pr_err("Failed to allocate CPU topology\n");
3604 		return -ENOMEM;
3605 	}
3606 
3607 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3608 					     topo->package_cpus_list, topo->package_cpus_lists);
3609 	cpu_topology__delete(topo);
3610 
3611 	return ret;
3612 }
3613 
3614 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3615 {
3616 	u32 s;
3617 	int ret;
3618 	const char **spec;
3619 	struct numa_topology *topo;
3620 
3621 	topo = numa_topology__new();
3622 	if (!topo) {
3623 		pr_err("Failed to allocate NUMA topology\n");
3624 		return -ENOMEM;
3625 	}
3626 
3627 	spec = zalloc(topo->nr * sizeof(char *));
3628 	if (!spec) {
3629 		pr_err("Failed to allocate NUMA spec\n");
3630 		ret = -ENOMEM;
3631 		goto out_delete_topo;
3632 	}
3633 	for (s = 0; s < topo->nr; s++)
3634 		spec[s] = topo->nodes[s].cpus;
3635 
3636 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3637 
3638 	zfree(&spec);
3639 
3640 out_delete_topo:
3641 	numa_topology__delete(topo);
3642 
3643 	return ret;
3644 }
3645 
3646 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3647 {
3648 	int t, ret;
3649 	u32 s, nr_spec = 0;
3650 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3651 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3652 
3653 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3654 		spec = strtok_r(user_spec, ":", &spec_ptr);
3655 		if (spec == NULL)
3656 			break;
3657 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3658 		mask = strtok_r(spec, "/", &mask_ptr);
3659 		if (mask == NULL)
3660 			break;
3661 		pr_debug2("  maps mask: %s\n", mask);
3662 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3663 		if (!tmp_spec) {
3664 			pr_err("Failed to reallocate maps spec\n");
3665 			ret = -ENOMEM;
3666 			goto out_free;
3667 		}
3668 		maps_spec = tmp_spec;
3669 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3670 		if (!maps_spec[nr_spec]) {
3671 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3672 			ret = -ENOMEM;
3673 			goto out_free;
3674 		}
3675 		mask = strtok_r(NULL, "/", &mask_ptr);
3676 		if (mask == NULL) {
3677 			pr_err("Invalid thread maps or affinity specs\n");
3678 			ret = -EINVAL;
3679 			goto out_free;
3680 		}
3681 		pr_debug2("  affinity mask: %s\n", mask);
3682 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3683 		if (!tmp_spec) {
3684 			pr_err("Failed to reallocate affinity spec\n");
3685 			ret = -ENOMEM;
3686 			goto out_free;
3687 		}
3688 		affinity_spec = tmp_spec;
3689 		affinity_spec[nr_spec] = strdup(mask);
3690 		if (!affinity_spec[nr_spec]) {
3691 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3692 			ret = -ENOMEM;
3693 			goto out_free;
3694 		}
3695 		dup_mask = NULL;
3696 		nr_spec++;
3697 	}
3698 
3699 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3700 					     (const char **)affinity_spec, nr_spec);
3701 
3702 out_free:
3703 	free(dup_mask);
3704 	for (s = 0; s < nr_spec; s++) {
3705 		if (maps_spec)
3706 			free(maps_spec[s]);
3707 		if (affinity_spec)
3708 			free(affinity_spec[s]);
3709 	}
3710 	free(affinity_spec);
3711 	free(maps_spec);
3712 
3713 	return ret;
3714 }
3715 
3716 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3717 {
3718 	int ret;
3719 
3720 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3721 	if (ret)
3722 		return ret;
3723 
3724 	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3725 		return -ENODEV;
3726 
3727 	rec->nr_threads = 1;
3728 
3729 	return 0;
3730 }
3731 
3732 static int record__init_thread_masks(struct record *rec)
3733 {
3734 	int ret = 0;
3735 	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3736 
3737 	if (!record__threads_enabled(rec))
3738 		return record__init_thread_default_masks(rec, cpus);
3739 
3740 	if (evlist__per_thread(rec->evlist)) {
3741 		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3742 		return -EINVAL;
3743 	}
3744 
3745 	switch (rec->opts.threads_spec) {
3746 	case THREAD_SPEC__CPU:
3747 		ret = record__init_thread_cpu_masks(rec, cpus);
3748 		break;
3749 	case THREAD_SPEC__CORE:
3750 		ret = record__init_thread_core_masks(rec, cpus);
3751 		break;
3752 	case THREAD_SPEC__PACKAGE:
3753 		ret = record__init_thread_package_masks(rec, cpus);
3754 		break;
3755 	case THREAD_SPEC__NUMA:
3756 		ret = record__init_thread_numa_masks(rec, cpus);
3757 		break;
3758 	case THREAD_SPEC__USER:
3759 		ret = record__init_thread_user_masks(rec, cpus);
3760 		break;
3761 	default:
3762 		break;
3763 	}
3764 
3765 	return ret;
3766 }
3767 
3768 int cmd_record(int argc, const char **argv)
3769 {
3770 	int err;
3771 	struct record *rec = &record;
3772 	char errbuf[BUFSIZ];
3773 
3774 	setlocale(LC_ALL, "");
3775 
3776 #ifndef HAVE_LIBBPF_SUPPORT
3777 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3778 	set_nobuild('\0', "clang-path", true);
3779 	set_nobuild('\0', "clang-opt", true);
3780 # undef set_nobuild
3781 #endif
3782 
3783 #ifndef HAVE_BPF_PROLOGUE
3784 # if !defined (HAVE_DWARF_SUPPORT)
3785 #  define REASON  "NO_DWARF=1"
3786 # elif !defined (HAVE_LIBBPF_SUPPORT)
3787 #  define REASON  "NO_LIBBPF=1"
3788 # else
3789 #  define REASON  "this architecture doesn't support BPF prologue"
3790 # endif
3791 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3792 	set_nobuild('\0', "vmlinux", true);
3793 # undef set_nobuild
3794 # undef REASON
3795 #endif
3796 
3797 #ifndef HAVE_BPF_SKEL
3798 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3799 	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3800 # undef set_nobuild
3801 #endif
3802 
3803 	rec->opts.affinity = PERF_AFFINITY_SYS;
3804 
3805 	rec->evlist = evlist__new();
3806 	if (rec->evlist == NULL)
3807 		return -ENOMEM;
3808 
3809 	err = perf_config(perf_record_config, rec);
3810 	if (err)
3811 		return err;
3812 
3813 	argc = parse_options(argc, argv, record_options, record_usage,
3814 			    PARSE_OPT_STOP_AT_NON_OPTION);
3815 	if (quiet)
3816 		perf_quiet_option();
3817 
3818 	err = symbol__validate_sym_arguments();
3819 	if (err)
3820 		return err;
3821 
3822 	perf_debuginfod_setup(&record.debuginfod);
3823 
3824 	/* Make system wide (-a) the default target. */
3825 	if (!argc && target__none(&rec->opts.target))
3826 		rec->opts.target.system_wide = true;
3827 
3828 	if (nr_cgroups && !rec->opts.target.system_wide) {
3829 		usage_with_options_msg(record_usage, record_options,
3830 			"cgroup monitoring only available in system-wide mode");
3831 
3832 	}
3833 
3834 	if (rec->buildid_mmap) {
3835 		if (!perf_can_record_build_id()) {
3836 			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3837 			err = -EINVAL;
3838 			goto out_opts;
3839 		}
3840 		pr_debug("Enabling build id in mmap2 events.\n");
3841 		/* Enable mmap build id synthesizing. */
3842 		symbol_conf.buildid_mmap2 = true;
3843 		/* Enable perf_event_attr::build_id bit. */
3844 		rec->opts.build_id = true;
3845 		/* Disable build id cache. */
3846 		rec->no_buildid = true;
3847 	}
3848 
3849 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3850 		pr_err("Kernel has no cgroup sampling support.\n");
3851 		err = -EINVAL;
3852 		goto out_opts;
3853 	}
3854 
3855 	if (rec->opts.kcore)
3856 		rec->opts.text_poke = true;
3857 
3858 	if (rec->opts.kcore || record__threads_enabled(rec))
3859 		rec->data.is_dir = true;
3860 
3861 	if (record__threads_enabled(rec)) {
3862 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3863 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
3864 			goto out_opts;
3865 		}
3866 		if (record__aio_enabled(rec)) {
3867 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
3868 			goto out_opts;
3869 		}
3870 	}
3871 
3872 	if (rec->opts.comp_level != 0) {
3873 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3874 		rec->no_buildid = true;
3875 	}
3876 
3877 	if (rec->opts.record_switch_events &&
3878 	    !perf_can_record_switch_events()) {
3879 		ui__error("kernel does not support recording context switch events\n");
3880 		parse_options_usage(record_usage, record_options, "switch-events", 0);
3881 		err = -EINVAL;
3882 		goto out_opts;
3883 	}
3884 
3885 	if (switch_output_setup(rec)) {
3886 		parse_options_usage(record_usage, record_options, "switch-output", 0);
3887 		err = -EINVAL;
3888 		goto out_opts;
3889 	}
3890 
3891 	if (rec->switch_output.time) {
3892 		signal(SIGALRM, alarm_sig_handler);
3893 		alarm(rec->switch_output.time);
3894 	}
3895 
3896 	if (rec->switch_output.num_files) {
3897 		rec->switch_output.filenames = calloc(sizeof(char *),
3898 						      rec->switch_output.num_files);
3899 		if (!rec->switch_output.filenames) {
3900 			err = -EINVAL;
3901 			goto out_opts;
3902 		}
3903 	}
3904 
3905 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
3906 		rec->timestamp_filename = false;
3907 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3908 	}
3909 
3910 	/*
3911 	 * Allow aliases to facilitate the lookup of symbols for address
3912 	 * filters. Refer to auxtrace_parse_filters().
3913 	 */
3914 	symbol_conf.allow_aliases = true;
3915 
3916 	symbol__init(NULL);
3917 
3918 	err = record__auxtrace_init(rec);
3919 	if (err)
3920 		goto out;
3921 
3922 	if (dry_run)
3923 		goto out;
3924 
3925 	err = bpf__setup_stdout(rec->evlist);
3926 	if (err) {
3927 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3928 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
3929 			 errbuf);
3930 		goto out;
3931 	}
3932 
3933 	err = -ENOMEM;
3934 
3935 	if (rec->no_buildid_cache || rec->no_buildid) {
3936 		disable_buildid_cache();
3937 	} else if (rec->switch_output.enabled) {
3938 		/*
3939 		 * In 'perf record --switch-output', disable buildid
3940 		 * generation by default to reduce data file switching
3941 		 * overhead. Still generate buildid if they are required
3942 		 * explicitly using
3943 		 *
3944 		 *  perf record --switch-output --no-no-buildid \
3945 		 *              --no-no-buildid-cache
3946 		 *
3947 		 * Following code equals to:
3948 		 *
3949 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
3950 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3951 		 *         disable_buildid_cache();
3952 		 */
3953 		bool disable = true;
3954 
3955 		if (rec->no_buildid_set && !rec->no_buildid)
3956 			disable = false;
3957 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3958 			disable = false;
3959 		if (disable) {
3960 			rec->no_buildid = true;
3961 			rec->no_buildid_cache = true;
3962 			disable_buildid_cache();
3963 		}
3964 	}
3965 
3966 	if (record.opts.overwrite)
3967 		record.opts.tail_synthesize = true;
3968 
3969 	if (rec->evlist->core.nr_entries == 0) {
3970 		if (perf_pmu__has_hybrid()) {
3971 			err = evlist__add_default_hybrid(rec->evlist,
3972 							 !record.opts.no_samples);
3973 		} else {
3974 			err = __evlist__add_default(rec->evlist,
3975 						    !record.opts.no_samples);
3976 		}
3977 
3978 		if (err < 0) {
3979 			pr_err("Not enough memory for event selector list\n");
3980 			goto out;
3981 		}
3982 	}
3983 
3984 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3985 		rec->opts.no_inherit = true;
3986 
3987 	err = target__validate(&rec->opts.target);
3988 	if (err) {
3989 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3990 		ui__warning("%s\n", errbuf);
3991 	}
3992 
3993 	err = target__parse_uid(&rec->opts.target);
3994 	if (err) {
3995 		int saved_errno = errno;
3996 
3997 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3998 		ui__error("%s", errbuf);
3999 
4000 		err = -saved_errno;
4001 		goto out;
4002 	}
4003 
4004 	/* Enable ignoring missing threads when -u/-p option is defined. */
4005 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4006 
4007 	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
4008 		pr_err("failed to use cpu list %s\n",
4009 		       rec->opts.target.cpu_list);
4010 		goto out;
4011 	}
4012 
4013 	rec->opts.target.hybrid = perf_pmu__has_hybrid();
4014 
4015 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4016 		arch__add_leaf_frame_record_opts(&rec->opts);
4017 
4018 	err = -ENOMEM;
4019 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4020 		if (rec->opts.target.pid != NULL) {
4021 			pr_err("Couldn't create thread/CPU maps: %s\n",
4022 				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4023 			goto out;
4024 		}
4025 		else
4026 			usage_with_options(record_usage, record_options);
4027 	}
4028 
4029 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4030 	if (err)
4031 		goto out;
4032 
4033 	/*
4034 	 * We take all buildids when the file contains
4035 	 * AUX area tracing data because we do not decode the
4036 	 * trace because it would take too long.
4037 	 */
4038 	if (rec->opts.full_auxtrace)
4039 		rec->buildid_all = true;
4040 
4041 	if (rec->opts.text_poke) {
4042 		err = record__config_text_poke(rec->evlist);
4043 		if (err) {
4044 			pr_err("record__config_text_poke failed, error %d\n", err);
4045 			goto out;
4046 		}
4047 	}
4048 
4049 	if (rec->off_cpu) {
4050 		err = record__config_off_cpu(rec);
4051 		if (err) {
4052 			pr_err("record__config_off_cpu failed, error %d\n", err);
4053 			goto out;
4054 		}
4055 	}
4056 
4057 	if (record_opts__config(&rec->opts)) {
4058 		err = -EINVAL;
4059 		goto out;
4060 	}
4061 
4062 	err = record__init_thread_masks(rec);
4063 	if (err) {
4064 		pr_err("Failed to initialize parallel data streaming masks\n");
4065 		goto out;
4066 	}
4067 
4068 	if (rec->opts.nr_cblocks > nr_cblocks_max)
4069 		rec->opts.nr_cblocks = nr_cblocks_max;
4070 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4071 
4072 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4073 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4074 
4075 	if (rec->opts.comp_level > comp_level_max)
4076 		rec->opts.comp_level = comp_level_max;
4077 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4078 
4079 	err = __cmd_record(&record, argc, argv);
4080 out:
4081 	evlist__delete(rec->evlist);
4082 	symbol__exit();
4083 	auxtrace_record__free(rec->itr);
4084 out_opts:
4085 	record__free_thread_masks(rec, rec->nr_threads);
4086 	rec->nr_threads = 0;
4087 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4088 	return err;
4089 }
4090 
4091 static void snapshot_sig_handler(int sig __maybe_unused)
4092 {
4093 	struct record *rec = &record;
4094 
4095 	hit_auxtrace_snapshot_trigger(rec);
4096 
4097 	if (switch_output_signal(rec))
4098 		trigger_hit(&switch_output_trigger);
4099 }
4100 
4101 static void alarm_sig_handler(int sig __maybe_unused)
4102 {
4103 	struct record *rec = &record;
4104 
4105 	if (switch_output_time(rec))
4106 		trigger_hit(&switch_output_trigger);
4107 }
4108