xref: /openbmc/linux/tools/perf/builtin-record.c (revision d63670d2)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "util/pmu-hybrid.h"
51 #include "util/evlist-hybrid.h"
52 #include "asm/bug.h"
53 #include "perf.h"
54 #include "cputopo.h"
55 
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <locale.h>
59 #include <poll.h>
60 #include <pthread.h>
61 #include <unistd.h>
62 #ifndef HAVE_GETTID
63 #include <syscall.h>
64 #endif
65 #include <sched.h>
66 #include <signal.h>
67 #ifdef HAVE_EVENTFD_SUPPORT
68 #include <sys/eventfd.h>
69 #endif
70 #include <sys/mman.h>
71 #include <sys/wait.h>
72 #include <sys/types.h>
73 #include <sys/stat.h>
74 #include <fcntl.h>
75 #include <linux/err.h>
76 #include <linux/string.h>
77 #include <linux/time64.h>
78 #include <linux/zalloc.h>
79 #include <linux/bitmap.h>
80 #include <sys/time.h>
81 
/*
 * Settings and state for switching the output file while recording.
 *
 * NOTE(review): only .signal, .size and .time are consumed in the part of
 * the file reviewed here (switch_output_signal/size/time()); the remaining
 * fields are described from their names only - confirm against the users.
 */
struct switch_output {
	bool		 enabled;
	bool		 signal;	/* switch on signal; see switch_output_signal() */
	unsigned long	 size;		/* bytes-written threshold, 0 = disabled */
	unsigned long	 time;		/* non-zero enables switch_output_time() */
	const char	*str;
	bool		 set;
	char		 **filenames;	/* presumably the rotated file names - TODO confirm */
	int		 num_files;
	int		 cur_file;
};
93 
/*
 * Pair of CPU bitmaps attached to one record_thread.
 *
 * 'maps' is tested per CPU number in record__thread_data_init_maps() to
 * pick the mmaps a thread owns.  'affinity' is not used in the part of the
 * file reviewed here.
 */
struct thread_mask {
	struct mmap_cpu_mask	maps;
	struct mmap_cpu_mask	affinity;
};
98 
/*
 * Per-thread recording state; one entry per element of rec->thread_data[]
 * (allocated in record__alloc_thread_data()).
 */
struct record_thread {
	pid_t			tid;		/* gettid() for entry 0, -1 for the others at allocation */
	struct thread_mask	*mask;		/* points into rec->thread_masks[] */
	struct {
		int		msg[2];		/* pipe(); msg[0] is added to 'pollfd' for entries > 0 */
		int		ack[2];		/* pipe(); -1 while closed */
	} pipes;
	struct fdarray		pollfd;		/* descriptors duplicated from evlist->core.pollfd */
	int			ctlfd_pos;	/* index of the control descriptor inside 'pollfd' */
	int			nr_mmaps;	/* number of bits set in mask->maps */
	struct mmap		**maps;		/* nr_mmaps pointers into evlist->mmap, or NULL */
	struct mmap		**overwrite_maps; /* same for evlist->overwrite_mmap, or NULL */
	struct record		*rec;		/* owning record session */
	unsigned long long	samples;	/* bumped by record__pushfn() */
	unsigned long		waking;
	u64			bytes_written;	/* bytes written to per-map files, see record__write() */
	u64			bytes_transferred;
	u64			bytes_compressed;
};
118 
/*
 * Thread-local pointer to the calling thread's record_thread; its counters
 * are updated by record__write() and record__pushfn().
 */
static __thread struct record_thread *thread;
120 
/* Message identifiers; THREAD_MSG__MAX sizes thread_msg_tags[]. */
enum thread_msg {
	THREAD_MSG__UNDEFINED = 0,
	THREAD_MSG__READY,
	THREAD_MSG__MAX,
};
126 
/* Printable names, listed in the same order as enum thread_msg. */
static const char *thread_msg_tags[THREAD_MSG__MAX] = {
	"UNDEFINED", "READY"
};
130 
/*
 * Kinds of thread specification; THREAD_SPEC__MAX sizes thread_spec_tags[].
 * A non-zero rec->opts.threads_spec is what record__threads_enabled() tests.
 */
enum thread_spec {
	THREAD_SPEC__UNDEFINED = 0,
	THREAD_SPEC__CPU,
	THREAD_SPEC__CORE,
	THREAD_SPEC__PACKAGE,
	THREAD_SPEC__NUMA,
	THREAD_SPEC__USER,
	THREAD_SPEC__MAX,
};
140 
/* Printable names, listed in the same order as enum thread_spec. */
static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
	"undefined", "cpu", "core", "package", "numa", "user"
};
144 
/*
 * Top-level state of one 'perf record' run.
 *
 * The callbacks in this file recover it from the embedded 'tool' member
 * with container_of().  Only fields whose use is visible in the reviewed
 * part of the file are annotated.
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;	/* bytes written to the main data file */
	struct perf_data	data;
	struct auxtrace_record	*itr;		/* set up by record__auxtrace_init() */
	struct evlist	*evlist;
	struct perf_session	*session;
	struct evlist		*sb_evlist;
	pthread_t		thread_id;
	int			realtime_prio;
	bool			switch_output_event_set;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			buildid_mmap;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
	unsigned long		output_max_size;	/* = 0: unlimited */
	struct perf_debuginfod	debuginfod;
	int			nr_threads;	/* number of entries in thread_data[] */
	struct thread_mask	*thread_masks;	/* one mask per thread_data[] entry */
	struct record_thread	*thread_data;	/* see record__alloc_thread_data() */
};
173 
/*
 * Set to 1 to end the session: by sig_handler() and by record__write()
 * once the configured output size limit has been reached.
 */
static volatile int done;

/* Tested by record__auxtrace_snapshot_exit() before starting a snapshot. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
179 
/*
 * Printable names for the affinity modes.
 * NOTE(review): presumably indexed by the PERF_AFFINITY_* values - confirm
 * the order against that enum.
 */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
183 
184 #ifndef HAVE_GETTID
/* Fallback for C libraries without gettid(): issue the raw system call. */
static inline pid_t gettid(void)
{
	const long tid = syscall(__NR_gettid);

	return (pid_t)tid;
}
189 #endif
190 
191 static int record__threads_enabled(struct record *rec)
192 {
193 	return rec->opts.threads_spec;
194 }
195 
196 static bool switch_output_signal(struct record *rec)
197 {
198 	return rec->switch_output.signal &&
199 	       trigger_is_ready(&switch_output_trigger);
200 }
201 
202 static bool switch_output_size(struct record *rec)
203 {
204 	return rec->switch_output.size &&
205 	       trigger_is_ready(&switch_output_trigger) &&
206 	       (rec->bytes_written >= rec->switch_output.size);
207 }
208 
209 static bool switch_output_time(struct record *rec)
210 {
211 	return rec->switch_output.time &&
212 	       trigger_is_ready(&switch_output_trigger);
213 }
214 
215 static u64 record__bytes_written(struct record *rec)
216 {
217 	int t;
218 	u64 bytes_written = rec->bytes_written;
219 	struct record_thread *thread_data = rec->thread_data;
220 
221 	for (t = 0; t < rec->nr_threads; t++)
222 		bytes_written += thread_data[t].bytes_written;
223 
224 	return bytes_written;
225 }
226 
227 static bool record__output_max_size_exceeded(struct record *rec)
228 {
229 	return rec->output_max_size &&
230 	       (record__bytes_written(rec) >= rec->output_max_size);
231 }
232 
233 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
234 			 void *bf, size_t size)
235 {
236 	struct perf_data_file *file = &rec->session->data->file;
237 
238 	if (map && map->file)
239 		file = map->file;
240 
241 	if (perf_data_file__write(file, bf, size) < 0) {
242 		pr_err("failed to write perf data, error: %m\n");
243 		return -1;
244 	}
245 
246 	if (map && map->file)
247 		thread->bytes_written += size;
248 	else
249 		rec->bytes_written += size;
250 
251 	if (record__output_max_size_exceeded(rec) && !done) {
252 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
253 				" stopping session ]\n",
254 				record__bytes_written(rec) >> 10);
255 		done = 1;
256 	}
257 
258 	if (switch_output_size(rec))
259 		trigger_hit(&switch_output_trigger);
260 
261 	return 0;
262 }
263 
/*
 * Forward declarations: the AIO helpers below call these before their
 * definitions appear (record__aio_enabled() and record__comp_enabled() are
 * defined further down in this file).
 */
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, struct mmap *map,
			    void *dst, size_t dst_size, void *src, size_t src_size);
268 
269 #ifdef HAVE_AIO_SUPPORT
/*
 * Fill in @cblock and queue an asynchronous write of @size bytes from @buf
 * to @trace_fd at offset @off.
 *
 * The request is re-queued for as long as aio_write() fails with EAGAIN.
 * On any other failure the control block is marked free (aio_fildes = -1)
 * and the error is logged.
 *
 * Returns the last aio_write() result: 0 when queued, non-zero on failure.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			break;

		if (errno != EAGAIN) {
			cblock->aio_fildes = -1;
			pr_err("failed to queue perf data, error: %m\n");
			break;
		}
	}

	return rc;
}
294 
295 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
296 {
297 	void *rem_buf;
298 	off_t rem_off;
299 	size_t rem_size;
300 	int rc, aio_errno;
301 	ssize_t aio_ret, written;
302 
303 	aio_errno = aio_error(cblock);
304 	if (aio_errno == EINPROGRESS)
305 		return 0;
306 
307 	written = aio_ret = aio_return(cblock);
308 	if (aio_ret < 0) {
309 		if (aio_errno != EINTR)
310 			pr_err("failed to write perf data, error: %m\n");
311 		written = 0;
312 	}
313 
314 	rem_size = cblock->aio_nbytes - written;
315 
316 	if (rem_size == 0) {
317 		cblock->aio_fildes = -1;
318 		/*
319 		 * md->refcount is incremented in record__aio_pushfn() for
320 		 * every aio write request started in record__aio_push() so
321 		 * decrement it because the request is now complete.
322 		 */
323 		perf_mmap__put(&md->core);
324 		rc = 1;
325 	} else {
326 		/*
327 		 * aio write request may require restart with the
328 		 * reminder if the kernel didn't write whole
329 		 * chunk at once.
330 		 */
331 		rem_off = cblock->aio_offset + written;
332 		rem_buf = (void *)(cblock->aio_buf + written);
333 		record__aio_write(cblock, cblock->aio_fildes,
334 				rem_buf, rem_size, rem_off);
335 		rc = 0;
336 	}
337 
338 	return rc;
339 }
340 
341 static int record__aio_sync(struct mmap *md, bool sync_all)
342 {
343 	struct aiocb **aiocb = md->aio.aiocb;
344 	struct aiocb *cblocks = md->aio.cblocks;
345 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
346 	int i, do_suspend;
347 
348 	do {
349 		do_suspend = 0;
350 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
351 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
352 				if (sync_all)
353 					aiocb[i] = NULL;
354 				else
355 					return i;
356 			} else {
357 				/*
358 				 * Started aio write is not complete yet
359 				 * so it has to be waited before the
360 				 * next allocation.
361 				 */
362 				aiocb[i] = &cblocks[i];
363 				do_suspend = 1;
364 			}
365 		}
366 		if (!do_suspend)
367 			return -1;
368 
369 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
370 			if (!(errno == EAGAIN || errno == EINTR))
371 				pr_err("failed to sync perf data, error: %m\n");
372 		}
373 	} while (1);
374 }
375 
/* Cookie handed to record__aio_pushfn() through perf_mmap__push(). */
struct record_aio {
	struct record	*rec;	/* owning session */
	void		*data;	/* destination buffer (one of map->aio.data[]) */
	size_t		size;	/* bytes accumulated in 'data' so far */
};
381 
382 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
383 {
384 	struct record_aio *aio = to;
385 
386 	/*
387 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
388 	 * to release space in the kernel buffer as fast as possible, calling
389 	 * perf_mmap__consume() from perf_mmap__push() function.
390 	 *
391 	 * That lets the kernel to proceed with storing more profiling data into
392 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
393 	 *
394 	 * Coping can be done in two steps in case the chunk of profiling data
395 	 * crosses the upper bound of the kernel buffer. In this case we first move
396 	 * part of data from map->start till the upper bound and then the reminder
397 	 * from the beginning of the kernel buffer till the end of the data chunk.
398 	 */
399 
400 	if (record__comp_enabled(aio->rec)) {
401 		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
402 				     mmap__mmap_len(map) - aio->size,
403 				     buf, size);
404 	} else {
405 		memcpy(aio->data + aio->size, buf, size);
406 	}
407 
408 	if (!aio->size) {
409 		/*
410 		 * Increment map->refcount to guard map->aio.data[] buffer
411 		 * from premature deallocation because map object can be
412 		 * released earlier than aio write request started on
413 		 * map->aio.data[] buffer is complete.
414 		 *
415 		 * perf_mmap__put() is done at record__aio_complete()
416 		 * after started aio request completion or at record__aio_push()
417 		 * if the request failed to start.
418 		 */
419 		perf_mmap__get(&map->core);
420 	}
421 
422 	aio->size += size;
423 
424 	return size;
425 }
426 
427 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
428 {
429 	int ret, idx;
430 	int trace_fd = rec->session->data->file.fd;
431 	struct record_aio aio = { .rec = rec, .size = 0 };
432 
433 	/*
434 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
435 	 * becomes available after previous aio write operation.
436 	 */
437 
438 	idx = record__aio_sync(map, false);
439 	aio.data = map->aio.data[idx];
440 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
441 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
442 		return ret;
443 
444 	rec->samples++;
445 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
446 	if (!ret) {
447 		*off += aio.size;
448 		rec->bytes_written += aio.size;
449 		if (switch_output_size(rec))
450 			trigger_hit(&switch_output_trigger);
451 	} else {
452 		/*
453 		 * Decrement map->refcount incremented in record__aio_pushfn()
454 		 * back if record__aio_write() operation failed to start, otherwise
455 		 * map->refcount is decremented in record__aio_complete() after
456 		 * aio write operation finishes successfully.
457 		 */
458 		perf_mmap__put(&map->core);
459 	}
460 
461 	return ret;
462 }
463 
/* Current file offset of @trace_fd, or (off_t)-1 if lseek() fails. */
static off_t record__aio_get_pos(int trace_fd)
{
	const off_t cur = lseek(trace_fd, 0, SEEK_CUR);

	return cur;
}
468 
/* Move the file offset of @trace_fd to @pos; a failing lseek() is ignored. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
473 
474 static void record__aio_mmap_read_sync(struct record *rec)
475 {
476 	int i;
477 	struct evlist *evlist = rec->evlist;
478 	struct mmap *maps = evlist->mmap;
479 
480 	if (!record__aio_enabled(rec))
481 		return;
482 
483 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
484 		struct mmap *map = &maps[i];
485 
486 		if (map->core.base)
487 			record__aio_sync(map, true);
488 	}
489 }
490 
/* Number of AIO control blocks used when the option is given without a value. */
static int nr_cblocks_default = 1;
/* NOTE(review): presumably the upper bound enforced by the option handling - confirm. */
static int nr_cblocks_max = 4;
493 
494 static int record__aio_parse(const struct option *opt,
495 			     const char *str,
496 			     int unset)
497 {
498 	struct record_opts *opts = (struct record_opts *)opt->value;
499 
500 	if (unset) {
501 		opts->nr_cblocks = 0;
502 	} else {
503 		if (str)
504 			opts->nr_cblocks = strtol(str, NULL, 0);
505 		if (!opts->nr_cblocks)
506 			opts->nr_cblocks = nr_cblocks_default;
507 	}
508 
509 	return 0;
510 }
511 #else /* HAVE_AIO_SUPPORT */
/* Built without AIO support: no control blocks are available. */
static int nr_cblocks_max = 0;

/* Stub used without AIO support: always fails with -1. */
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

/* Stub used without AIO support: no position is available, returns -1. */
static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

/* Stub used without AIO support: does nothing. */
static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

/* Stub used without AIO support: there is nothing to wait for. */
static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
532 #endif
533 
534 static int record__aio_enabled(struct record *rec)
535 {
536 	return rec->opts.nr_cblocks > 0;
537 }
538 
539 #define MMAP_FLUSH_DEFAULT 1
540 static int record__mmap_flush_parse(const struct option *opt,
541 				    const char *str,
542 				    int unset)
543 {
544 	int flush_max;
545 	struct record_opts *opts = (struct record_opts *)opt->value;
546 	static struct parse_tag tags[] = {
547 			{ .tag  = 'B', .mult = 1       },
548 			{ .tag  = 'K', .mult = 1 << 10 },
549 			{ .tag  = 'M', .mult = 1 << 20 },
550 			{ .tag  = 'G', .mult = 1 << 30 },
551 			{ .tag  = 0 },
552 	};
553 
554 	if (unset)
555 		return 0;
556 
557 	if (str) {
558 		opts->mmap_flush = parse_tag_value(str, tags);
559 		if (opts->mmap_flush == (int)-1)
560 			opts->mmap_flush = strtol(str, NULL, 0);
561 	}
562 
563 	if (!opts->mmap_flush)
564 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
565 
566 	flush_max = evlist__mmap_size(opts->mmap_pages);
567 	flush_max /= 4;
568 	if (opts->mmap_flush > flush_max)
569 		opts->mmap_flush = flush_max;
570 
571 	return 0;
572 }
573 
574 #ifdef HAVE_ZSTD_SUPPORT
575 static unsigned int comp_level_default = 1;
576 
577 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
578 {
579 	struct record_opts *opts = opt->value;
580 
581 	if (unset) {
582 		opts->comp_level = 0;
583 	} else {
584 		if (str)
585 			opts->comp_level = strtol(str, NULL, 0);
586 		if (!opts->comp_level)
587 			opts->comp_level = comp_level_default;
588 	}
589 
590 	return 0;
591 }
592 #endif
/* NOTE(review): presumably the highest accepted compression level - confirm against its users. */
static unsigned int comp_level_max = 22;
594 
595 static int record__comp_enabled(struct record *rec)
596 {
597 	return rec->opts.comp_level > 0;
598 }
599 
600 static int process_synthesized_event(struct perf_tool *tool,
601 				     union perf_event *event,
602 				     struct perf_sample *sample __maybe_unused,
603 				     struct machine *machine __maybe_unused)
604 {
605 	struct record *rec = container_of(tool, struct record, tool);
606 	return record__write(rec, NULL, event, event->header.size);
607 }
608 
609 static int process_locked_synthesized_event(struct perf_tool *tool,
610 				     union perf_event *event,
611 				     struct perf_sample *sample __maybe_unused,
612 				     struct machine *machine __maybe_unused)
613 {
614 	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
615 	int ret;
616 
617 	pthread_mutex_lock(&synth_lock);
618 	ret = process_synthesized_event(tool, event, sample, machine);
619 	pthread_mutex_unlock(&synth_lock);
620 	return ret;
621 }
622 
623 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
624 {
625 	struct record *rec = to;
626 
627 	if (record__comp_enabled(rec)) {
628 		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
629 		bf   = map->data;
630 	}
631 
632 	thread->samples++;
633 	return record__write(rec, map, bf, size);
634 }
635 
/* Last terminating signal seen by sig_handler(), -1 while none arrived. */
static volatile int signr = -1;
/* Set to 1 by sig_handler() on SIGCHLD. */
static volatile int child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
/* Descriptor written by sig_handler() to wake up poll(); -1 until set elsewhere. */
static int done_fd = -1;
#endif
641 
642 static void sig_handler(int sig)
643 {
644 	if (sig == SIGCHLD)
645 		child_finished = 1;
646 	else
647 		signr = sig;
648 
649 	done = 1;
650 #ifdef HAVE_EVENTFD_SUPPORT
651 {
652 	u64 tmp = 1;
653 	/*
654 	 * It is possible for this signal handler to run after done is checked
655 	 * in the main loop, but before the perf counter fds are polled. If this
656 	 * happens, the poll() will continue to wait even though done is set,
657 	 * and will only break out if either another signal is received, or the
658 	 * counters are ready for read. To ensure the poll() doesn't sleep when
659 	 * done is set, use an eventfd (done_fd) to wake up the poll().
660 	 */
661 	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
662 		pr_err("failed to signal wakeup fd, error: %m\n");
663 }
664 #endif // HAVE_EVENTFD_SUPPORT
665 }
666 
/*
 * Crash handler: calls perf_hooks__recover() first, then hands @sig to
 * sighandler_dump_stack().
 */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
672 
673 static void record__sig_exit(void)
674 {
675 	if (signr == -1)
676 		return;
677 
678 	signal(signr, SIG_DFL);
679 	raise(signr);
680 }
681 
682 #ifdef HAVE_AUXTRACE_SUPPORT
683 
684 static int record__process_auxtrace(struct perf_tool *tool,
685 				    struct mmap *map,
686 				    union perf_event *event, void *data1,
687 				    size_t len1, void *data2, size_t len2)
688 {
689 	struct record *rec = container_of(tool, struct record, tool);
690 	struct perf_data *data = &rec->data;
691 	size_t padding;
692 	u8 pad[8] = {0};
693 
694 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
695 		off_t file_offset;
696 		int fd = perf_data__fd(data);
697 		int err;
698 
699 		file_offset = lseek(fd, 0, SEEK_CUR);
700 		if (file_offset == -1)
701 			return -1;
702 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
703 						     event, file_offset);
704 		if (err)
705 			return err;
706 	}
707 
708 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
709 	padding = (len1 + len2) & 7;
710 	if (padding)
711 		padding = 8 - padding;
712 
713 	record__write(rec, map, event, event->header.size);
714 	record__write(rec, map, data1, len1);
715 	if (len2)
716 		record__write(rec, map, data2, len2);
717 	record__write(rec, map, &pad, padding);
718 
719 	return 0;
720 }
721 
722 static int record__auxtrace_mmap_read(struct record *rec,
723 				      struct mmap *map)
724 {
725 	int ret;
726 
727 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
728 				  record__process_auxtrace);
729 	if (ret < 0)
730 		return ret;
731 
732 	if (ret)
733 		rec->samples++;
734 
735 	return 0;
736 }
737 
738 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
739 					       struct mmap *map)
740 {
741 	int ret;
742 
743 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
744 					   record__process_auxtrace,
745 					   rec->opts.auxtrace_snapshot_size);
746 	if (ret < 0)
747 		return ret;
748 
749 	if (ret)
750 		rec->samples++;
751 
752 	return 0;
753 }
754 
755 static int record__auxtrace_read_snapshot_all(struct record *rec)
756 {
757 	int i;
758 	int rc = 0;
759 
760 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
761 		struct mmap *map = &rec->evlist->mmap[i];
762 
763 		if (!map->auxtrace_mmap.base)
764 			continue;
765 
766 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
767 			rc = -1;
768 			goto out;
769 		}
770 	}
771 out:
772 	return rc;
773 }
774 
775 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
776 {
777 	pr_debug("Recording AUX area tracing snapshot\n");
778 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
779 		trigger_error(&auxtrace_snapshot_trigger);
780 	} else {
781 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
782 			trigger_error(&auxtrace_snapshot_trigger);
783 		else
784 			trigger_ready(&auxtrace_snapshot_trigger);
785 	}
786 }
787 
788 static int record__auxtrace_snapshot_exit(struct record *rec)
789 {
790 	if (trigger_is_error(&auxtrace_snapshot_trigger))
791 		return 0;
792 
793 	if (!auxtrace_record__snapshot_started &&
794 	    auxtrace_record__snapshot_start(rec->itr))
795 		return -1;
796 
797 	record__read_auxtrace_snapshot(rec, true);
798 	if (trigger_is_error(&auxtrace_snapshot_trigger))
799 		return -1;
800 
801 	return 0;
802 }
803 
804 static int record__auxtrace_init(struct record *rec)
805 {
806 	int err;
807 
808 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
809 	    && record__threads_enabled(rec)) {
810 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
811 		return -EINVAL;
812 	}
813 
814 	if (!rec->itr) {
815 		rec->itr = auxtrace_record__init(rec->evlist, &err);
816 		if (err)
817 			return err;
818 	}
819 
820 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
821 					      rec->opts.auxtrace_snapshot_opts);
822 	if (err)
823 		return err;
824 
825 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
826 					    rec->opts.auxtrace_sample_opts);
827 	if (err)
828 		return err;
829 
830 	auxtrace_regroup_aux_output(rec->evlist);
831 
832 	return auxtrace_parse_filters(rec->evlist);
833 }
834 
835 #else
836 
/* Stub used without AUX area tracing support: nothing to read, returns 0. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

/* Stub used without AUX area tracing support: does nothing. */
static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

/* Stub used without AUX area tracing support: returns 0. */
static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

/* Stub used without AUX area tracing support: returns 0. */
static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

/* Stub used without AUX area tracing support: returns 0. */
static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}
866 
867 #endif
868 
869 static int record__config_text_poke(struct evlist *evlist)
870 {
871 	struct evsel *evsel;
872 	int err;
873 
874 	/* Nothing to do if text poke is already configured */
875 	evlist__for_each_entry(evlist, evsel) {
876 		if (evsel->core.attr.text_poke)
877 			return 0;
878 	}
879 
880 	err = parse_events(evlist, "dummy:u", NULL);
881 	if (err)
882 		return err;
883 
884 	evsel = evlist__last(evlist);
885 
886 	evsel->core.attr.freq = 0;
887 	evsel->core.attr.sample_period = 1;
888 	evsel->core.attr.text_poke = 1;
889 	evsel->core.attr.ksymbol = 1;
890 
891 	evsel->core.system_wide = true;
892 	evsel->no_aux_samples = true;
893 	evsel->immediate = true;
894 
895 	/* Text poke must be collected on all CPUs */
896 	perf_cpu_map__put(evsel->core.own_cpus);
897 	evsel->core.own_cpus = perf_cpu_map__new(NULL);
898 	perf_cpu_map__put(evsel->core.cpus);
899 	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
900 
901 	evsel__set_sample_bit(evsel, TIME);
902 
903 	return 0;
904 }
905 
906 static bool record__kcore_readable(struct machine *machine)
907 {
908 	char kcore[PATH_MAX];
909 	int fd;
910 
911 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
912 
913 	fd = open(kcore, O_RDONLY);
914 	if (fd < 0)
915 		return false;
916 
917 	close(fd);
918 
919 	return true;
920 }
921 
922 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
923 {
924 	char from_dir[PATH_MAX];
925 	char kcore_dir[PATH_MAX];
926 	int ret;
927 
928 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
929 
930 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
931 	if (ret)
932 		return ret;
933 
934 	return kcore_copy(from_dir, kcore_dir);
935 }
936 
937 static void record__thread_data_init_pipes(struct record_thread *thread_data)
938 {
939 	thread_data->pipes.msg[0] = -1;
940 	thread_data->pipes.msg[1] = -1;
941 	thread_data->pipes.ack[0] = -1;
942 	thread_data->pipes.ack[1] = -1;
943 }
944 
945 static int record__thread_data_open_pipes(struct record_thread *thread_data)
946 {
947 	if (pipe(thread_data->pipes.msg))
948 		return -EINVAL;
949 
950 	if (pipe(thread_data->pipes.ack)) {
951 		close(thread_data->pipes.msg[0]);
952 		thread_data->pipes.msg[0] = -1;
953 		close(thread_data->pipes.msg[1]);
954 		thread_data->pipes.msg[1] = -1;
955 		return -EINVAL;
956 	}
957 
958 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
959 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
960 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
961 
962 	return 0;
963 }
964 
965 static void record__thread_data_close_pipes(struct record_thread *thread_data)
966 {
967 	if (thread_data->pipes.msg[0] != -1) {
968 		close(thread_data->pipes.msg[0]);
969 		thread_data->pipes.msg[0] = -1;
970 	}
971 	if (thread_data->pipes.msg[1] != -1) {
972 		close(thread_data->pipes.msg[1]);
973 		thread_data->pipes.msg[1] = -1;
974 	}
975 	if (thread_data->pipes.ack[0] != -1) {
976 		close(thread_data->pipes.ack[0]);
977 		thread_data->pipes.ack[0] = -1;
978 	}
979 	if (thread_data->pipes.ack[1] != -1) {
980 		close(thread_data->pipes.ack[1]);
981 		thread_data->pipes.ack[1] = -1;
982 	}
983 }
984 
985 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
986 {
987 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
988 	struct mmap *mmap = evlist->mmap;
989 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
990 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
991 
992 	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
993 					      thread_data->mask->maps.nbits);
994 	if (mmap) {
995 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
996 		if (!thread_data->maps)
997 			return -ENOMEM;
998 	}
999 	if (overwrite_mmap) {
1000 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1001 		if (!thread_data->overwrite_maps) {
1002 			zfree(&thread_data->maps);
1003 			return -ENOMEM;
1004 		}
1005 	}
1006 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1007 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1008 
1009 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1010 		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
1011 			if (thread_data->maps) {
1012 				thread_data->maps[tm] = &mmap[m];
1013 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1014 					  thread_data, cpus->map[m].cpu, tm, m);
1015 			}
1016 			if (thread_data->overwrite_maps) {
1017 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1018 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1019 					  thread_data, cpus->map[m].cpu, tm, m);
1020 			}
1021 			tm++;
1022 		}
1023 	}
1024 
1025 	return 0;
1026 }
1027 
1028 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1029 {
1030 	int f, tm, pos;
1031 	struct mmap *map, *overwrite_map;
1032 
1033 	fdarray__init(&thread_data->pollfd, 64);
1034 
1035 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1036 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1037 		overwrite_map = thread_data->overwrite_maps ?
1038 				thread_data->overwrite_maps[tm] : NULL;
1039 
1040 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1041 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1042 
1043 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1044 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1045 							      &evlist->core.pollfd);
1046 				if (pos < 0)
1047 					return pos;
1048 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1049 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1050 			}
1051 		}
1052 	}
1053 
1054 	return 0;
1055 }
1056 
1057 static void record__free_thread_data(struct record *rec)
1058 {
1059 	int t;
1060 	struct record_thread *thread_data = rec->thread_data;
1061 
1062 	if (thread_data == NULL)
1063 		return;
1064 
1065 	for (t = 0; t < rec->nr_threads; t++) {
1066 		record__thread_data_close_pipes(&thread_data[t]);
1067 		zfree(&thread_data[t].maps);
1068 		zfree(&thread_data[t].overwrite_maps);
1069 		fdarray__exit(&thread_data[t].pollfd);
1070 	}
1071 
1072 	zfree(&rec->thread_data);
1073 }
1074 
1075 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1076 {
1077 	int t, ret;
1078 	struct record_thread *thread_data;
1079 
1080 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1081 	if (!rec->thread_data) {
1082 		pr_err("Failed to allocate thread data\n");
1083 		return -ENOMEM;
1084 	}
1085 	thread_data = rec->thread_data;
1086 
1087 	for (t = 0; t < rec->nr_threads; t++)
1088 		record__thread_data_init_pipes(&thread_data[t]);
1089 
1090 	for (t = 0; t < rec->nr_threads; t++) {
1091 		thread_data[t].rec = rec;
1092 		thread_data[t].mask = &rec->thread_masks[t];
1093 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1094 		if (ret) {
1095 			pr_err("Failed to initialize thread[%d] maps\n", t);
1096 			goto out_free;
1097 		}
1098 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1099 		if (ret) {
1100 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1101 			goto out_free;
1102 		}
1103 		if (t) {
1104 			thread_data[t].tid = -1;
1105 			ret = record__thread_data_open_pipes(&thread_data[t]);
1106 			if (ret) {
1107 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1108 				goto out_free;
1109 			}
1110 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1111 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1112 			if (ret < 0) {
1113 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1114 				goto out_free;
1115 			}
1116 			thread_data[t].ctlfd_pos = ret;
1117 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1118 				 thread_data, thread_data[t].ctlfd_pos,
1119 				 thread_data[t].pipes.msg[0]);
1120 		} else {
1121 			thread_data[t].tid = gettid();
1122 			if (evlist->ctl_fd.pos == -1)
1123 				continue;
1124 			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1125 						      &evlist->core.pollfd);
1126 			if (ret < 0) {
1127 				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1128 				goto out_free;
1129 			}
1130 			thread_data[t].ctlfd_pos = ret;
1131 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1132 				 thread_data, thread_data[t].ctlfd_pos,
1133 				 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1134 		}
1135 	}
1136 
1137 	return 0;
1138 
1139 out_free:
1140 	record__free_thread_data(rec);
1141 
1142 	return ret;
1143 }
1144 
1145 static int record__mmap_evlist(struct record *rec,
1146 			       struct evlist *evlist)
1147 {
1148 	int i, ret;
1149 	struct record_opts *opts = &rec->opts;
1150 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1151 				  opts->auxtrace_sample_mode;
1152 	char msg[512];
1153 
1154 	if (opts->affinity != PERF_AFFINITY_SYS)
1155 		cpu__setup_cpunode_map();
1156 
1157 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1158 				 opts->auxtrace_mmap_pages,
1159 				 auxtrace_overwrite,
1160 				 opts->nr_cblocks, opts->affinity,
1161 				 opts->mmap_flush, opts->comp_level) < 0) {
1162 		if (errno == EPERM) {
1163 			pr_err("Permission error mapping pages.\n"
1164 			       "Consider increasing "
1165 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1166 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1167 			       "(current value: %u,%u)\n",
1168 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1169 			return -errno;
1170 		} else {
1171 			pr_err("failed to mmap with %d (%s)\n", errno,
1172 				str_error_r(errno, msg, sizeof(msg)));
1173 			if (errno)
1174 				return -errno;
1175 			else
1176 				return -EINVAL;
1177 		}
1178 	}
1179 
1180 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1181 		return -1;
1182 
1183 	ret = record__alloc_thread_data(rec, evlist);
1184 	if (ret)
1185 		return ret;
1186 
1187 	if (record__threads_enabled(rec)) {
1188 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1189 		if (ret) {
1190 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1191 			return ret;
1192 		}
1193 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1194 			if (evlist->mmap)
1195 				evlist->mmap[i].file = &rec->data.dir.files[i];
1196 			if (evlist->overwrite_mmap)
1197 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1198 		}
1199 	}
1200 
1201 	return 0;
1202 }
1203 
1204 static int record__mmap(struct record *rec)
1205 {
1206 	return record__mmap_evlist(rec, rec->evlist);
1207 }
1208 
/*
 * Open all events of rec->evlist, apply the event filters and mmap the
 * ring buffers.
 *
 * Before opening, a dummy tracking event is added when needed (see the
 * comment below) and the evlist is configured from the record options.
 * An event that fails to open is retried when evsel__fallback() accepts
 * the failure, or when it is a non-leader member of a weak group that
 * failed with EINVAL/EBADF.
 *
 * On success the evlist is attached to the session and 0 is returned;
 * otherwise a non-zero error is returned.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay, system wide or a hybrid system, we need to add a
	 * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
	 * of waiting or event synthesis.
	 */
	if (opts->initial_delay || target__has_cpu(&opts->target) ||
	    perf_pmu__has_hybrid()) {
		pos = evlist__get_tracking_event(evlist);
		if (!evsel__is_dummy_event(pos)) {
			/* Set up dummy event. */
			if (evlist__add_dummy(evlist))
				return -ENOMEM;
			pos = evlist__last(evlist);
			evlist__set_tracking_event(evlist, pos);
		}

		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, immediately for system wide.
		 */
		if (opts->initial_delay && !pos->immediate &&
		    !target__has_cpu(&opts->target))
			pos->core.attr.enable_on_exec = 1;
		else
			pos->immediate = 1;
	}

	evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			/* Retry the same event if a fallback was accepted. */
			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/*
			 * Non-leader member of a weak group: reset the group
			 * and retry from the event the reset returns.
			 */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->core.leader != &pos->core &&
			    pos->weak_group) {
			        pos = evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			/* No recovery possible: report and fail with -errno. */
			rc = -errno;
			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	/* On failure 'pos' is set to the event whose filter was rejected. */
	if (evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
1298 
1299 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1300 {
1301 	if (rec->evlist->first_sample_time == 0)
1302 		rec->evlist->first_sample_time = sample_time;
1303 
1304 	if (sample_time)
1305 		rec->evlist->last_sample_time = sample_time;
1306 }
1307 
1308 static int process_sample_event(struct perf_tool *tool,
1309 				union perf_event *event,
1310 				struct perf_sample *sample,
1311 				struct evsel *evsel,
1312 				struct machine *machine)
1313 {
1314 	struct record *rec = container_of(tool, struct record, tool);
1315 
1316 	set_timestamp_boundary(rec, sample->time);
1317 
1318 	if (rec->buildid_all)
1319 		return 0;
1320 
1321 	rec->samples++;
1322 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1323 }
1324 
1325 static int process_buildids(struct record *rec)
1326 {
1327 	struct perf_session *session = rec->session;
1328 
1329 	if (perf_data__size(&rec->data) == 0)
1330 		return 0;
1331 
1332 	/*
1333 	 * During this process, it'll load kernel map and replace the
1334 	 * dso->long_name to a real pathname it found.  In this case
1335 	 * we prefer the vmlinux path like
1336 	 *   /lib/modules/3.16.4/build/vmlinux
1337 	 *
1338 	 * rather than build-id path (in debug directory).
1339 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1340 	 */
1341 	symbol_conf.ignore_vmlinux_buildid = true;
1342 
1343 	/*
1344 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1345 	 * so no need to process samples. But if timestamp_boundary is enabled,
1346 	 * it still needs to walk on all samples to get the timestamps of
1347 	 * first/last samples.
1348 	 */
1349 	if (rec->buildid_all && !rec->timestamp_boundary)
1350 		rec->tool.sample = NULL;
1351 
1352 	return perf_session__process_events(session);
1353 }
1354 
1355 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1356 {
1357 	int err;
1358 	struct perf_tool *tool = data;
1359 	/*
1360 	 *As for guest kernel when processing subcommand record&report,
1361 	 *we arrange module mmap prior to guest kernel mmap and trigger
1362 	 *a preload dso because default guest module symbols are loaded
1363 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1364 	 *method is used to avoid symbol missing when the first addr is
1365 	 *in module instead of in guest kernel.
1366 	 */
1367 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1368 					     machine);
1369 	if (err < 0)
1370 		pr_err("Couldn't record guest kernel [%d]'s reference"
1371 		       " relocation symbol.\n", machine->pid);
1372 
1373 	/*
1374 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1375 	 * have no _text sometimes.
1376 	 */
1377 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1378 						 machine);
1379 	if (err < 0)
1380 		pr_err("Couldn't record guest kernel [%d]'s reference"
1381 		       " relocation symbol.\n", machine->pid);
1382 }
1383 
/*
 * Payload-less PERF_RECORD_FINISHED_ROUND event: .size covers only the
 * event header itself.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
1388 
1389 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1390 {
1391 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1392 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1393 			  thread->mask->affinity.nbits)) {
1394 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1395 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1396 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1397 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1398 					(cpu_set_t *)thread->mask->affinity.bits);
1399 		if (verbose == 2) {
1400 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1401 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1402 		}
1403 	}
1404 }
1405 
1406 static size_t process_comp_header(void *record, size_t increment)
1407 {
1408 	struct perf_record_compressed *event = record;
1409 	size_t size = sizeof(*event);
1410 
1411 	if (increment) {
1412 		event->header.size += increment;
1413 		return increment;
1414 	}
1415 
1416 	event->header.type = PERF_RECORD_COMPRESSED;
1417 	event->header.size = size;
1418 
1419 	return size;
1420 }
1421 
1422 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1423 			    void *dst, size_t dst_size, void *src, size_t src_size)
1424 {
1425 	size_t compressed;
1426 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1427 	struct zstd_data *zstd_data = &session->zstd_data;
1428 
1429 	if (map && map->file)
1430 		zstd_data = &map->zstd_data;
1431 
1432 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1433 						     max_record_size, process_comp_header);
1434 
1435 	if (map && map->file) {
1436 		thread->bytes_transferred += src_size;
1437 		thread->bytes_compressed  += compressed;
1438 	} else {
1439 		session->bytes_transferred += src_size;
1440 		session->bytes_compressed  += compressed;
1441 	}
1442 
1443 	return compressed;
1444 }
1445 
1446 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1447 				    bool overwrite, bool synch)
1448 {
1449 	u64 bytes_written = rec->bytes_written;
1450 	int i;
1451 	int rc = 0;
1452 	int nr_mmaps;
1453 	struct mmap **maps;
1454 	int trace_fd = rec->data.file.fd;
1455 	off_t off = 0;
1456 
1457 	if (!evlist)
1458 		return 0;
1459 
1460 	nr_mmaps = thread->nr_mmaps;
1461 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1462 
1463 	if (!maps)
1464 		return 0;
1465 
1466 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1467 		return 0;
1468 
1469 	if (record__aio_enabled(rec))
1470 		off = record__aio_get_pos(trace_fd);
1471 
1472 	for (i = 0; i < nr_mmaps; i++) {
1473 		u64 flush = 0;
1474 		struct mmap *map = maps[i];
1475 
1476 		if (map->core.base) {
1477 			record__adjust_affinity(rec, map);
1478 			if (synch) {
1479 				flush = map->core.flush;
1480 				map->core.flush = 1;
1481 			}
1482 			if (!record__aio_enabled(rec)) {
1483 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1484 					if (synch)
1485 						map->core.flush = flush;
1486 					rc = -1;
1487 					goto out;
1488 				}
1489 			} else {
1490 				if (record__aio_push(rec, map, &off) < 0) {
1491 					record__aio_set_pos(trace_fd, off);
1492 					if (synch)
1493 						map->core.flush = flush;
1494 					rc = -1;
1495 					goto out;
1496 				}
1497 			}
1498 			if (synch)
1499 				map->core.flush = flush;
1500 		}
1501 
1502 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1503 		    !rec->opts.auxtrace_sample_mode &&
1504 		    record__auxtrace_mmap_read(rec, map) != 0) {
1505 			rc = -1;
1506 			goto out;
1507 		}
1508 	}
1509 
1510 	if (record__aio_enabled(rec))
1511 		record__aio_set_pos(trace_fd, off);
1512 
1513 	/*
1514 	 * Mark the round finished in case we wrote
1515 	 * at least one event.
1516 	 *
1517 	 * No need for round events in directory mode,
1518 	 * because per-cpu maps and files have data
1519 	 * sorted by kernel.
1520 	 */
1521 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1522 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1523 
1524 	if (overwrite)
1525 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1526 out:
1527 	return rc;
1528 }
1529 
1530 static int record__mmap_read_all(struct record *rec, bool synch)
1531 {
1532 	int err;
1533 
1534 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1535 	if (err)
1536 		return err;
1537 
1538 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1539 }
1540 
1541 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1542 					   void *arg __maybe_unused)
1543 {
1544 	struct perf_mmap *map = fda->priv[fd].ptr;
1545 
1546 	if (map)
1547 		perf_mmap__put(map);
1548 }
1549 
/*
 * Entry point of a recording worker thread.
 *
 * @arg is the worker's struct record_thread; it is stored in 'thread',
 * which the mmap-reading helpers consult. The worker writes
 * THREAD_MSG__READY to its ack pipe once on start and once more right
 * before returning. In between it keeps draining its maps and polling its
 * descriptors until either no pollable descriptor is left or a hang-up is
 * seen on its message pipe. Always returns NULL.
 */
static void *record__thread(void *arg)
{
	enum thread_msg msg = THREAD_MSG__READY;
	bool terminate = false;
	struct fdarray *pollfd;
	int err, ctlfd_pos;

	thread = arg;
	thread->tid = gettid();

	/* Tell the creator that this worker is up. */
	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on start: %s\n",
			   thread->tid, strerror(errno));

	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());

	pollfd = &thread->pollfd;
	ctlfd_pos = thread->ctlfd_pos;

	for (;;) {
		unsigned long long hits = thread->samples;

		/*
		 * Once 'terminate' is set, the maps are read one more time
		 * here before the loop is left.
		 */
		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
			break;

		/* The sample counter did not move: block until something is ready. */
		if (hits == thread->samples) {

			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			thread->waking++;

			/*
			 * Filter entries that reported POLLERR/POLLHUP; stop when
			 * the filter returns 0 (presumably no entries left to
			 * poll - confirm against fdarray__filter()).
			 */
			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
					    record__thread_munmap_filtered, NULL) == 0)
				break;
		}

		/*
		 * Hang-up on the message pipe entry: request termination and
		 * retire the read end so it is not polled again.
		 */
		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
			terminate = true;
			close(thread->pipes.msg[0]);
			thread->pipes.msg[0] = -1;
			pollfd->entries[ctlfd_pos].fd = -1;
			pollfd->entries[ctlfd_pos].events = 0;
		}

		pollfd->entries[ctlfd_pos].revents = 0;
	}
	/* Final pass with synch set. */
	record__mmap_read_all(thread->rec, true);

	/* Tell the waiting side that this worker has finished. */
	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
	if (err == -1)
		pr_warning("threads[%d]: failed to notify on termination: %s\n",
			   thread->tid, strerror(errno));

	return NULL;
}
1611 
1612 static void record__init_features(struct record *rec)
1613 {
1614 	struct perf_session *session = rec->session;
1615 	int feat;
1616 
1617 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1618 		perf_header__set_feat(&session->header, feat);
1619 
1620 	if (rec->no_buildid)
1621 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1622 
1623 	if (!have_tracepoints(&rec->evlist->core.entries))
1624 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1625 
1626 	if (!rec->opts.branch_stack)
1627 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1628 
1629 	if (!rec->opts.full_auxtrace)
1630 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1631 
1632 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1633 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1634 
1635 	if (!rec->opts.use_clockid)
1636 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1637 
1638 	if (!record__threads_enabled(rec))
1639 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1640 
1641 	if (!record__comp_enabled(rec))
1642 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1643 
1644 	perf_header__clear_feat(&session->header, HEADER_STAT);
1645 }
1646 
1647 static void
1648 record__finish_output(struct record *rec)
1649 {
1650 	int i;
1651 	struct perf_data *data = &rec->data;
1652 	int fd = perf_data__fd(data);
1653 
1654 	if (data->is_pipe)
1655 		return;
1656 
1657 	rec->session->header.data_size += rec->bytes_written;
1658 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1659 	if (record__threads_enabled(rec)) {
1660 		for (i = 0; i < data->dir.nr; i++)
1661 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1662 	}
1663 
1664 	if (!rec->no_buildid) {
1665 		process_buildids(rec);
1666 
1667 		if (rec->buildid_all)
1668 			dsos__hit_all(rec->session);
1669 	}
1670 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1671 
1672 	return;
1673 }
1674 
1675 static int record__synthesize_workload(struct record *rec, bool tail)
1676 {
1677 	int err;
1678 	struct perf_thread_map *thread_map;
1679 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1680 
1681 	if (rec->opts.tail_synthesize != tail)
1682 		return 0;
1683 
1684 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1685 	if (thread_map == NULL)
1686 		return -1;
1687 
1688 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1689 						 process_synthesized_event,
1690 						 &rec->session->machines.host,
1691 						 needs_mmap,
1692 						 rec->opts.sample_address);
1693 	perf_thread_map__put(thread_map);
1694 	return err;
1695 }
1696 
1697 static int record__synthesize(struct record *rec, bool tail);
1698 
1699 static int
1700 record__switch_output(struct record *rec, bool at_exit)
1701 {
1702 	struct perf_data *data = &rec->data;
1703 	int fd, err;
1704 	char *new_filename;
1705 
1706 	/* Same Size:      "2015122520103046"*/
1707 	char timestamp[] = "InvalidTimestamp";
1708 
1709 	record__aio_mmap_read_sync(rec);
1710 
1711 	record__synthesize(rec, true);
1712 	if (target__none(&rec->opts.target))
1713 		record__synthesize_workload(rec, true);
1714 
1715 	rec->samples = 0;
1716 	record__finish_output(rec);
1717 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1718 	if (err) {
1719 		pr_err("Failed to get current timestamp\n");
1720 		return -EINVAL;
1721 	}
1722 
1723 	fd = perf_data__switch(data, timestamp,
1724 				    rec->session->header.data_offset,
1725 				    at_exit, &new_filename);
1726 	if (fd >= 0 && !at_exit) {
1727 		rec->bytes_written = 0;
1728 		rec->session->header.data_size = 0;
1729 	}
1730 
1731 	if (!quiet)
1732 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1733 			data->path, timestamp);
1734 
1735 	if (rec->switch_output.num_files) {
1736 		int n = rec->switch_output.cur_file + 1;
1737 
1738 		if (n >= rec->switch_output.num_files)
1739 			n = 0;
1740 		rec->switch_output.cur_file = n;
1741 		if (rec->switch_output.filenames[n]) {
1742 			remove(rec->switch_output.filenames[n]);
1743 			zfree(&rec->switch_output.filenames[n]);
1744 		}
1745 		rec->switch_output.filenames[n] = new_filename;
1746 	} else {
1747 		free(new_filename);
1748 	}
1749 
1750 	/* Output tracking events */
1751 	if (!at_exit) {
1752 		record__synthesize(rec, false);
1753 
1754 		/*
1755 		 * In 'perf record --switch-output' without -a,
1756 		 * record__synthesize() in record__switch_output() won't
1757 		 * generate tracking events because there's no thread_map
1758 		 * in evlist. Which causes newly created perf.data doesn't
1759 		 * contain map and comm information.
1760 		 * Create a fake thread_map and directly call
1761 		 * perf_event__synthesize_thread_map() for those events.
1762 		 */
1763 		if (target__none(&rec->opts.target))
1764 			record__synthesize_workload(rec, false);
1765 	}
1766 	return fd;
1767 }
1768 
/*
 * Value carried by the workload failure signal, stored by
 * workload_exec_failed_signal(); stays 0 unless that handler runs.
 */
static volatile int workload_exec_errno;

/*
 * evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 *
 * The handler saves the integer passed in the signal's si_value and sets
 * both 'done' and 'child_finished'.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
1784 
1785 static void snapshot_sig_handler(int sig);
1786 static void alarm_sig_handler(int sig);
1787 
1788 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1789 {
1790 	if (evlist) {
1791 		if (evlist->mmap && evlist->mmap[0].core.base)
1792 			return evlist->mmap[0].core.base;
1793 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1794 			return evlist->overwrite_mmap[0].core.base;
1795 	}
1796 	return NULL;
1797 }
1798 
1799 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1800 {
1801 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1802 	if (pc)
1803 		return pc;
1804 	return NULL;
1805 }
1806 
1807 static int record__synthesize(struct record *rec, bool tail)
1808 {
1809 	struct perf_session *session = rec->session;
1810 	struct machine *machine = &session->machines.host;
1811 	struct perf_data *data = &rec->data;
1812 	struct record_opts *opts = &rec->opts;
1813 	struct perf_tool *tool = &rec->tool;
1814 	int err = 0;
1815 	event_op f = process_synthesized_event;
1816 
1817 	if (rec->opts.tail_synthesize != tail)
1818 		return 0;
1819 
1820 	if (data->is_pipe) {
1821 		err = perf_event__synthesize_for_pipe(tool, session, data,
1822 						      process_synthesized_event);
1823 		if (err < 0)
1824 			goto out;
1825 
1826 		rec->bytes_written += err;
1827 	}
1828 
1829 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1830 					  process_synthesized_event, machine);
1831 	if (err)
1832 		goto out;
1833 
1834 	/* Synthesize id_index before auxtrace_info */
1835 	if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
1836 		err = perf_event__synthesize_id_index(tool,
1837 						      process_synthesized_event,
1838 						      session->evlist, machine);
1839 		if (err)
1840 			goto out;
1841 	}
1842 
1843 	if (rec->opts.full_auxtrace) {
1844 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1845 					session, process_synthesized_event);
1846 		if (err)
1847 			goto out;
1848 	}
1849 
1850 	if (!evlist__exclude_kernel(rec->evlist)) {
1851 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1852 							 machine);
1853 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1854 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1855 				   "Check /proc/kallsyms permission or run as root.\n");
1856 
1857 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1858 						     machine);
1859 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1860 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1861 				   "Check /proc/modules permission or run as root.\n");
1862 	}
1863 
1864 	if (perf_guest) {
1865 		machines__process_guests(&session->machines,
1866 					 perf_event__synthesize_guest_os, tool);
1867 	}
1868 
1869 	err = perf_event__synthesize_extra_attr(&rec->tool,
1870 						rec->evlist,
1871 						process_synthesized_event,
1872 						data->is_pipe);
1873 	if (err)
1874 		goto out;
1875 
1876 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1877 						 process_synthesized_event,
1878 						NULL);
1879 	if (err < 0) {
1880 		pr_err("Couldn't synthesize thread map.\n");
1881 		return err;
1882 	}
1883 
1884 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
1885 					     process_synthesized_event, NULL);
1886 	if (err < 0) {
1887 		pr_err("Couldn't synthesize cpu map.\n");
1888 		return err;
1889 	}
1890 
1891 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1892 						machine, opts);
1893 	if (err < 0)
1894 		pr_warning("Couldn't synthesize bpf events.\n");
1895 
1896 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1897 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1898 						     machine);
1899 		if (err < 0)
1900 			pr_warning("Couldn't synthesize cgroup events.\n");
1901 	}
1902 
1903 	if (rec->opts.nr_threads_synthesize > 1) {
1904 		perf_set_multithreaded();
1905 		f = process_locked_synthesized_event;
1906 	}
1907 
1908 	if (rec->opts.synth & PERF_SYNTH_TASK) {
1909 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1910 
1911 		err = __machine__synthesize_threads(machine, tool, &opts->target,
1912 						    rec->evlist->core.threads,
1913 						    f, needs_mmap, opts->sample_address,
1914 						    rec->opts.nr_threads_synthesize);
1915 	}
1916 
1917 	if (rec->opts.nr_threads_synthesize > 1)
1918 		perf_set_singlethreaded();
1919 
1920 out:
1921 	return err;
1922 }
1923 
1924 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1925 {
1926 	struct record *rec = data;
1927 	pthread_kill(rec->thread_id, SIGUSR2);
1928 	return 0;
1929 }
1930 
1931 static int record__setup_sb_evlist(struct record *rec)
1932 {
1933 	struct record_opts *opts = &rec->opts;
1934 
1935 	if (rec->sb_evlist != NULL) {
1936 		/*
1937 		 * We get here if --switch-output-event populated the
1938 		 * sb_evlist, so associate a callback that will send a SIGUSR2
1939 		 * to the main thread.
1940 		 */
1941 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1942 		rec->thread_id = pthread_self();
1943 	}
1944 #ifdef HAVE_LIBBPF_SUPPORT
1945 	if (!opts->no_bpf_event) {
1946 		if (rec->sb_evlist == NULL) {
1947 			rec->sb_evlist = evlist__new();
1948 
1949 			if (rec->sb_evlist == NULL) {
1950 				pr_err("Couldn't create side band evlist.\n.");
1951 				return -1;
1952 			}
1953 		}
1954 
1955 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1956 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
1957 			return -1;
1958 		}
1959 	}
1960 #endif
1961 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1962 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1963 		opts->no_bpf_event = true;
1964 	}
1965 
1966 	return 0;
1967 }
1968 
1969 static int record__init_clock(struct record *rec)
1970 {
1971 	struct perf_session *session = rec->session;
1972 	struct timespec ref_clockid;
1973 	struct timeval ref_tod;
1974 	u64 ref;
1975 
1976 	if (!rec->opts.use_clockid)
1977 		return 0;
1978 
1979 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1980 		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1981 
1982 	session->header.env.clock.clockid = rec->opts.clockid;
1983 
1984 	if (gettimeofday(&ref_tod, NULL) != 0) {
1985 		pr_err("gettimeofday failed, cannot set reference time.\n");
1986 		return -1;
1987 	}
1988 
1989 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1990 		pr_err("clock_gettime failed, cannot set reference time.\n");
1991 		return -1;
1992 	}
1993 
1994 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1995 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1996 
1997 	session->header.env.clock.tod_ns = ref;
1998 
1999 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2000 	      (u64) ref_clockid.tv_nsec;
2001 
2002 	session->header.env.clock.clockid_ns = ref;
2003 	return 0;
2004 }
2005 
2006 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2007 {
2008 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2009 		trigger_hit(&auxtrace_snapshot_trigger);
2010 		auxtrace_record__snapshot_started = 1;
2011 		if (auxtrace_record__snapshot_start(rec->itr))
2012 			trigger_error(&auxtrace_snapshot_trigger);
2013 	}
2014 }
2015 
2016 static void record__uniquify_name(struct record *rec)
2017 {
2018 	struct evsel *pos;
2019 	struct evlist *evlist = rec->evlist;
2020 	char *new_name;
2021 	int ret;
2022 
2023 	if (!perf_pmu__has_hybrid())
2024 		return;
2025 
2026 	evlist__for_each_entry(evlist, pos) {
2027 		if (!evsel__is_hybrid(pos))
2028 			continue;
2029 
2030 		if (strchr(pos->name, '/'))
2031 			continue;
2032 
2033 		ret = asprintf(&new_name, "%s/%s/",
2034 			       pos->pmu_name, pos->name);
2035 		if (ret) {
2036 			free(pos->name);
2037 			pos->name = new_name;
2038 		}
2039 	}
2040 }
2041 
2042 static int record__terminate_thread(struct record_thread *thread_data)
2043 {
2044 	int err;
2045 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2046 	pid_t tid = thread_data->tid;
2047 
2048 	close(thread_data->pipes.msg[1]);
2049 	thread_data->pipes.msg[1] = -1;
2050 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2051 	if (err > 0)
2052 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2053 	else
2054 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2055 			   thread->tid, tid);
2056 
2057 	return 0;
2058 }
2059 
2060 static int record__start_threads(struct record *rec)
2061 {
2062 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2063 	struct record_thread *thread_data = rec->thread_data;
2064 	sigset_t full, mask;
2065 	pthread_t handle;
2066 	pthread_attr_t attrs;
2067 
2068 	thread = &thread_data[0];
2069 
2070 	if (!record__threads_enabled(rec))
2071 		return 0;
2072 
2073 	sigfillset(&full);
2074 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2075 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2076 		return -1;
2077 	}
2078 
2079 	pthread_attr_init(&attrs);
2080 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2081 
2082 	for (t = 1; t < nr_threads; t++) {
2083 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2084 
2085 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2086 		pthread_attr_setaffinity_np(&attrs,
2087 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2088 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2089 #endif
2090 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2091 			for (tt = 1; tt < t; tt++)
2092 				record__terminate_thread(&thread_data[t]);
2093 			pr_err("Failed to start threads: %s\n", strerror(errno));
2094 			ret = -1;
2095 			goto out_err;
2096 		}
2097 
2098 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2099 		if (err > 0)
2100 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2101 				  thread_msg_tags[msg]);
2102 		else
2103 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2104 				   thread->tid, rec->thread_data[t].tid);
2105 	}
2106 
2107 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2108 			(cpu_set_t *)thread->mask->affinity.bits);
2109 
2110 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2111 
2112 out_err:
2113 	pthread_attr_destroy(&attrs);
2114 
2115 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2116 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2117 		ret = -1;
2118 	}
2119 
2120 	return ret;
2121 }
2122 
2123 static int record__stop_threads(struct record *rec)
2124 {
2125 	int t;
2126 	struct record_thread *thread_data = rec->thread_data;
2127 
2128 	for (t = 1; t < rec->nr_threads; t++)
2129 		record__terminate_thread(&thread_data[t]);
2130 
2131 	for (t = 0; t < rec->nr_threads; t++) {
2132 		rec->samples += thread_data[t].samples;
2133 		if (!record__threads_enabled(rec))
2134 			continue;
2135 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2136 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2137 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2138 			 thread_data[t].samples, thread_data[t].waking);
2139 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2140 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2141 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2142 		else
2143 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2144 	}
2145 
2146 	return 0;
2147 }
2148 
2149 static unsigned long record__waking(struct record *rec)
2150 {
2151 	int t;
2152 	unsigned long waking = 0;
2153 	struct record_thread *thread_data = rec->thread_data;
2154 
2155 	for (t = 0; t < rec->nr_threads; t++)
2156 		waking += thread_data[t].waking;
2157 
2158 	return waking;
2159 }
2160 
2161 static int __cmd_record(struct record *rec, int argc, const char **argv)
2162 {
2163 	int err;
2164 	int status = 0;
2165 	const bool forks = argc > 0;
2166 	struct perf_tool *tool = &rec->tool;
2167 	struct record_opts *opts = &rec->opts;
2168 	struct perf_data *data = &rec->data;
2169 	struct perf_session *session;
2170 	bool disabled = false, draining = false;
2171 	int fd;
2172 	float ratio = 0;
2173 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2174 
2175 	atexit(record__sig_exit);
2176 	signal(SIGCHLD, sig_handler);
2177 	signal(SIGINT, sig_handler);
2178 	signal(SIGTERM, sig_handler);
2179 	signal(SIGSEGV, sigsegv_handler);
2180 
2181 	if (rec->opts.record_namespaces)
2182 		tool->namespace_events = true;
2183 
2184 	if (rec->opts.record_cgroup) {
2185 #ifdef HAVE_FILE_HANDLE
2186 		tool->cgroup_events = true;
2187 #else
2188 		pr_err("cgroup tracking is not supported\n");
2189 		return -1;
2190 #endif
2191 	}
2192 
2193 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2194 		signal(SIGUSR2, snapshot_sig_handler);
2195 		if (rec->opts.auxtrace_snapshot_mode)
2196 			trigger_on(&auxtrace_snapshot_trigger);
2197 		if (rec->switch_output.enabled)
2198 			trigger_on(&switch_output_trigger);
2199 	} else {
2200 		signal(SIGUSR2, SIG_IGN);
2201 	}
2202 
2203 	session = perf_session__new(data, tool);
2204 	if (IS_ERR(session)) {
2205 		pr_err("Perf session creation failed.\n");
2206 		return PTR_ERR(session);
2207 	}
2208 
2209 	if (record__threads_enabled(rec)) {
2210 		if (perf_data__is_pipe(&rec->data)) {
2211 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2212 			return -1;
2213 		}
2214 		if (rec->opts.full_auxtrace) {
2215 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2216 			return -1;
2217 		}
2218 	}
2219 
2220 	fd = perf_data__fd(data);
2221 	rec->session = session;
2222 
2223 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2224 		pr_err("Compression initialization failed.\n");
2225 		return -1;
2226 	}
2227 #ifdef HAVE_EVENTFD_SUPPORT
2228 	done_fd = eventfd(0, EFD_NONBLOCK);
2229 	if (done_fd < 0) {
2230 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2231 		status = -1;
2232 		goto out_delete_session;
2233 	}
2234 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2235 	if (err < 0) {
2236 		pr_err("Failed to add wakeup eventfd to poll list\n");
2237 		status = err;
2238 		goto out_delete_session;
2239 	}
2240 #endif // HAVE_EVENTFD_SUPPORT
2241 
2242 	session->header.env.comp_type  = PERF_COMP_ZSTD;
2243 	session->header.env.comp_level = rec->opts.comp_level;
2244 
2245 	if (rec->opts.kcore &&
2246 	    !record__kcore_readable(&session->machines.host)) {
2247 		pr_err("ERROR: kcore is not readable.\n");
2248 		return -1;
2249 	}
2250 
2251 	if (record__init_clock(rec))
2252 		return -1;
2253 
2254 	record__init_features(rec);
2255 
2256 	if (forks) {
2257 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2258 					       workload_exec_failed_signal);
2259 		if (err < 0) {
2260 			pr_err("Couldn't run the workload!\n");
2261 			status = err;
2262 			goto out_delete_session;
2263 		}
2264 	}
2265 
2266 	/*
2267 	 * If we have just single event and are sending data
2268 	 * through pipe, we need to force the ids allocation,
2269 	 * because we synthesize event name through the pipe
2270 	 * and need the id for that.
2271 	 */
2272 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2273 		rec->opts.sample_id = true;
2274 
2275 	record__uniquify_name(rec);
2276 
2277 	if (record__open(rec) != 0) {
2278 		err = -1;
2279 		goto out_free_threads;
2280 	}
2281 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2282 
2283 	if (rec->opts.kcore) {
2284 		err = record__kcore_copy(&session->machines.host, data);
2285 		if (err) {
2286 			pr_err("ERROR: Failed to copy kcore\n");
2287 			goto out_free_threads;
2288 		}
2289 	}
2290 
2291 	err = bpf__apply_obj_config();
2292 	if (err) {
2293 		char errbuf[BUFSIZ];
2294 
2295 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2296 		pr_err("ERROR: Apply config to BPF failed: %s\n",
2297 			 errbuf);
2298 		goto out_free_threads;
2299 	}
2300 
2301 	/*
2302 	 * Normally perf_session__new would do this, but it doesn't have the
2303 	 * evlist.
2304 	 */
2305 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2306 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2307 		rec->tool.ordered_events = false;
2308 	}
2309 
2310 	if (!rec->evlist->core.nr_groups)
2311 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2312 
2313 	if (data->is_pipe) {
2314 		err = perf_header__write_pipe(fd);
2315 		if (err < 0)
2316 			goto out_free_threads;
2317 	} else {
2318 		err = perf_session__write_header(session, rec->evlist, fd, false);
2319 		if (err < 0)
2320 			goto out_free_threads;
2321 	}
2322 
2323 	err = -1;
2324 	if (!rec->no_buildid
2325 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2326 		pr_err("Couldn't generate buildids. "
2327 		       "Use --no-buildid to profile anyway.\n");
2328 		goto out_free_threads;
2329 	}
2330 
2331 	err = record__setup_sb_evlist(rec);
2332 	if (err)
2333 		goto out_free_threads;
2334 
2335 	err = record__synthesize(rec, false);
2336 	if (err < 0)
2337 		goto out_free_threads;
2338 
2339 	if (rec->realtime_prio) {
2340 		struct sched_param param;
2341 
2342 		param.sched_priority = rec->realtime_prio;
2343 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2344 			pr_err("Could not set realtime priority.\n");
2345 			err = -1;
2346 			goto out_free_threads;
2347 		}
2348 	}
2349 
2350 	if (record__start_threads(rec))
2351 		goto out_free_threads;
2352 
2353 	/*
2354 	 * When perf is starting the traced process, all the events
2355 	 * (apart from group members) have enable_on_exec=1 set,
2356 	 * so don't spoil it by prematurely enabling them.
2357 	 */
2358 	if (!target__none(&opts->target) && !opts->initial_delay)
2359 		evlist__enable(rec->evlist);
2360 
2361 	/*
2362 	 * Let the child rip
2363 	 */
2364 	if (forks) {
2365 		struct machine *machine = &session->machines.host;
2366 		union perf_event *event;
2367 		pid_t tgid;
2368 
2369 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2370 		if (event == NULL) {
2371 			err = -ENOMEM;
2372 			goto out_child;
2373 		}
2374 
2375 		/*
2376 		 * Some H/W events are generated before COMM event
2377 		 * which is emitted during exec(), so perf script
2378 		 * cannot see a correct process name for those events.
2379 		 * Synthesize COMM event to prevent it.
2380 		 */
2381 		tgid = perf_event__synthesize_comm(tool, event,
2382 						   rec->evlist->workload.pid,
2383 						   process_synthesized_event,
2384 						   machine);
2385 		free(event);
2386 
2387 		if (tgid == -1)
2388 			goto out_child;
2389 
2390 		event = malloc(sizeof(event->namespaces) +
2391 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2392 			       machine->id_hdr_size);
2393 		if (event == NULL) {
2394 			err = -ENOMEM;
2395 			goto out_child;
2396 		}
2397 
2398 		/*
2399 		 * Synthesize NAMESPACES event for the command specified.
2400 		 */
2401 		perf_event__synthesize_namespaces(tool, event,
2402 						  rec->evlist->workload.pid,
2403 						  tgid, process_synthesized_event,
2404 						  machine);
2405 		free(event);
2406 
2407 		evlist__start_workload(rec->evlist);
2408 	}
2409 
2410 	if (opts->initial_delay) {
2411 		pr_info(EVLIST_DISABLED_MSG);
2412 		if (opts->initial_delay > 0) {
2413 			usleep(opts->initial_delay * USEC_PER_MSEC);
2414 			evlist__enable(rec->evlist);
2415 			pr_info(EVLIST_ENABLED_MSG);
2416 		}
2417 	}
2418 
2419 	trigger_ready(&auxtrace_snapshot_trigger);
2420 	trigger_ready(&switch_output_trigger);
2421 	perf_hooks__invoke_record_start();
2422 	for (;;) {
2423 		unsigned long long hits = thread->samples;
2424 
2425 		/*
2426 		 * rec->evlist->bkw_mmap_state is possible to be
2427 		 * BKW_MMAP_EMPTY here: when done == true and
2428 		 * hits != rec->samples in previous round.
2429 		 *
2430 		 * evlist__toggle_bkw_mmap ensure we never
2431 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2432 		 */
2433 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2434 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2435 
2436 		if (record__mmap_read_all(rec, false) < 0) {
2437 			trigger_error(&auxtrace_snapshot_trigger);
2438 			trigger_error(&switch_output_trigger);
2439 			err = -1;
2440 			goto out_child;
2441 		}
2442 
2443 		if (auxtrace_record__snapshot_started) {
2444 			auxtrace_record__snapshot_started = 0;
2445 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2446 				record__read_auxtrace_snapshot(rec, false);
2447 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2448 				pr_err("AUX area tracing snapshot failed\n");
2449 				err = -1;
2450 				goto out_child;
2451 			}
2452 		}
2453 
2454 		if (trigger_is_hit(&switch_output_trigger)) {
2455 			/*
2456 			 * If switch_output_trigger is hit, the data in
2457 			 * overwritable ring buffer should have been collected,
2458 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2459 			 *
2460 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
2461 			 * record__mmap_read_all() didn't collect data from
2462 			 * overwritable ring buffer. Read again.
2463 			 */
2464 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2465 				continue;
2466 			trigger_ready(&switch_output_trigger);
2467 
2468 			/*
2469 			 * Reenable events in overwrite ring buffer after
2470 			 * record__mmap_read_all(): we should have collected
2471 			 * data from it.
2472 			 */
2473 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2474 
2475 			if (!quiet)
2476 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2477 					record__waking(rec));
2478 			thread->waking = 0;
2479 			fd = record__switch_output(rec, false);
2480 			if (fd < 0) {
2481 				pr_err("Failed to switch to new file\n");
2482 				trigger_error(&switch_output_trigger);
2483 				err = fd;
2484 				goto out_child;
2485 			}
2486 
2487 			/* re-arm the alarm */
2488 			if (rec->switch_output.time)
2489 				alarm(rec->switch_output.time);
2490 		}
2491 
2492 		if (hits == thread->samples) {
2493 			if (done || draining)
2494 				break;
2495 			err = fdarray__poll(&thread->pollfd, -1);
2496 			/*
2497 			 * Propagate error, only if there's any. Ignore positive
2498 			 * number of returned events and interrupt error.
2499 			 */
2500 			if (err > 0 || (err < 0 && errno == EINTR))
2501 				err = 0;
2502 			thread->waking++;
2503 
2504 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2505 					    record__thread_munmap_filtered, NULL) == 0)
2506 				draining = true;
2507 
2508 			evlist__ctlfd_update(rec->evlist,
2509 				&thread->pollfd.entries[thread->ctlfd_pos]);
2510 		}
2511 
2512 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2513 			switch (cmd) {
2514 			case EVLIST_CTL_CMD_SNAPSHOT:
2515 				hit_auxtrace_snapshot_trigger(rec);
2516 				evlist__ctlfd_ack(rec->evlist);
2517 				break;
2518 			case EVLIST_CTL_CMD_STOP:
2519 				done = 1;
2520 				break;
2521 			case EVLIST_CTL_CMD_ACK:
2522 			case EVLIST_CTL_CMD_UNSUPPORTED:
2523 			case EVLIST_CTL_CMD_ENABLE:
2524 			case EVLIST_CTL_CMD_DISABLE:
2525 			case EVLIST_CTL_CMD_EVLIST:
2526 			case EVLIST_CTL_CMD_PING:
2527 			default:
2528 				break;
2529 			}
2530 		}
2531 
2532 		/*
2533 		 * When perf is starting the traced process, at the end events
2534 		 * die with the process and we wait for that. Thus no need to
2535 		 * disable events in this case.
2536 		 */
2537 		if (done && !disabled && !target__none(&opts->target)) {
2538 			trigger_off(&auxtrace_snapshot_trigger);
2539 			evlist__disable(rec->evlist);
2540 			disabled = true;
2541 		}
2542 	}
2543 
2544 	trigger_off(&auxtrace_snapshot_trigger);
2545 	trigger_off(&switch_output_trigger);
2546 
2547 	if (opts->auxtrace_snapshot_on_exit)
2548 		record__auxtrace_snapshot_exit(rec);
2549 
2550 	if (forks && workload_exec_errno) {
2551 		char msg[STRERR_BUFSIZE], strevsels[2048];
2552 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2553 
2554 		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2555 
2556 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2557 			strevsels, argv[0], emsg);
2558 		err = -1;
2559 		goto out_child;
2560 	}
2561 
2562 	if (!quiet)
2563 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2564 			record__waking(rec));
2565 
2566 	if (target__none(&rec->opts.target))
2567 		record__synthesize_workload(rec, true);
2568 
2569 out_child:
2570 	record__stop_threads(rec);
2571 	record__mmap_read_all(rec, true);
2572 out_free_threads:
2573 	record__free_thread_data(rec);
2574 	evlist__finalize_ctlfd(rec->evlist);
2575 	record__aio_mmap_read_sync(rec);
2576 
2577 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2578 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2579 		session->header.env.comp_ratio = ratio + 0.5;
2580 	}
2581 
2582 	if (forks) {
2583 		int exit_status;
2584 
2585 		if (!child_finished)
2586 			kill(rec->evlist->workload.pid, SIGTERM);
2587 
2588 		wait(&exit_status);
2589 
2590 		if (err < 0)
2591 			status = err;
2592 		else if (WIFEXITED(exit_status))
2593 			status = WEXITSTATUS(exit_status);
2594 		else if (WIFSIGNALED(exit_status))
2595 			signr = WTERMSIG(exit_status);
2596 	} else
2597 		status = err;
2598 
2599 	record__synthesize(rec, true);
2600 	/* this will be recalculated during process_buildids() */
2601 	rec->samples = 0;
2602 
2603 	if (!err) {
2604 		if (!rec->timestamp_filename) {
2605 			record__finish_output(rec);
2606 		} else {
2607 			fd = record__switch_output(rec, true);
2608 			if (fd < 0) {
2609 				status = fd;
2610 				goto out_delete_session;
2611 			}
2612 		}
2613 	}
2614 
2615 	perf_hooks__invoke_record_end();
2616 
2617 	if (!err && !quiet) {
2618 		char samples[128];
2619 		const char *postfix = rec->timestamp_filename ?
2620 					".<timestamp>" : "";
2621 
2622 		if (rec->samples && !rec->opts.full_auxtrace)
2623 			scnprintf(samples, sizeof(samples),
2624 				  " (%" PRIu64 " samples)", rec->samples);
2625 		else
2626 			samples[0] = '\0';
2627 
2628 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2629 			perf_data__size(data) / 1024.0 / 1024.0,
2630 			data->path, postfix, samples);
2631 		if (ratio) {
2632 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2633 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2634 					ratio);
2635 		}
2636 		fprintf(stderr, " ]\n");
2637 	}
2638 
2639 out_delete_session:
2640 #ifdef HAVE_EVENTFD_SUPPORT
2641 	if (done_fd >= 0)
2642 		close(done_fd);
2643 #endif
2644 	zstd_fini(&session->zstd_data);
2645 	perf_session__delete(session);
2646 
2647 	if (!opts->no_bpf_event)
2648 		evlist__stop_sb_thread(rec->sb_evlist);
2649 	return status;
2650 }
2651 
2652 static void callchain_debug(struct callchain_param *callchain)
2653 {
2654 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2655 
2656 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2657 
2658 	if (callchain->record_mode == CALLCHAIN_DWARF)
2659 		pr_debug("callchain: stack dump size %d\n",
2660 			 callchain->dump_size);
2661 }
2662 
2663 int record_opts__parse_callchain(struct record_opts *record,
2664 				 struct callchain_param *callchain,
2665 				 const char *arg, bool unset)
2666 {
2667 	int ret;
2668 	callchain->enabled = !unset;
2669 
2670 	/* --no-call-graph */
2671 	if (unset) {
2672 		callchain->record_mode = CALLCHAIN_NONE;
2673 		pr_debug("callchain: disabled\n");
2674 		return 0;
2675 	}
2676 
2677 	ret = parse_callchain_record_opt(arg, callchain);
2678 	if (!ret) {
2679 		/* Enable data address sampling for DWARF unwind. */
2680 		if (callchain->record_mode == CALLCHAIN_DWARF)
2681 			record->sample_address = true;
2682 		callchain_debug(callchain);
2683 	}
2684 
2685 	return ret;
2686 }
2687 
2688 int record_parse_callchain_opt(const struct option *opt,
2689 			       const char *arg,
2690 			       int unset)
2691 {
2692 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2693 }
2694 
2695 int record_callchain_opt(const struct option *opt,
2696 			 const char *arg __maybe_unused,
2697 			 int unset __maybe_unused)
2698 {
2699 	struct callchain_param *callchain = opt->value;
2700 
2701 	callchain->enabled = true;
2702 
2703 	if (callchain->record_mode == CALLCHAIN_NONE)
2704 		callchain->record_mode = CALLCHAIN_FP;
2705 
2706 	callchain_debug(callchain);
2707 	return 0;
2708 }
2709 
2710 static int perf_record_config(const char *var, const char *value, void *cb)
2711 {
2712 	struct record *rec = cb;
2713 
2714 	if (!strcmp(var, "record.build-id")) {
2715 		if (!strcmp(value, "cache"))
2716 			rec->no_buildid_cache = false;
2717 		else if (!strcmp(value, "no-cache"))
2718 			rec->no_buildid_cache = true;
2719 		else if (!strcmp(value, "skip"))
2720 			rec->no_buildid = true;
2721 		else if (!strcmp(value, "mmap"))
2722 			rec->buildid_mmap = true;
2723 		else
2724 			return -1;
2725 		return 0;
2726 	}
2727 	if (!strcmp(var, "record.call-graph")) {
2728 		var = "call-graph.record-mode";
2729 		return perf_default_config(var, value, cb);
2730 	}
2731 #ifdef HAVE_AIO_SUPPORT
2732 	if (!strcmp(var, "record.aio")) {
2733 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2734 		if (!rec->opts.nr_cblocks)
2735 			rec->opts.nr_cblocks = nr_cblocks_default;
2736 	}
2737 #endif
2738 	if (!strcmp(var, "record.debuginfod")) {
2739 		rec->debuginfod.urls = strdup(value);
2740 		if (!rec->debuginfod.urls)
2741 			return -ENOMEM;
2742 		rec->debuginfod.set = true;
2743 	}
2744 
2745 	return 0;
2746 }
2747 
2748 
2749 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2750 {
2751 	struct record_opts *opts = (struct record_opts *)opt->value;
2752 
2753 	if (unset || !str)
2754 		return 0;
2755 
2756 	if (!strcasecmp(str, "node"))
2757 		opts->affinity = PERF_AFFINITY_NODE;
2758 	else if (!strcasecmp(str, "cpu"))
2759 		opts->affinity = PERF_AFFINITY_CPU;
2760 
2761 	return 0;
2762 }
2763 
2764 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2765 {
2766 	mask->nbits = nr_bits;
2767 	mask->bits = bitmap_zalloc(mask->nbits);
2768 	if (!mask->bits)
2769 		return -ENOMEM;
2770 
2771 	return 0;
2772 }
2773 
2774 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2775 {
2776 	bitmap_free(mask->bits);
2777 	mask->nbits = 0;
2778 }
2779 
2780 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2781 {
2782 	int ret;
2783 
2784 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2785 	if (ret) {
2786 		mask->affinity.bits = NULL;
2787 		return ret;
2788 	}
2789 
2790 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2791 	if (ret) {
2792 		record__mmap_cpu_mask_free(&mask->maps);
2793 		mask->maps.bits = NULL;
2794 	}
2795 
2796 	return ret;
2797 }
2798 
2799 static void record__thread_mask_free(struct thread_mask *mask)
2800 {
2801 	record__mmap_cpu_mask_free(&mask->maps);
2802 	record__mmap_cpu_mask_free(&mask->affinity);
2803 }
2804 
2805 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2806 {
2807 	int s;
2808 	struct record_opts *opts = opt->value;
2809 
2810 	if (unset || !str || !strlen(str)) {
2811 		opts->threads_spec = THREAD_SPEC__CPU;
2812 	} else {
2813 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
2814 			if (s == THREAD_SPEC__USER) {
2815 				opts->threads_user_spec = strdup(str);
2816 				if (!opts->threads_user_spec)
2817 					return -ENOMEM;
2818 				opts->threads_spec = THREAD_SPEC__USER;
2819 				break;
2820 			}
2821 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2822 				opts->threads_spec = s;
2823 				break;
2824 			}
2825 		}
2826 	}
2827 
2828 	if (opts->threads_spec == THREAD_SPEC__USER)
2829 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2830 	else
2831 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2832 
2833 	return 0;
2834 }
2835 
2836 static int parse_output_max_size(const struct option *opt,
2837 				 const char *str, int unset)
2838 {
2839 	unsigned long *s = (unsigned long *)opt->value;
2840 	static struct parse_tag tags_size[] = {
2841 		{ .tag  = 'B', .mult = 1       },
2842 		{ .tag  = 'K', .mult = 1 << 10 },
2843 		{ .tag  = 'M', .mult = 1 << 20 },
2844 		{ .tag  = 'G', .mult = 1 << 30 },
2845 		{ .tag  = 0 },
2846 	};
2847 	unsigned long val;
2848 
2849 	if (unset) {
2850 		*s = 0;
2851 		return 0;
2852 	}
2853 
2854 	val = parse_tag_value(str, tags_size);
2855 	if (val != (unsigned long) -1) {
2856 		*s = val;
2857 		return 0;
2858 	}
2859 
2860 	return -1;
2861 }
2862 
2863 static int record__parse_mmap_pages(const struct option *opt,
2864 				    const char *str,
2865 				    int unset __maybe_unused)
2866 {
2867 	struct record_opts *opts = opt->value;
2868 	char *s, *p;
2869 	unsigned int mmap_pages;
2870 	int ret;
2871 
2872 	if (!str)
2873 		return -EINVAL;
2874 
2875 	s = strdup(str);
2876 	if (!s)
2877 		return -ENOMEM;
2878 
2879 	p = strchr(s, ',');
2880 	if (p)
2881 		*p = '\0';
2882 
2883 	if (*s) {
2884 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2885 		if (ret)
2886 			goto out_free;
2887 		opts->mmap_pages = mmap_pages;
2888 	}
2889 
2890 	if (!p) {
2891 		ret = 0;
2892 		goto out_free;
2893 	}
2894 
2895 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2896 	if (ret)
2897 		goto out_free;
2898 
2899 	opts->auxtrace_mmap_pages = mmap_pages;
2900 
2901 out_free:
2902 	free(s);
2903 	return ret;
2904 }
2905 
2906 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2907 {
2908 }
2909 
2910 static int parse_control_option(const struct option *opt,
2911 				const char *str,
2912 				int unset __maybe_unused)
2913 {
2914 	struct record_opts *opts = opt->value;
2915 
2916 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2917 }
2918 
2919 static void switch_output_size_warn(struct record *rec)
2920 {
2921 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2922 	struct switch_output *s = &rec->switch_output;
2923 
2924 	wakeup_size /= 2;
2925 
2926 	if (s->size < wakeup_size) {
2927 		char buf[100];
2928 
2929 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2930 		pr_warning("WARNING: switch-output data size lower than "
2931 			   "wakeup kernel buffer size (%s) "
2932 			   "expect bigger perf.data sizes\n", buf);
2933 	}
2934 }
2935 
2936 static int switch_output_setup(struct record *rec)
2937 {
2938 	struct switch_output *s = &rec->switch_output;
2939 	static struct parse_tag tags_size[] = {
2940 		{ .tag  = 'B', .mult = 1       },
2941 		{ .tag  = 'K', .mult = 1 << 10 },
2942 		{ .tag  = 'M', .mult = 1 << 20 },
2943 		{ .tag  = 'G', .mult = 1 << 30 },
2944 		{ .tag  = 0 },
2945 	};
2946 	static struct parse_tag tags_time[] = {
2947 		{ .tag  = 's', .mult = 1        },
2948 		{ .tag  = 'm', .mult = 60       },
2949 		{ .tag  = 'h', .mult = 60*60    },
2950 		{ .tag  = 'd', .mult = 60*60*24 },
2951 		{ .tag  = 0 },
2952 	};
2953 	unsigned long val;
2954 
2955 	/*
2956 	 * If we're using --switch-output-events, then we imply its
2957 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2958 	 *  thread to its parent.
2959 	 */
2960 	if (rec->switch_output_event_set) {
2961 		if (record__threads_enabled(rec)) {
2962 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2963 			return 0;
2964 		}
2965 		goto do_signal;
2966 	}
2967 
2968 	if (!s->set)
2969 		return 0;
2970 
2971 	if (record__threads_enabled(rec)) {
2972 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2973 		return 0;
2974 	}
2975 
2976 	if (!strcmp(s->str, "signal")) {
2977 do_signal:
2978 		s->signal = true;
2979 		pr_debug("switch-output with SIGUSR2 signal\n");
2980 		goto enabled;
2981 	}
2982 
2983 	val = parse_tag_value(s->str, tags_size);
2984 	if (val != (unsigned long) -1) {
2985 		s->size = val;
2986 		pr_debug("switch-output with %s size threshold\n", s->str);
2987 		goto enabled;
2988 	}
2989 
2990 	val = parse_tag_value(s->str, tags_time);
2991 	if (val != (unsigned long) -1) {
2992 		s->time = val;
2993 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2994 			 s->str, s->time);
2995 		goto enabled;
2996 	}
2997 
2998 	return -1;
2999 
3000 enabled:
3001 	rec->timestamp_filename = true;
3002 	s->enabled              = true;
3003 
3004 	if (s->size && !rec->opts.no_buffering)
3005 		switch_output_size_warn(rec);
3006 
3007 	return 0;
3008 }
3009 
/* NULL-terminated list of usage lines for 'perf record'. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static alias of the usage lines above. */
const char * const *record_usage = __record_usage;
3016 
3017 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3018 				  struct perf_sample *sample, struct machine *machine)
3019 {
3020 	/*
3021 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3022 	 * no need to add them twice.
3023 	 */
3024 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3025 		return 0;
3026 	return perf_event__process_mmap(tool, event, sample, machine);
3027 }
3028 
3029 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3030 				   struct perf_sample *sample, struct machine *machine)
3031 {
3032 	/*
3033 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
3034 	 * no need to add them twice.
3035 	 */
3036 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3037 		return 0;
3038 
3039 	return perf_event__process_mmap2(tool, event, sample, machine);
3040 }
3041 
3042 static int process_timestamp_boundary(struct perf_tool *tool,
3043 				      union perf_event *event __maybe_unused,
3044 				      struct perf_sample *sample,
3045 				      struct machine *machine __maybe_unused)
3046 {
3047 	struct record *rec = container_of(tool, struct record, tool);
3048 
3049 	set_timestamp_boundary(rec, sample->time);
3050 	return 0;
3051 }
3052 
3053 static int parse_record_synth_option(const struct option *opt,
3054 				     const char *str,
3055 				     int unset __maybe_unused)
3056 {
3057 	struct record_opts *opts = opt->value;
3058 	char *p = strdup(str);
3059 
3060 	if (p == NULL)
3061 		return -1;
3062 
3063 	opts->synth = parse_synth_opt(p);
3064 	free(p);
3065 
3066 	if (opts->synth < 0) {
3067 		pr_err("Invalid synth option: %s\n", str);
3068 		return -1;
3069 	}
3070 	return 0;
3071 }
3072 
3073 /*
3074  * XXX Ideally would be local to cmd_record() and passed to a record__new
3075  * because we need to have access to it in record__exit, that is called
3076  * after cmd_record() exits, but since record_options need to be accessible to
3077  * builtin-script, leave it here.
3078  *
 * At least we don't touch it in all the other functions here directly.
3080  *
3081  * Just say no to tons of global variables, sigh.
3082  */
3083 static struct record record = {
3084 	.opts = {
3085 		.sample_time	     = true,
3086 		.mmap_pages	     = UINT_MAX,
3087 		.user_freq	     = UINT_MAX,
3088 		.user_interval	     = ULLONG_MAX,
3089 		.freq		     = 4000,
3090 		.target		     = {
3091 			.uses_mmap   = true,
3092 			.default_per_cpu = true,
3093 		},
3094 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3095 		.nr_threads_synthesize = 1,
3096 		.ctl_fd              = -1,
3097 		.ctl_fd_ack          = -1,
3098 		.synth               = PERF_SYNTH_ALL,
3099 	},
3100 	.tool = {
3101 		.sample		= process_sample_event,
3102 		.fork		= perf_event__process_fork,
3103 		.exit		= perf_event__process_exit,
3104 		.comm		= perf_event__process_comm,
3105 		.namespaces	= perf_event__process_namespaces,
3106 		.mmap		= build_id__process_mmap,
3107 		.mmap2		= build_id__process_mmap2,
3108 		.itrace_start	= process_timestamp_boundary,
3109 		.aux		= process_timestamp_boundary,
3110 		.ordered_events	= true,
3111 	},
3112 };
3113 
3114 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3115 	"\n\t\t\t\tDefault: fp";
3116 
3117 static bool dry_run;
3118 
3119 /*
3120  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3121  * with it and switch to use the library functions in perf_evlist that came
3122  * from builtin-record.c, i.e. use record_opts,
3123  * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
3124  * using pipes, etc.
3125  */
3126 static struct option __record_options[] = {
3127 	OPT_CALLBACK('e', "event", &record.evlist, "event",
3128 		     "event selector. use 'perf list' to list available events",
3129 		     parse_events_option),
3130 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3131 		     "event filter", parse_filter),
3132 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3133 			   NULL, "don't record events from perf itself",
3134 			   exclude_perf),
3135 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3136 		    "record events on existing process id"),
3137 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3138 		    "record events on existing thread id"),
3139 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3140 		    "collect data with this RT SCHED_FIFO priority"),
3141 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3142 		    "collect data without buffering"),
3143 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3144 		    "collect raw sample records from all opened counters"),
3145 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3146 			    "system-wide collection from all CPUs"),
3147 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3148 		    "list of cpus to monitor"),
3149 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3150 	OPT_STRING('o', "output", &record.data.path, "file",
3151 		    "output file name"),
3152 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3153 			&record.opts.no_inherit_set,
3154 			"child tasks do not inherit counters"),
3155 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3156 		    "synthesize non-sample events at the end of output"),
3157 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3158 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3159 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3160 		    "Fail if the specified frequency can't be used"),
3161 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3162 		     "profile at this frequency",
3163 		      record__parse_freq),
3164 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3165 		     "number of mmap data pages and AUX area tracing mmap pages",
3166 		     record__parse_mmap_pages),
3167 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3168 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3169 		     record__mmap_flush_parse),
3170 	OPT_BOOLEAN(0, "group", &record.opts.group,
3171 		    "put the counters into a counter group"),
3172 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3173 			   NULL, "enables call-graph recording" ,
3174 			   &record_callchain_opt),
3175 	OPT_CALLBACK(0, "call-graph", &record.opts,
3176 		     "record_mode[,record_size]", record_callchain_help,
3177 		     &record_parse_callchain_opt),
3178 	OPT_INCR('v', "verbose", &verbose,
3179 		    "be more verbose (show counter open errors, etc)"),
3180 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3181 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3182 		    "per thread counts"),
3183 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3184 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3185 		    "Record the sample physical addresses"),
3186 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3187 		    "Record the sampled data address data page size"),
3188 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3189 		    "Record the sampled code address (ip) page size"),
3190 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3191 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3192 			&record.opts.sample_time_set,
3193 			"Record the sample timestamps"),
3194 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3195 			"Record the sample period"),
3196 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3197 		    "don't sample"),
3198 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3199 			&record.no_buildid_cache_set,
3200 			"do not update the buildid cache"),
3201 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3202 			&record.no_buildid_set,
3203 			"do not collect buildids in perf.data"),
3204 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3205 		     "monitor event in cgroup name only",
3206 		     parse_cgroups),
3207 	OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3208 		  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3209 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3210 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3211 		   "user to profile"),
3212 
3213 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3214 		     "branch any", "sample any taken branches",
3215 		     parse_branch_stack),
3216 
3217 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3218 		     "branch filter mask", "branch stack filter modes",
3219 		     parse_branch_stack),
3220 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3221 		    "sample by weight (on special events only)"),
3222 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3223 		    "sample transaction flags (special events only)"),
3224 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3225 		    "use per-thread mmaps"),
3226 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3227 		    "sample selected machine registers on interrupt,"
3228 		    " use '-I?' to list register names", parse_intr_regs),
3229 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3230 		    "sample selected machine registers on interrupt,"
3231 		    " use '--user-regs=?' to list register names", parse_user_regs),
3232 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3233 		    "Record running/enabled time of read (:S) events"),
3234 	OPT_CALLBACK('k', "clockid", &record.opts,
3235 	"clockid", "clockid to use for events, see clock_gettime()",
3236 	parse_clockid),
3237 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3238 			  "opts", "AUX area tracing Snapshot Mode", ""),
3239 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3240 			  "opts", "sample AUX area", ""),
3241 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3242 			"per thread proc mmap processing timeout in ms"),
3243 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3244 		    "Record namespaces events"),
3245 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3246 		    "Record cgroup events"),
3247 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3248 			&record.opts.record_switch_events_set,
3249 			"Record context switch events"),
3250 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3251 			 "Configure all used events to run in kernel space.",
3252 			 PARSE_OPT_EXCLUSIVE),
3253 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3254 			 "Configure all used events to run in user space.",
3255 			 PARSE_OPT_EXCLUSIVE),
3256 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3257 		    "collect kernel callchains"),
3258 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3259 		    "collect user callchains"),
3260 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3261 		   "clang binary to use for compiling BPF scriptlets"),
3262 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3263 		   "options passed to clang when compiling BPF scriptlets"),
3264 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3265 		   "file", "vmlinux pathname"),
3266 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3267 		    "Record build-id of all DSOs regardless of hits"),
3268 	OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3269 		    "Record build-id in map events"),
3270 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3271 		    "append timestamp to output filename"),
3272 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3273 		    "Record timestamp boundary (time of first/last samples)"),
3274 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3275 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3276 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3277 			  "signal"),
3278 	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3279 			 "switch output event selector. use 'perf list' to list available events",
3280 			 parse_events_option_new_evlist),
3281 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3282 		   "Limit number of switch output generated files"),
3283 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3284 		    "Parse options then exit"),
3285 #ifdef HAVE_AIO_SUPPORT
3286 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3287 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3288 		     record__aio_parse),
3289 #endif
3290 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3291 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3292 		     record__parse_affinity),
3293 #ifdef HAVE_ZSTD_SUPPORT
3294 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3295 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3296 			    record__parse_comp_level),
3297 #endif
3298 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3299 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3300 	OPT_UINTEGER(0, "num-thread-synthesize",
3301 		     &record.opts.nr_threads_synthesize,
3302 		     "number of threads to run for event synthesis"),
3303 #ifdef HAVE_LIBPFM
3304 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3305 		"libpfm4 event selector. use 'perf list' to list available events",
3306 		parse_libpfm_events_option),
3307 #endif
3308 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3309 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3310 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3311 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3312 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3313 		      parse_control_option),
3314 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3315 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3316 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3317 			  &record.debuginfod.set, "debuginfod urls",
3318 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3319 			  "system"),
3320 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3321 			    "write collected trace data into several data files using parallel threads",
3322 			    record__parse_threads),
3323 	OPT_END()
3324 };
3325 
3326 struct option *record_options = __record_options;
3327 
3328 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3329 {
3330 	int c;
3331 
3332 	for (c = 0; c < cpus->nr; c++)
3333 		set_bit(cpus->map[c].cpu, mask->bits);
3334 }
3335 
3336 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3337 {
3338 	struct perf_cpu_map *cpus;
3339 
3340 	cpus = perf_cpu_map__new(mask_spec);
3341 	if (!cpus)
3342 		return -ENOMEM;
3343 
3344 	bitmap_zero(mask->bits, mask->nbits);
3345 	record__mmap_cpu_mask_init(mask, cpus);
3346 	perf_cpu_map__put(cpus);
3347 
3348 	return 0;
3349 }
3350 
3351 static void record__free_thread_masks(struct record *rec, int nr_threads)
3352 {
3353 	int t;
3354 
3355 	if (rec->thread_masks)
3356 		for (t = 0; t < nr_threads; t++)
3357 			record__thread_mask_free(&rec->thread_masks[t]);
3358 
3359 	zfree(&rec->thread_masks);
3360 }
3361 
3362 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3363 {
3364 	int t, ret;
3365 
3366 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3367 	if (!rec->thread_masks) {
3368 		pr_err("Failed to allocate thread masks\n");
3369 		return -ENOMEM;
3370 	}
3371 
3372 	for (t = 0; t < nr_threads; t++) {
3373 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3374 		if (ret) {
3375 			pr_err("Failed to allocate thread masks[%d]\n", t);
3376 			goto out_free;
3377 		}
3378 	}
3379 
3380 	return 0;
3381 
3382 out_free:
3383 	record__free_thread_masks(rec, nr_threads);
3384 
3385 	return ret;
3386 }
3387 
3388 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3389 {
3390 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3391 
3392 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3393 	if (ret)
3394 		return ret;
3395 
3396 	rec->nr_threads = nr_cpus;
3397 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3398 
3399 	for (t = 0; t < rec->nr_threads; t++) {
3400 		set_bit(cpus->map[t].cpu, rec->thread_masks[t].maps.bits);
3401 		set_bit(cpus->map[t].cpu, rec->thread_masks[t].affinity.bits);
3402 		if (verbose) {
3403 			pr_debug("thread_masks[%d]: ", t);
3404 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3405 			pr_debug("thread_masks[%d]: ", t);
3406 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3407 		}
3408 	}
3409 
3410 	return 0;
3411 }
3412 
3413 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3414 					  const char **maps_spec, const char **affinity_spec,
3415 					  u32 nr_spec)
3416 {
3417 	u32 s;
3418 	int ret = 0, t = 0;
3419 	struct mmap_cpu_mask cpus_mask;
3420 	struct thread_mask thread_mask, full_mask, *thread_masks;
3421 
3422 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3423 	if (ret) {
3424 		pr_err("Failed to allocate CPUs mask\n");
3425 		return ret;
3426 	}
3427 	record__mmap_cpu_mask_init(&cpus_mask, cpus);
3428 
3429 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3430 	if (ret) {
3431 		pr_err("Failed to allocate full mask\n");
3432 		goto out_free_cpu_mask;
3433 	}
3434 
3435 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3436 	if (ret) {
3437 		pr_err("Failed to allocate thread mask\n");
3438 		goto out_free_full_and_cpu_masks;
3439 	}
3440 
3441 	for (s = 0; s < nr_spec; s++) {
3442 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3443 		if (ret) {
3444 			pr_err("Failed to initialize maps thread mask\n");
3445 			goto out_free;
3446 		}
3447 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3448 		if (ret) {
3449 			pr_err("Failed to initialize affinity thread mask\n");
3450 			goto out_free;
3451 		}
3452 
3453 		/* ignore invalid CPUs but do not allow empty masks */
3454 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3455 				cpus_mask.bits, thread_mask.maps.nbits)) {
3456 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3457 			ret = -EINVAL;
3458 			goto out_free;
3459 		}
3460 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3461 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3462 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3463 			ret = -EINVAL;
3464 			goto out_free;
3465 		}
3466 
3467 		/* do not allow intersection with other masks (full_mask) */
3468 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3469 				      thread_mask.maps.nbits)) {
3470 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3471 			ret = -EINVAL;
3472 			goto out_free;
3473 		}
3474 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3475 				      thread_mask.affinity.nbits)) {
3476 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3477 			ret = -EINVAL;
3478 			goto out_free;
3479 		}
3480 
3481 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3482 			  thread_mask.maps.bits, full_mask.maps.nbits);
3483 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3484 			  thread_mask.affinity.bits, full_mask.maps.nbits);
3485 
3486 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3487 		if (!thread_masks) {
3488 			pr_err("Failed to reallocate thread masks\n");
3489 			ret = -ENOMEM;
3490 			goto out_free;
3491 		}
3492 		rec->thread_masks = thread_masks;
3493 		rec->thread_masks[t] = thread_mask;
3494 		if (verbose) {
3495 			pr_debug("thread_masks[%d]: ", t);
3496 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3497 			pr_debug("thread_masks[%d]: ", t);
3498 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3499 		}
3500 		t++;
3501 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3502 		if (ret) {
3503 			pr_err("Failed to allocate thread mask\n");
3504 			goto out_free_full_and_cpu_masks;
3505 		}
3506 	}
3507 	rec->nr_threads = t;
3508 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3509 	if (!rec->nr_threads)
3510 		ret = -EINVAL;
3511 
3512 out_free:
3513 	record__thread_mask_free(&thread_mask);
3514 out_free_full_and_cpu_masks:
3515 	record__thread_mask_free(&full_mask);
3516 out_free_cpu_mask:
3517 	record__mmap_cpu_mask_free(&cpus_mask);
3518 
3519 	return ret;
3520 }
3521 
3522 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3523 {
3524 	int ret;
3525 	struct cpu_topology *topo;
3526 
3527 	topo = cpu_topology__new();
3528 	if (!topo) {
3529 		pr_err("Failed to allocate CPU topology\n");
3530 		return -ENOMEM;
3531 	}
3532 
3533 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3534 					     topo->core_cpus_list, topo->core_cpus_lists);
3535 	cpu_topology__delete(topo);
3536 
3537 	return ret;
3538 }
3539 
3540 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3541 {
3542 	int ret;
3543 	struct cpu_topology *topo;
3544 
3545 	topo = cpu_topology__new();
3546 	if (!topo) {
3547 		pr_err("Failed to allocate CPU topology\n");
3548 		return -ENOMEM;
3549 	}
3550 
3551 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3552 					     topo->package_cpus_list, topo->package_cpus_lists);
3553 	cpu_topology__delete(topo);
3554 
3555 	return ret;
3556 }
3557 
3558 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3559 {
3560 	u32 s;
3561 	int ret;
3562 	const char **spec;
3563 	struct numa_topology *topo;
3564 
3565 	topo = numa_topology__new();
3566 	if (!topo) {
3567 		pr_err("Failed to allocate NUMA topology\n");
3568 		return -ENOMEM;
3569 	}
3570 
3571 	spec = zalloc(topo->nr * sizeof(char *));
3572 	if (!spec) {
3573 		pr_err("Failed to allocate NUMA spec\n");
3574 		ret = -ENOMEM;
3575 		goto out_delete_topo;
3576 	}
3577 	for (s = 0; s < topo->nr; s++)
3578 		spec[s] = topo->nodes[s].cpus;
3579 
3580 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3581 
3582 	zfree(&spec);
3583 
3584 out_delete_topo:
3585 	numa_topology__delete(topo);
3586 
3587 	return ret;
3588 }
3589 
3590 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3591 {
3592 	int t, ret;
3593 	u32 s, nr_spec = 0;
3594 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3595 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3596 
3597 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3598 		spec = strtok_r(user_spec, ":", &spec_ptr);
3599 		if (spec == NULL)
3600 			break;
3601 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3602 		mask = strtok_r(spec, "/", &mask_ptr);
3603 		if (mask == NULL)
3604 			break;
3605 		pr_debug2("  maps mask: %s\n", mask);
3606 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3607 		if (!tmp_spec) {
3608 			pr_err("Failed to reallocate maps spec\n");
3609 			ret = -ENOMEM;
3610 			goto out_free;
3611 		}
3612 		maps_spec = tmp_spec;
3613 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3614 		if (!maps_spec[nr_spec]) {
3615 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3616 			ret = -ENOMEM;
3617 			goto out_free;
3618 		}
3619 		mask = strtok_r(NULL, "/", &mask_ptr);
3620 		if (mask == NULL) {
3621 			pr_err("Invalid thread maps or affinity specs\n");
3622 			ret = -EINVAL;
3623 			goto out_free;
3624 		}
3625 		pr_debug2("  affinity mask: %s\n", mask);
3626 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3627 		if (!tmp_spec) {
3628 			pr_err("Failed to reallocate affinity spec\n");
3629 			ret = -ENOMEM;
3630 			goto out_free;
3631 		}
3632 		affinity_spec = tmp_spec;
3633 		affinity_spec[nr_spec] = strdup(mask);
3634 		if (!affinity_spec[nr_spec]) {
3635 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3636 			ret = -ENOMEM;
3637 			goto out_free;
3638 		}
3639 		dup_mask = NULL;
3640 		nr_spec++;
3641 	}
3642 
3643 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3644 					     (const char **)affinity_spec, nr_spec);
3645 
3646 out_free:
3647 	free(dup_mask);
3648 	for (s = 0; s < nr_spec; s++) {
3649 		if (maps_spec)
3650 			free(maps_spec[s]);
3651 		if (affinity_spec)
3652 			free(affinity_spec[s]);
3653 	}
3654 	free(affinity_spec);
3655 	free(maps_spec);
3656 
3657 	return ret;
3658 }
3659 
3660 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3661 {
3662 	int ret;
3663 
3664 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3665 	if (ret)
3666 		return ret;
3667 
3668 	record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3669 
3670 	rec->nr_threads = 1;
3671 
3672 	return 0;
3673 }
3674 
3675 static int record__init_thread_masks(struct record *rec)
3676 {
3677 	int ret = 0;
3678 	struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
3679 
3680 	if (!record__threads_enabled(rec))
3681 		return record__init_thread_default_masks(rec, cpus);
3682 
3683 	switch (rec->opts.threads_spec) {
3684 	case THREAD_SPEC__CPU:
3685 		ret = record__init_thread_cpu_masks(rec, cpus);
3686 		break;
3687 	case THREAD_SPEC__CORE:
3688 		ret = record__init_thread_core_masks(rec, cpus);
3689 		break;
3690 	case THREAD_SPEC__PACKAGE:
3691 		ret = record__init_thread_package_masks(rec, cpus);
3692 		break;
3693 	case THREAD_SPEC__NUMA:
3694 		ret = record__init_thread_numa_masks(rec, cpus);
3695 		break;
3696 	case THREAD_SPEC__USER:
3697 		ret = record__init_thread_user_masks(rec, cpus);
3698 		break;
3699 	default:
3700 		break;
3701 	}
3702 
3703 	return ret;
3704 }
3705 
3706 int cmd_record(int argc, const char **argv)
3707 {
3708 	int err;
3709 	struct record *rec = &record;
3710 	char errbuf[BUFSIZ];
3711 
3712 	setlocale(LC_ALL, "");
3713 
3714 #ifndef HAVE_LIBBPF_SUPPORT
3715 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3716 	set_nobuild('\0', "clang-path", true);
3717 	set_nobuild('\0', "clang-opt", true);
3718 # undef set_nobuild
3719 #endif
3720 
3721 #ifndef HAVE_BPF_PROLOGUE
3722 # if !defined (HAVE_DWARF_SUPPORT)
3723 #  define REASON  "NO_DWARF=1"
3724 # elif !defined (HAVE_LIBBPF_SUPPORT)
3725 #  define REASON  "NO_LIBBPF=1"
3726 # else
3727 #  define REASON  "this architecture doesn't support BPF prologue"
3728 # endif
3729 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3730 	set_nobuild('\0', "vmlinux", true);
3731 # undef set_nobuild
3732 # undef REASON
3733 #endif
3734 
3735 	rec->opts.affinity = PERF_AFFINITY_SYS;
3736 
3737 	rec->evlist = evlist__new();
3738 	if (rec->evlist == NULL)
3739 		return -ENOMEM;
3740 
3741 	err = perf_config(perf_record_config, rec);
3742 	if (err)
3743 		return err;
3744 
3745 	argc = parse_options(argc, argv, record_options, record_usage,
3746 			    PARSE_OPT_STOP_AT_NON_OPTION);
3747 	if (quiet)
3748 		perf_quiet_option();
3749 
3750 	err = symbol__validate_sym_arguments();
3751 	if (err)
3752 		return err;
3753 
3754 	perf_debuginfod_setup(&record.debuginfod);
3755 
3756 	/* Make system wide (-a) the default target. */
3757 	if (!argc && target__none(&rec->opts.target))
3758 		rec->opts.target.system_wide = true;
3759 
3760 	if (nr_cgroups && !rec->opts.target.system_wide) {
3761 		usage_with_options_msg(record_usage, record_options,
3762 			"cgroup monitoring only available in system-wide mode");
3763 
3764 	}
3765 
3766 	if (rec->buildid_mmap) {
3767 		if (!perf_can_record_build_id()) {
3768 			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3769 			err = -EINVAL;
3770 			goto out_opts;
3771 		}
3772 		pr_debug("Enabling build id in mmap2 events.\n");
3773 		/* Enable mmap build id synthesizing. */
3774 		symbol_conf.buildid_mmap2 = true;
3775 		/* Enable perf_event_attr::build_id bit. */
3776 		rec->opts.build_id = true;
3777 		/* Disable build id cache. */
3778 		rec->no_buildid = true;
3779 	}
3780 
3781 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3782 		pr_err("Kernel has no cgroup sampling support.\n");
3783 		err = -EINVAL;
3784 		goto out_opts;
3785 	}
3786 
3787 	if (rec->opts.kcore || record__threads_enabled(rec))
3788 		rec->data.is_dir = true;
3789 
3790 	if (record__threads_enabled(rec)) {
3791 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3792 			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
3793 			goto out_opts;
3794 		}
3795 		if (record__aio_enabled(rec)) {
3796 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
3797 			goto out_opts;
3798 		}
3799 	}
3800 
3801 	if (rec->opts.comp_level != 0) {
3802 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3803 		rec->no_buildid = true;
3804 	}
3805 
3806 	if (rec->opts.record_switch_events &&
3807 	    !perf_can_record_switch_events()) {
3808 		ui__error("kernel does not support recording context switch events\n");
3809 		parse_options_usage(record_usage, record_options, "switch-events", 0);
3810 		err = -EINVAL;
3811 		goto out_opts;
3812 	}
3813 
3814 	if (switch_output_setup(rec)) {
3815 		parse_options_usage(record_usage, record_options, "switch-output", 0);
3816 		err = -EINVAL;
3817 		goto out_opts;
3818 	}
3819 
3820 	if (rec->switch_output.time) {
3821 		signal(SIGALRM, alarm_sig_handler);
3822 		alarm(rec->switch_output.time);
3823 	}
3824 
3825 	if (rec->switch_output.num_files) {
3826 		rec->switch_output.filenames = calloc(sizeof(char *),
3827 						      rec->switch_output.num_files);
3828 		if (!rec->switch_output.filenames) {
3829 			err = -EINVAL;
3830 			goto out_opts;
3831 		}
3832 	}
3833 
3834 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
3835 		rec->timestamp_filename = false;
3836 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3837 	}
3838 
3839 	/*
3840 	 * Allow aliases to facilitate the lookup of symbols for address
3841 	 * filters. Refer to auxtrace_parse_filters().
3842 	 */
3843 	symbol_conf.allow_aliases = true;
3844 
3845 	symbol__init(NULL);
3846 
3847 	err = record__auxtrace_init(rec);
3848 	if (err)
3849 		goto out;
3850 
3851 	if (dry_run)
3852 		goto out;
3853 
3854 	err = bpf__setup_stdout(rec->evlist);
3855 	if (err) {
3856 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3857 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
3858 			 errbuf);
3859 		goto out;
3860 	}
3861 
3862 	err = -ENOMEM;
3863 
3864 	if (rec->no_buildid_cache || rec->no_buildid) {
3865 		disable_buildid_cache();
3866 	} else if (rec->switch_output.enabled) {
3867 		/*
3868 		 * In 'perf record --switch-output', disable buildid
3869 		 * generation by default to reduce data file switching
3870 		 * overhead. Still generate buildid if they are required
3871 		 * explicitly using
3872 		 *
3873 		 *  perf record --switch-output --no-no-buildid \
3874 		 *              --no-no-buildid-cache
3875 		 *
3876 		 * Following code equals to:
3877 		 *
3878 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
3879 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3880 		 *         disable_buildid_cache();
3881 		 */
3882 		bool disable = true;
3883 
3884 		if (rec->no_buildid_set && !rec->no_buildid)
3885 			disable = false;
3886 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3887 			disable = false;
3888 		if (disable) {
3889 			rec->no_buildid = true;
3890 			rec->no_buildid_cache = true;
3891 			disable_buildid_cache();
3892 		}
3893 	}
3894 
3895 	if (record.opts.overwrite)
3896 		record.opts.tail_synthesize = true;
3897 
3898 	if (rec->evlist->core.nr_entries == 0) {
3899 		if (perf_pmu__has_hybrid()) {
3900 			err = evlist__add_default_hybrid(rec->evlist,
3901 							 !record.opts.no_samples);
3902 		} else {
3903 			err = __evlist__add_default(rec->evlist,
3904 						    !record.opts.no_samples);
3905 		}
3906 
3907 		if (err < 0) {
3908 			pr_err("Not enough memory for event selector list\n");
3909 			goto out;
3910 		}
3911 	}
3912 
3913 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3914 		rec->opts.no_inherit = true;
3915 
3916 	err = target__validate(&rec->opts.target);
3917 	if (err) {
3918 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3919 		ui__warning("%s\n", errbuf);
3920 	}
3921 
3922 	err = target__parse_uid(&rec->opts.target);
3923 	if (err) {
3924 		int saved_errno = errno;
3925 
3926 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3927 		ui__error("%s", errbuf);
3928 
3929 		err = -saved_errno;
3930 		goto out;
3931 	}
3932 
3933 	/* Enable ignoring missing threads when -u/-p option is defined. */
3934 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3935 
3936 	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3937 		pr_err("failed to use cpu list %s\n",
3938 		       rec->opts.target.cpu_list);
3939 		goto out;
3940 	}
3941 
3942 	rec->opts.target.hybrid = perf_pmu__has_hybrid();
3943 
3944 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3945 		arch__add_leaf_frame_record_opts(&rec->opts);
3946 
3947 	err = -ENOMEM;
3948 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3949 		usage_with_options(record_usage, record_options);
3950 
3951 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3952 	if (err)
3953 		goto out;
3954 
3955 	/*
3956 	 * We take all buildids when the file contains
3957 	 * AUX area tracing data because we do not decode the
3958 	 * trace because it would take too long.
3959 	 */
3960 	if (rec->opts.full_auxtrace)
3961 		rec->buildid_all = true;
3962 
3963 	if (rec->opts.text_poke) {
3964 		err = record__config_text_poke(rec->evlist);
3965 		if (err) {
3966 			pr_err("record__config_text_poke failed, error %d\n", err);
3967 			goto out;
3968 		}
3969 	}
3970 
3971 	if (record_opts__config(&rec->opts)) {
3972 		err = -EINVAL;
3973 		goto out;
3974 	}
3975 
3976 	err = record__init_thread_masks(rec);
3977 	if (err) {
3978 		pr_err("Failed to initialize parallel data streaming masks\n");
3979 		goto out;
3980 	}
3981 
3982 	if (rec->opts.nr_cblocks > nr_cblocks_max)
3983 		rec->opts.nr_cblocks = nr_cblocks_max;
3984 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
3985 
3986 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
3987 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
3988 
3989 	if (rec->opts.comp_level > comp_level_max)
3990 		rec->opts.comp_level = comp_level_max;
3991 	pr_debug("comp level: %d\n", rec->opts.comp_level);
3992 
3993 	err = __cmd_record(&record, argc, argv);
3994 out:
3995 	evlist__delete(rec->evlist);
3996 	symbol__exit();
3997 	auxtrace_record__free(rec->itr);
3998 out_opts:
3999 	record__free_thread_masks(rec, rec->nr_threads);
4000 	rec->nr_threads = 0;
4001 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4002 	return err;
4003 }
4004 
4005 static void snapshot_sig_handler(int sig __maybe_unused)
4006 {
4007 	struct record *rec = &record;
4008 
4009 	hit_auxtrace_snapshot_trigger(rec);
4010 
4011 	if (switch_output_signal(rec))
4012 		trigger_hit(&switch_output_trigger);
4013 }
4014 
4015 static void alarm_sig_handler(int sig __maybe_unused)
4016 {
4017 	struct record *rec = &record;
4018 
4019 	if (switch_output_time(rec))
4020 		trigger_hit(&switch_output_trigger);
4021 }
4022