xref: /openbmc/linux/tools/perf/builtin-record.c (revision fbb6b31a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "util/pmu-hybrid.h"
51 #include "util/evlist-hybrid.h"
52 #include "asm/bug.h"
53 #include "perf.h"
54 #include "cputopo.h"
55 
56 #include <errno.h>
57 #include <inttypes.h>
58 #include <locale.h>
59 #include <poll.h>
60 #include <pthread.h>
61 #include <unistd.h>
62 #ifndef HAVE_GETTID
63 #include <syscall.h>
64 #endif
65 #include <sched.h>
66 #include <signal.h>
67 #ifdef HAVE_EVENTFD_SUPPORT
68 #include <sys/eventfd.h>
69 #endif
70 #include <sys/mman.h>
71 #include <sys/wait.h>
72 #include <sys/types.h>
73 #include <sys/stat.h>
74 #include <fcntl.h>
75 #include <linux/err.h>
76 #include <linux/string.h>
77 #include <linux/time64.h>
78 #include <linux/zalloc.h>
79 #include <linux/bitmap.h>
80 #include <sys/time.h>
81 
82 struct switch_output {
83 	bool		 enabled;
84 	bool		 signal;
85 	unsigned long	 size;
86 	unsigned long	 time;
87 	const char	*str;
88 	bool		 set;
89 	char		 **filenames;
90 	int		 num_files;
91 	int		 cur_file;
92 };
93 
94 struct thread_mask {
95 	struct mmap_cpu_mask	maps;
96 	struct mmap_cpu_mask	affinity;
97 };
98 
99 struct record_thread {
100 	pid_t			tid;
101 	struct thread_mask	*mask;
102 	struct {
103 		int		msg[2];
104 		int		ack[2];
105 	} pipes;
106 	struct fdarray		pollfd;
107 	int			ctlfd_pos;
108 	int			nr_mmaps;
109 	struct mmap		**maps;
110 	struct mmap		**overwrite_maps;
111 	struct record		*rec;
112 	unsigned long long	samples;
113 	unsigned long		waking;
114 	u64			bytes_written;
115 	u64			bytes_transferred;
116 	u64			bytes_compressed;
117 };
118 
119 static __thread struct record_thread *thread;
120 
121 enum thread_msg {
122 	THREAD_MSG__UNDEFINED = 0,
123 	THREAD_MSG__READY,
124 	THREAD_MSG__MAX,
125 };
126 
127 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
128 	"UNDEFINED", "READY"
129 };
130 
131 enum thread_spec {
132 	THREAD_SPEC__UNDEFINED = 0,
133 	THREAD_SPEC__CPU,
134 	THREAD_SPEC__CORE,
135 	THREAD_SPEC__PACKAGE,
136 	THREAD_SPEC__NUMA,
137 	THREAD_SPEC__USER,
138 	THREAD_SPEC__MAX,
139 };
140 
141 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
142 	"undefined", "cpu", "core", "package", "numa", "user"
143 };
144 
145 struct record {
146 	struct perf_tool	tool;
147 	struct record_opts	opts;
148 	u64			bytes_written;
149 	struct perf_data	data;
150 	struct auxtrace_record	*itr;
151 	struct evlist	*evlist;
152 	struct perf_session	*session;
153 	struct evlist		*sb_evlist;
154 	pthread_t		thread_id;
155 	int			realtime_prio;
156 	bool			switch_output_event_set;
157 	bool			no_buildid;
158 	bool			no_buildid_set;
159 	bool			no_buildid_cache;
160 	bool			no_buildid_cache_set;
161 	bool			buildid_all;
162 	bool			buildid_mmap;
163 	bool			timestamp_filename;
164 	bool			timestamp_boundary;
165 	struct switch_output	switch_output;
166 	unsigned long long	samples;
167 	unsigned long		output_max_size;	/* = 0: unlimited */
168 	struct perf_debuginfod	debuginfod;
169 	int			nr_threads;
170 	struct thread_mask	*thread_masks;
171 	struct record_thread	*thread_data;
172 };
173 
174 static volatile int done;
175 
176 static volatile int auxtrace_record__snapshot_started;
177 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
178 static DEFINE_TRIGGER(switch_output_trigger);
179 
180 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
181 	"SYS", "NODE", "CPU"
182 };
183 
184 #ifndef HAVE_GETTID
185 static inline pid_t gettid(void)
186 {
187 	return (pid_t)syscall(__NR_gettid);
188 }
189 #endif
190 
191 static int record__threads_enabled(struct record *rec)
192 {
193 	return rec->opts.threads_spec;
194 }
195 
196 static bool switch_output_signal(struct record *rec)
197 {
198 	return rec->switch_output.signal &&
199 	       trigger_is_ready(&switch_output_trigger);
200 }
201 
202 static bool switch_output_size(struct record *rec)
203 {
204 	return rec->switch_output.size &&
205 	       trigger_is_ready(&switch_output_trigger) &&
206 	       (rec->bytes_written >= rec->switch_output.size);
207 }
208 
209 static bool switch_output_time(struct record *rec)
210 {
211 	return rec->switch_output.time &&
212 	       trigger_is_ready(&switch_output_trigger);
213 }
214 
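/*
 * Total bytes written so far: the main session counter plus the
 * per-thread counters of all parallel writer threads.
 */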
215 static u64 record__bytes_written(struct record *rec)
216 {
217 	int t;
218 	u64 bytes_written = rec->bytes_written;
219 	struct record_thread *thread_data = rec->thread_data;
220 
221 	for (t = 0; t < rec->nr_threads; t++)
222 		bytes_written += thread_data[t].bytes_written;
223 
224 	return bytes_written;
225 }
226 
227 static bool record__output_max_size_exceeded(struct record *rec)
228 {
229 	return rec->output_max_size &&
230 	       (record__bytes_written(rec) >= rec->output_max_size);
231 }
232 
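/*
 * Write a block of data to the output. In threaded (parallel streaming)
 * mode the data goes to the per-mmap file and is accounted to the current
 * thread, otherwise to the session's single perf.data file. Also enforces
 * the --max-size limit and arms the switch-output size trigger.
 */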
233 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
234 			 void *bf, size_t size)
235 {
236 	struct perf_data_file *file = &rec->session->data->file;
237 
238 	if (map && map->file)
239 		file = map->file;
240 
241 	if (perf_data_file__write(file, bf, size) < 0) {
242 		pr_err("failed to write perf data, error: %m\n");
243 		return -1;
244 	}
245 
246 	if (map && map->file)
247 		thread->bytes_written += size;
248 	else
249 		rec->bytes_written += size;
250 
251 	if (record__output_max_size_exceeded(rec) && !done) {
252 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
253 				" stopping session ]\n",
254 				record__bytes_written(rec) >> 10);
255 		done = 1;
256 	}
257 
258 	if (switch_output_size(rec))
259 		trigger_hit(&switch_output_trigger);
260 
261 	return 0;
262 }
263 
264 static int record__aio_enabled(struct record *rec);
265 static int record__comp_enabled(struct record *rec);
266 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
267 			    void *dst, size_t dst_size, void *src, size_t src_size);
268 
269 #ifdef HAVE_AIO_SUPPORT
270 static int record__aio_write(struct aiocb *cblock, int trace_fd,
271 		void *buf, size_t size, off_t off)
272 {
273 	int rc;
274 
275 	cblock->aio_fildes = trace_fd;
276 	cblock->aio_buf    = buf;
277 	cblock->aio_nbytes = size;
278 	cblock->aio_offset = off;
279 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
280 
281 	do {
282 		rc = aio_write(cblock);
283 		if (rc == 0) {
284 			break;
285 		} else if (errno != EAGAIN) {
286 			cblock->aio_fildes = -1;
287 			pr_err("failed to queue perf data, error: %m\n");
288 			break;
289 		}
290 	} while (1);
291 
292 	return rc;
293 }
294 
295 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
296 {
297 	void *rem_buf;
298 	off_t rem_off;
299 	size_t rem_size;
300 	int rc, aio_errno;
301 	ssize_t aio_ret, written;
302 
303 	aio_errno = aio_error(cblock);
304 	if (aio_errno == EINPROGRESS)
305 		return 0;
306 
307 	written = aio_ret = aio_return(cblock);
308 	if (aio_ret < 0) {
309 		if (aio_errno != EINTR)
310 			pr_err("failed to write perf data, error: %m\n");
311 		written = 0;
312 	}
313 
314 	rem_size = cblock->aio_nbytes - written;
315 
316 	if (rem_size == 0) {
317 		cblock->aio_fildes = -1;
318 		/*
319 		 * md->refcount is incremented in record__aio_pushfn() for
320 		 * every aio write request started in record__aio_push() so
321 		 * decrement it because the request is now complete.
322 		 */
323 		perf_mmap__put(&md->core);
324 		rc = 1;
325 	} else {
326 		/*
327 		 * The aio write request may require a restart with the
328 		 * remainder if the kernel didn't write the whole
329 		 * chunk at once.
330 		 */
331 		rem_off = cblock->aio_offset + written;
332 		rem_buf = (void *)(cblock->aio_buf + written);
333 		record__aio_write(cblock, cblock->aio_fildes,
334 				rem_buf, rem_size, rem_off);
335 		rc = 0;
336 	}
337 
338 	return rc;
339 }
340 
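/*
 * Reap completed aio writes on this mmap. With sync_all == false, return
 * the index of the first control block that is free for reuse; with
 * sync_all == true, wait until every outstanding request has completed
 * and return -1 once nothing is pending anymore.
 */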
341 static int record__aio_sync(struct mmap *md, bool sync_all)
342 {
343 	struct aiocb **aiocb = md->aio.aiocb;
344 	struct aiocb *cblocks = md->aio.cblocks;
345 	struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
346 	int i, do_suspend;
347 
348 	do {
349 		do_suspend = 0;
350 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
351 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
352 				if (sync_all)
353 					aiocb[i] = NULL;
354 				else
355 					return i;
356 			} else {
357 				/*
358 				 * The started aio write is not complete yet,
359 				 * so it has to be waited on before the
360 				 * next allocation.
361 				 */
362 				aiocb[i] = &cblocks[i];
363 				do_suspend = 1;
364 			}
365 		}
366 		if (!do_suspend)
367 			return -1;
368 
369 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
370 			if (!(errno == EAGAIN || errno == EINTR))
371 				pr_err("failed to sync perf data, error: %m\n");
372 		}
373 	} while (1);
374 }
375 
376 struct record_aio {
377 	struct record	*rec;
378 	void		*data;
379 	size_t		size;
380 };
381 
382 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
383 {
384 	struct record_aio *aio = to;
385 
386 	/*
387 	 * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
388 	 * buffer to release space in the kernel buffer as fast as possible, via the
389 	 * perf_mmap__consume() call made from perf_mmap__push().
390 	 *
391 	 * That lets the kernel proceed with storing more profiling data into
392 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
393 	 *
394 	 * Copying is done in two steps in case the chunk of profiling data
395 	 * crosses the upper bound of the kernel buffer. In that case we first move
396 	 * the part of the data from map->start to the upper bound and then the remainder
397 	 * from the beginning of the kernel buffer to the end of the data chunk.
398 	 */
399 
400 	if (record__comp_enabled(aio->rec)) {
401 		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
402 				     mmap__mmap_len(map) - aio->size,
403 				     buf, size);
404 	} else {
405 		memcpy(aio->data + aio->size, buf, size);
406 	}
407 
408 	if (!aio->size) {
409 		/*
410 		 * Increment map->refcount to guard map->aio.data[] buffer
411 		 * from premature deallocation because the map object can be
412 		 * released before the aio write request started on the
413 		 * map->aio.data[] buffer completes.
414 		 *
415 		 * perf_mmap__put() is done in record__aio_complete()
416 		 * after the started aio request completes, or in record__aio_push()
417 		 * if the request failed to start.
418 		 */
419 		perf_mmap__get(&map->core);
420 	}
421 
422 	aio->size += size;
423 
424 	return size;
425 }
426 
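/*
 * Grab a free aio buffer for this mmap (waiting for a previous write if
 * needed), copy or compress the kernel ring buffer contents into it and
 * queue an asynchronous write at offset *off. On success *off and the
 * written byte counter advance; on failure the extra mmap reference taken
 * in record__aio_pushfn() is dropped.
 */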
427 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
428 {
429 	int ret, idx;
430 	int trace_fd = rec->session->data->file.fd;
431 	struct record_aio aio = { .rec = rec, .size = 0 };
432 
433 	/*
434 	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
435 	 * becomes available after the previous aio write operation.
436 	 */
437 
438 	idx = record__aio_sync(map, false);
439 	aio.data = map->aio.data[idx];
440 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
441 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
442 		return ret;
443 
444 	rec->samples++;
445 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
446 	if (!ret) {
447 		*off += aio.size;
448 		rec->bytes_written += aio.size;
449 		if (switch_output_size(rec))
450 			trigger_hit(&switch_output_trigger);
451 	} else {
452 		/*
453 		 * Decrement map->refcount, incremented in record__aio_pushfn(),
454 		 * if the record__aio_write() operation failed to start; otherwise
455 		 * map->refcount is decremented in record__aio_complete() after
456 		 * the aio write operation finishes successfully.
457 		 */
458 		perf_mmap__put(&map->core);
459 	}
460 
461 	return ret;
462 }
463 
464 static off_t record__aio_get_pos(int trace_fd)
465 {
466 	return lseek(trace_fd, 0, SEEK_CUR);
467 }
468 
469 static void record__aio_set_pos(int trace_fd, off_t pos)
470 {
471 	lseek(trace_fd, pos, SEEK_SET);
472 }
473 
474 static void record__aio_mmap_read_sync(struct record *rec)
475 {
476 	int i;
477 	struct evlist *evlist = rec->evlist;
478 	struct mmap *maps = evlist->mmap;
479 
480 	if (!record__aio_enabled(rec))
481 		return;
482 
483 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
484 		struct mmap *map = &maps[i];
485 
486 		if (map->core.base)
487 			record__aio_sync(map, true);
488 	}
489 }
490 
491 static int nr_cblocks_default = 1;
492 static int nr_cblocks_max = 4;
493 
494 static int record__aio_parse(const struct option *opt,
495 			     const char *str,
496 			     int unset)
497 {
498 	struct record_opts *opts = (struct record_opts *)opt->value;
499 
500 	if (unset) {
501 		opts->nr_cblocks = 0;
502 	} else {
503 		if (str)
504 			opts->nr_cblocks = strtol(str, NULL, 0);
505 		if (!opts->nr_cblocks)
506 			opts->nr_cblocks = nr_cblocks_default;
507 	}
508 
509 	return 0;
510 }
511 #else /* HAVE_AIO_SUPPORT */
512 static int nr_cblocks_max = 0;
513 
514 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
515 			    off_t *off __maybe_unused)
516 {
517 	return -1;
518 }
519 
520 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
521 {
522 	return -1;
523 }
524 
525 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
526 {
527 }
528 
529 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
530 {
531 }
532 #endif
533 
534 static int record__aio_enabled(struct record *rec)
535 {
536 	return rec->opts.nr_cblocks > 0;
537 }
538 
539 #define MMAP_FLUSH_DEFAULT 1
540 static int record__mmap_flush_parse(const struct option *opt,
541 				    const char *str,
542 				    int unset)
543 {
544 	int flush_max;
545 	struct record_opts *opts = (struct record_opts *)opt->value;
546 	static struct parse_tag tags[] = {
547 			{ .tag  = 'B', .mult = 1       },
548 			{ .tag  = 'K', .mult = 1 << 10 },
549 			{ .tag  = 'M', .mult = 1 << 20 },
550 			{ .tag  = 'G', .mult = 1 << 30 },
551 			{ .tag  = 0 },
552 	};
553 
554 	if (unset)
555 		return 0;
556 
557 	if (str) {
558 		opts->mmap_flush = parse_tag_value(str, tags);
559 		if (opts->mmap_flush == (int)-1)
560 			opts->mmap_flush = strtol(str, NULL, 0);
561 	}
562 
563 	if (!opts->mmap_flush)
564 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
565 
566 	flush_max = evlist__mmap_size(opts->mmap_pages);
567 	flush_max /= 4;
568 	if (opts->mmap_flush > flush_max)
569 		opts->mmap_flush = flush_max;
570 
571 	return 0;
572 }
573 
574 #ifdef HAVE_ZSTD_SUPPORT
575 static unsigned int comp_level_default = 1;
576 
577 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
578 {
579 	struct record_opts *opts = opt->value;
580 
581 	if (unset) {
582 		opts->comp_level = 0;
583 	} else {
584 		if (str)
585 			opts->comp_level = strtol(str, NULL, 0);
586 		if (!opts->comp_level)
587 			opts->comp_level = comp_level_default;
588 	}
589 
590 	return 0;
591 }
592 #endif
593 static unsigned int comp_level_max = 22;
594 
595 static int record__comp_enabled(struct record *rec)
596 {
597 	return rec->opts.comp_level > 0;
598 }
599 
600 static int process_synthesized_event(struct perf_tool *tool,
601 				     union perf_event *event,
602 				     struct perf_sample *sample __maybe_unused,
603 				     struct machine *machine __maybe_unused)
604 {
605 	struct record *rec = container_of(tool, struct record, tool);
606 	return record__write(rec, NULL, event, event->header.size);
607 }
608 
609 static int process_locked_synthesized_event(struct perf_tool *tool,
610 				     union perf_event *event,
611 				     struct perf_sample *sample __maybe_unused,
612 				     struct machine *machine __maybe_unused)
613 {
614 	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
615 	int ret;
616 
617 	pthread_mutex_lock(&synth_lock);
618 	ret = process_synthesized_event(tool, event, sample, machine);
619 	pthread_mutex_unlock(&synth_lock);
620 	return ret;
621 }
622 
623 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
624 {
625 	struct record *rec = to;
626 
627 	if (record__comp_enabled(rec)) {
628 		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
629 		bf   = map->data;
630 	}
631 
632 	thread->samples++;
633 	return record__write(rec, map, bf, size);
634 }
635 
636 static volatile int signr = -1;
637 static volatile int child_finished;
638 #ifdef HAVE_EVENTFD_SUPPORT
639 static int done_fd = -1;
640 #endif
641 
642 static void sig_handler(int sig)
643 {
644 	if (sig == SIGCHLD)
645 		child_finished = 1;
646 	else
647 		signr = sig;
648 
649 	done = 1;
650 #ifdef HAVE_EVENTFD_SUPPORT
651 {
652 	u64 tmp = 1;
653 	/*
654 	 * It is possible for this signal handler to run after done is checked
655 	 * in the main loop, but before the perf counter fds are polled. If this
656 	 * happens, the poll() will continue to wait even though done is set,
657 	 * and will only break out if either another signal is received, or the
658 	 * counters are ready for read. To ensure the poll() doesn't sleep when
659 	 * done is set, use an eventfd (done_fd) to wake up the poll().
660 	 */
661 	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
662 		pr_err("failed to signal wakeup fd, error: %m\n");
663 }
664 #endif // HAVE_EVENTFD_SUPPORT
665 }
666 
667 static void sigsegv_handler(int sig)
668 {
669 	perf_hooks__recover();
670 	sighandler_dump_stack(sig);
671 }
672 
673 static void record__sig_exit(void)
674 {
675 	if (signr == -1)
676 		return;
677 
678 	signal(signr, SIG_DFL);
679 	raise(signr);
680 }
681 
682 #ifdef HAVE_AUXTRACE_SUPPORT
683 
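/*
 * Write out one AUX area trace event: the event header followed by the
 * trace data, which may come in two chunks if it wrapped around the end
 * of the auxtrace buffer, padded to an 8-byte boundary. For single-file
 * output an auxtrace index entry is recorded as well.
 */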
684 static int record__process_auxtrace(struct perf_tool *tool,
685 				    struct mmap *map,
686 				    union perf_event *event, void *data1,
687 				    size_t len1, void *data2, size_t len2)
688 {
689 	struct record *rec = container_of(tool, struct record, tool);
690 	struct perf_data *data = &rec->data;
691 	size_t padding;
692 	u8 pad[8] = {0};
693 
694 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
695 		off_t file_offset;
696 		int fd = perf_data__fd(data);
697 		int err;
698 
699 		file_offset = lseek(fd, 0, SEEK_CUR);
700 		if (file_offset == -1)
701 			return -1;
702 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
703 						     event, file_offset);
704 		if (err)
705 			return err;
706 	}
707 
708 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
709 	padding = (len1 + len2) & 7;
710 	if (padding)
711 		padding = 8 - padding;
712 
713 	record__write(rec, map, event, event->header.size);
714 	record__write(rec, map, data1, len1);
715 	if (len2)
716 		record__write(rec, map, data2, len2);
717 	record__write(rec, map, &pad, padding);
718 
719 	return 0;
720 }
721 
722 static int record__auxtrace_mmap_read(struct record *rec,
723 				      struct mmap *map)
724 {
725 	int ret;
726 
727 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
728 				  record__process_auxtrace);
729 	if (ret < 0)
730 		return ret;
731 
732 	if (ret)
733 		rec->samples++;
734 
735 	return 0;
736 }
737 
738 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
739 					       struct mmap *map)
740 {
741 	int ret;
742 
743 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
744 					   record__process_auxtrace,
745 					   rec->opts.auxtrace_snapshot_size);
746 	if (ret < 0)
747 		return ret;
748 
749 	if (ret)
750 		rec->samples++;
751 
752 	return 0;
753 }
754 
755 static int record__auxtrace_read_snapshot_all(struct record *rec)
756 {
757 	int i;
758 	int rc = 0;
759 
760 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
761 		struct mmap *map = &rec->evlist->mmap[i];
762 
763 		if (!map->auxtrace_mmap.base)
764 			continue;
765 
766 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
767 			rc = -1;
768 			goto out;
769 		}
770 	}
771 out:
772 	return rc;
773 }
774 
775 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
776 {
777 	pr_debug("Recording AUX area tracing snapshot\n");
778 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
779 		trigger_error(&auxtrace_snapshot_trigger);
780 	} else {
781 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
782 			trigger_error(&auxtrace_snapshot_trigger);
783 		else
784 			trigger_ready(&auxtrace_snapshot_trigger);
785 	}
786 }
787 
788 static int record__auxtrace_snapshot_exit(struct record *rec)
789 {
790 	if (trigger_is_error(&auxtrace_snapshot_trigger))
791 		return 0;
792 
793 	if (!auxtrace_record__snapshot_started &&
794 	    auxtrace_record__snapshot_start(rec->itr))
795 		return -1;
796 
797 	record__read_auxtrace_snapshot(rec, true);
798 	if (trigger_is_error(&auxtrace_snapshot_trigger))
799 		return -1;
800 
801 	return 0;
802 }
803 
804 static int record__auxtrace_init(struct record *rec)
805 {
806 	int err;
807 
808 	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
809 	    && record__threads_enabled(rec)) {
810 		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
811 		return -EINVAL;
812 	}
813 
814 	if (!rec->itr) {
815 		rec->itr = auxtrace_record__init(rec->evlist, &err);
816 		if (err)
817 			return err;
818 	}
819 
820 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
821 					      rec->opts.auxtrace_snapshot_opts);
822 	if (err)
823 		return err;
824 
825 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
826 					    rec->opts.auxtrace_sample_opts);
827 	if (err)
828 		return err;
829 
830 	auxtrace_regroup_aux_output(rec->evlist);
831 
832 	return auxtrace_parse_filters(rec->evlist);
833 }
834 
835 #else
836 
837 static inline
838 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
839 			       struct mmap *map __maybe_unused)
840 {
841 	return 0;
842 }
843 
844 static inline
845 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
846 				    bool on_exit __maybe_unused)
847 {
848 }
849 
850 static inline
851 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
852 {
853 	return 0;
854 }
855 
856 static inline
857 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
858 {
859 	return 0;
860 }
861 
862 static int record__auxtrace_init(struct record *rec __maybe_unused)
863 {
864 	return 0;
865 }
866 
867 #endif
868 
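/*
 * Make sure a system-wide dummy event is set up to capture
 * PERF_RECORD_TEXT_POKE (and PERF_RECORD_KSYMBOL) events so that kernel
 * text modifications can be decoded later.
 */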
869 static int record__config_text_poke(struct evlist *evlist)
870 {
871 	struct evsel *evsel;
872 	int err;
873 
874 	/* Nothing to do if text poke is already configured */
875 	evlist__for_each_entry(evlist, evsel) {
876 		if (evsel->core.attr.text_poke)
877 			return 0;
878 	}
879 
880 	err = parse_events(evlist, "dummy:u", NULL);
881 	if (err)
882 		return err;
883 
884 	evsel = evlist__last(evlist);
885 
886 	evsel->core.attr.freq = 0;
887 	evsel->core.attr.sample_period = 1;
888 	evsel->core.attr.text_poke = 1;
889 	evsel->core.attr.ksymbol = 1;
890 
891 	evsel->core.system_wide = true;
892 	evsel->no_aux_samples = true;
893 	evsel->immediate = true;
894 
895 	/* Text poke must be collected on all CPUs */
896 	perf_cpu_map__put(evsel->core.own_cpus);
897 	evsel->core.own_cpus = perf_cpu_map__new(NULL);
898 	perf_cpu_map__put(evsel->core.cpus);
899 	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
900 
901 	evsel__set_sample_bit(evsel, TIME);
902 
903 	return 0;
904 }
905 
906 static bool record__kcore_readable(struct machine *machine)
907 {
908 	char kcore[PATH_MAX];
909 	int fd;
910 
911 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
912 
913 	fd = open(kcore, O_RDONLY);
914 	if (fd < 0)
915 		return false;
916 
917 	close(fd);
918 
919 	return true;
920 }
921 
922 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
923 {
924 	char from_dir[PATH_MAX];
925 	char kcore_dir[PATH_MAX];
926 	int ret;
927 
928 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
929 
930 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
931 	if (ret)
932 		return ret;
933 
934 	return kcore_copy(from_dir, kcore_dir);
935 }
936 
937 static void record__thread_data_init_pipes(struct record_thread *thread_data)
938 {
939 	thread_data->pipes.msg[0] = -1;
940 	thread_data->pipes.msg[1] = -1;
941 	thread_data->pipes.ack[0] = -1;
942 	thread_data->pipes.ack[1] = -1;
943 }
944 
945 static int record__thread_data_open_pipes(struct record_thread *thread_data)
946 {
947 	if (pipe(thread_data->pipes.msg))
948 		return -EINVAL;
949 
950 	if (pipe(thread_data->pipes.ack)) {
951 		close(thread_data->pipes.msg[0]);
952 		thread_data->pipes.msg[0] = -1;
953 		close(thread_data->pipes.msg[1]);
954 		thread_data->pipes.msg[1] = -1;
955 		return -EINVAL;
956 	}
957 
958 	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
959 		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
960 		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
961 
962 	return 0;
963 }
964 
965 static void record__thread_data_close_pipes(struct record_thread *thread_data)
966 {
967 	if (thread_data->pipes.msg[0] != -1) {
968 		close(thread_data->pipes.msg[0]);
969 		thread_data->pipes.msg[0] = -1;
970 	}
971 	if (thread_data->pipes.msg[1] != -1) {
972 		close(thread_data->pipes.msg[1]);
973 		thread_data->pipes.msg[1] = -1;
974 	}
975 	if (thread_data->pipes.ack[0] != -1) {
976 		close(thread_data->pipes.ack[0]);
977 		thread_data->pipes.ack[0] = -1;
978 	}
979 	if (thread_data->pipes.ack[1] != -1) {
980 		close(thread_data->pipes.ack[1]);
981 		thread_data->pipes.ack[1] = -1;
982 	}
983 }
984 
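/*
 * Hand this thread the subset of the evlist's mmaps (and overwrite mmaps)
 * whose CPUs are set in the thread's maps mask. With a dummy CPU map
 * (per-task recording) all mmaps belong to the single thread.
 */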
985 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
986 {
987 	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
988 	struct mmap *mmap = evlist->mmap;
989 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
990 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
991 
992 	if (cpu_map__is_dummy(cpus))
993 		thread_data->nr_mmaps = nr_mmaps;
994 	else
995 		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
996 						      thread_data->mask->maps.nbits);
997 	if (mmap) {
998 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
999 		if (!thread_data->maps)
1000 			return -ENOMEM;
1001 	}
1002 	if (overwrite_mmap) {
1003 		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1004 		if (!thread_data->overwrite_maps) {
1005 			zfree(&thread_data->maps);
1006 			return -ENOMEM;
1007 		}
1008 	}
1009 	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1010 		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1011 
1012 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1013 		if (cpu_map__is_dummy(cpus) ||
1014 		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1015 			if (thread_data->maps) {
1016 				thread_data->maps[tm] = &mmap[m];
1017 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1018 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1019 			}
1020 			if (thread_data->overwrite_maps) {
1021 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1022 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1023 					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1024 			}
1025 			tm++;
1026 		}
1027 	}
1028 
1029 	return 0;
1030 }
1031 
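/*
 * Build the thread's private pollfd array by duplicating from the evlist
 * pollfd the entries whose private pointer refers to one of the mmaps
 * owned by this thread.
 */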
1032 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1033 {
1034 	int f, tm, pos;
1035 	struct mmap *map, *overwrite_map;
1036 
1037 	fdarray__init(&thread_data->pollfd, 64);
1038 
1039 	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1040 		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1041 		overwrite_map = thread_data->overwrite_maps ?
1042 				thread_data->overwrite_maps[tm] : NULL;
1043 
1044 		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1045 			void *ptr = evlist->core.pollfd.priv[f].ptr;
1046 
1047 			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1048 				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1049 							      &evlist->core.pollfd);
1050 				if (pos < 0)
1051 					return pos;
1052 				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1053 					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1054 			}
1055 		}
1056 	}
1057 
1058 	return 0;
1059 }
1060 
1061 static void record__free_thread_data(struct record *rec)
1062 {
1063 	int t;
1064 	struct record_thread *thread_data = rec->thread_data;
1065 
1066 	if (thread_data == NULL)
1067 		return;
1068 
1069 	for (t = 0; t < rec->nr_threads; t++) {
1070 		record__thread_data_close_pipes(&thread_data[t]);
1071 		zfree(&thread_data[t].maps);
1072 		zfree(&thread_data[t].overwrite_maps);
1073 		fdarray__exit(&thread_data[t].pollfd);
1074 	}
1075 
1076 	zfree(&rec->thread_data);
1077 }
1078 
1079 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1080 {
1081 	int t, ret;
1082 	struct record_thread *thread_data;
1083 
1084 	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1085 	if (!rec->thread_data) {
1086 		pr_err("Failed to allocate thread data\n");
1087 		return -ENOMEM;
1088 	}
1089 	thread_data = rec->thread_data;
1090 
1091 	for (t = 0; t < rec->nr_threads; t++)
1092 		record__thread_data_init_pipes(&thread_data[t]);
1093 
1094 	for (t = 0; t < rec->nr_threads; t++) {
1095 		thread_data[t].rec = rec;
1096 		thread_data[t].mask = &rec->thread_masks[t];
1097 		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1098 		if (ret) {
1099 			pr_err("Failed to initialize thread[%d] maps\n", t);
1100 			goto out_free;
1101 		}
1102 		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1103 		if (ret) {
1104 			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1105 			goto out_free;
1106 		}
1107 		if (t) {
1108 			thread_data[t].tid = -1;
1109 			ret = record__thread_data_open_pipes(&thread_data[t]);
1110 			if (ret) {
1111 				pr_err("Failed to open thread[%d] communication pipes\n", t);
1112 				goto out_free;
1113 			}
1114 			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1115 					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1116 			if (ret < 0) {
1117 				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1118 				goto out_free;
1119 			}
1120 			thread_data[t].ctlfd_pos = ret;
1121 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1122 				 thread_data, thread_data[t].ctlfd_pos,
1123 				 thread_data[t].pipes.msg[0]);
1124 		} else {
1125 			thread_data[t].tid = gettid();
1126 			if (evlist->ctl_fd.pos == -1)
1127 				continue;
1128 			ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1129 						      &evlist->core.pollfd);
1130 			if (ret < 0) {
1131 				pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1132 				goto out_free;
1133 			}
1134 			thread_data[t].ctlfd_pos = ret;
1135 			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1136 				 thread_data, thread_data[t].ctlfd_pos,
1137 				 evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1138 		}
1139 	}
1140 
1141 	return 0;
1142 
1143 out_free:
1144 	record__free_thread_data(rec);
1145 
1146 	return ret;
1147 }
1148 
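/*
 * Memory map the evlist ring buffers (including the AUX area buffers),
 * set up the control file descriptor, allocate the per-thread data and,
 * in threaded mode, create the data directory and attach one output file
 * to every mmap.
 */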
1149 static int record__mmap_evlist(struct record *rec,
1150 			       struct evlist *evlist)
1151 {
1152 	int i, ret;
1153 	struct record_opts *opts = &rec->opts;
1154 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1155 				  opts->auxtrace_sample_mode;
1156 	char msg[512];
1157 
1158 	if (opts->affinity != PERF_AFFINITY_SYS)
1159 		cpu__setup_cpunode_map();
1160 
1161 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1162 				 opts->auxtrace_mmap_pages,
1163 				 auxtrace_overwrite,
1164 				 opts->nr_cblocks, opts->affinity,
1165 				 opts->mmap_flush, opts->comp_level) < 0) {
1166 		if (errno == EPERM) {
1167 			pr_err("Permission error mapping pages.\n"
1168 			       "Consider increasing "
1169 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1170 			       "or try again with a smaller value of -m/--mmap_pages.\n"
1171 			       "(current value: %u,%u)\n",
1172 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1173 			return -errno;
1174 		} else {
1175 			pr_err("failed to mmap with %d (%s)\n", errno,
1176 				str_error_r(errno, msg, sizeof(msg)));
1177 			if (errno)
1178 				return -errno;
1179 			else
1180 				return -EINVAL;
1181 		}
1182 	}
1183 
1184 	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1185 		return -1;
1186 
1187 	ret = record__alloc_thread_data(rec, evlist);
1188 	if (ret)
1189 		return ret;
1190 
1191 	if (record__threads_enabled(rec)) {
1192 		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1193 		if (ret) {
1194 			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1195 			return ret;
1196 		}
1197 		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1198 			if (evlist->mmap)
1199 				evlist->mmap[i].file = &rec->data.dir.files[i];
1200 			if (evlist->overwrite_mmap)
1201 				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1202 		}
1203 	}
1204 
1205 	return 0;
1206 }
1207 
1208 static int record__mmap(struct record *rec)
1209 {
1210 	return record__mmap_evlist(rec, rec->evlist);
1211 }
1212 
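/*
 * Add a dummy tracking event where needed (initial_delay, system-wide or
 * hybrid systems), configure and open all events with fallback and weak
 * group handling, apply event filters and mmap the ring buffers.
 */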
1213 static int record__open(struct record *rec)
1214 {
1215 	char msg[BUFSIZ];
1216 	struct evsel *pos;
1217 	struct evlist *evlist = rec->evlist;
1218 	struct perf_session *session = rec->session;
1219 	struct record_opts *opts = &rec->opts;
1220 	int rc = 0;
1221 
1222 	/*
1223 	 * For initial_delay, system-wide or a hybrid system, we need to add a
1224 	 * dummy event so that we can track PERF_RECORD_MMAP and cover the delay
1225 	 * of waiting or of event synthesis.
1226 	 */
1227 	if (opts->initial_delay || target__has_cpu(&opts->target) ||
1228 	    perf_pmu__has_hybrid()) {
1229 		pos = evlist__get_tracking_event(evlist);
1230 		if (!evsel__is_dummy_event(pos)) {
1231 			/* Set up dummy event. */
1232 			if (evlist__add_dummy(evlist))
1233 				return -ENOMEM;
1234 			pos = evlist__last(evlist);
1235 			evlist__set_tracking_event(evlist, pos);
1236 		}
1237 
1238 		/*
1239 		 * Enable the dummy event when the process is forked for
1240 		 * initial_delay, or immediately for system-wide collection.
1241 		 */
1242 		if (opts->initial_delay && !pos->immediate &&
1243 		    !target__has_cpu(&opts->target))
1244 			pos->core.attr.enable_on_exec = 1;
1245 		else
1246 			pos->immediate = 1;
1247 	}
1248 
1249 	evlist__config(evlist, opts, &callchain_param);
1250 
1251 	evlist__for_each_entry(evlist, pos) {
1252 try_again:
1253 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1254 			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1255 				if (verbose > 0)
1256 					ui__warning("%s\n", msg);
1257 				goto try_again;
1258 			}
1259 			if ((errno == EINVAL || errno == EBADF) &&
1260 			    pos->core.leader != &pos->core &&
1261 			    pos->weak_group) {
1262 				pos = evlist__reset_weak_group(evlist, pos, true);
1263 				goto try_again;
1264 			}
1265 			rc = -errno;
1266 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1267 			ui__error("%s\n", msg);
1268 			goto out;
1269 		}
1270 
1271 		pos->supported = true;
1272 	}
1273 
1274 	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1275 		pr_warning(
1276 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1277 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1278 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1279 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1280 "Samples in kernel modules won't be resolved at all.\n\n"
1281 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1282 "even with a suitable vmlinux or kallsyms file.\n\n");
1283 	}
1284 
1285 	if (evlist__apply_filters(evlist, &pos)) {
1286 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1287 			pos->filter, evsel__name(pos), errno,
1288 			str_error_r(errno, msg, sizeof(msg)));
1289 		rc = -1;
1290 		goto out;
1291 	}
1292 
1293 	rc = record__mmap(rec);
1294 	if (rc)
1295 		goto out;
1296 
1297 	session->evlist = evlist;
1298 	perf_session__set_id_hdr_size(session);
1299 out:
1300 	return rc;
1301 }
1302 
1303 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1304 {
1305 	if (rec->evlist->first_sample_time == 0)
1306 		rec->evlist->first_sample_time = sample_time;
1307 
1308 	if (sample_time)
1309 		rec->evlist->last_sample_time = sample_time;
1310 }
1311 
1312 static int process_sample_event(struct perf_tool *tool,
1313 				union perf_event *event,
1314 				struct perf_sample *sample,
1315 				struct evsel *evsel,
1316 				struct machine *machine)
1317 {
1318 	struct record *rec = container_of(tool, struct record, tool);
1319 
1320 	set_timestamp_boundary(rec, sample->time);
1321 
1322 	if (rec->buildid_all)
1323 		return 0;
1324 
1325 	rec->samples++;
1326 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1327 }
1328 
1329 static int process_buildids(struct record *rec)
1330 {
1331 	struct perf_session *session = rec->session;
1332 
1333 	if (perf_data__size(&rec->data) == 0)
1334 		return 0;
1335 
1336 	/*
1337 	 * During this process, it'll load the kernel map and replace
1338 	 * dso->long_name with the real pathname it found.  In this case
1339 	 * we prefer the vmlinux path like
1340 	 *   /lib/modules/3.16.4/build/vmlinux
1341 	 *
1342 	 * rather than build-id path (in debug directory).
1343 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1344 	 */
1345 	symbol_conf.ignore_vmlinux_buildid = true;
1346 
1347 	/*
1348 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
1349 	 * so there is no need to process samples. But if timestamp_boundary is
1350 	 * enabled, it still needs to walk all samples to get the timestamps of
1351 	 * the first/last samples.
1352 	 */
1353 	if (rec->buildid_all && !rec->timestamp_boundary)
1354 		rec->tool.sample = NULL;
1355 
1356 	return perf_session__process_events(session);
1357 }
1358 
1359 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1360 {
1361 	int err;
1362 	struct perf_tool *tool = data;
1363 	/*
1364 	 * As for the guest kernel, when processing the record & report
1365 	 * subcommands we arrange the module mmaps prior to the guest kernel
1366 	 * mmap and trigger a dso preload, because by default guest module
1367 	 * symbols are loaded from guest kallsyms instead of
1368 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
1369 	 * address is in a module instead of in the guest kernel.
1370 	 */
1371 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1372 					     machine);
1373 	if (err < 0)
1374 		pr_err("Couldn't record guest kernel [%d]'s reference"
1375 		       " relocation symbol.\n", machine->pid);
1376 
1377 	/*
1378 	 * We use _stext for the guest kernel because the guest kernel's
1379 	 * /proc/kallsyms sometimes has no _text.
1380 	 */
1381 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1382 						 machine);
1383 	if (err < 0)
1384 		pr_err("Couldn't record guest kernel [%d]'s reference"
1385 		       " relocation symbol.\n", machine->pid);
1386 }
1387 
1388 static struct perf_event_header finished_round_event = {
1389 	.size = sizeof(struct perf_event_header),
1390 	.type = PERF_RECORD_FINISHED_ROUND,
1391 };
1392 
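/*
 * With --affinity=node|cpu, migrate the recording thread onto the CPUs
 * backing the mmap being flushed so that buffer reads stay local to the
 * ring buffer's memory.
 */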
1393 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1394 {
1395 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1396 	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1397 			  thread->mask->affinity.nbits)) {
1398 		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1399 		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1400 			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1401 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1402 					(cpu_set_t *)thread->mask->affinity.bits);
1403 		if (verbose == 2) {
1404 			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1405 			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1406 		}
1407 	}
1408 }
1409 
1410 static size_t process_comp_header(void *record, size_t increment)
1411 {
1412 	struct perf_record_compressed *event = record;
1413 	size_t size = sizeof(*event);
1414 
1415 	if (increment) {
1416 		event->header.size += increment;
1417 		return increment;
1418 	}
1419 
1420 	event->header.type = PERF_RECORD_COMPRESSED;
1421 	event->header.size = size;
1422 
1423 	return size;
1424 }
1425 
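/*
 * Compress src into dst as PERF_RECORD_COMPRESSED records. In threaded
 * mode the per-mmap Zstd stream and the per-thread byte counters are
 * used, otherwise the session-wide ones.
 */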
1426 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1427 			    void *dst, size_t dst_size, void *src, size_t src_size)
1428 {
1429 	size_t compressed;
1430 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1431 	struct zstd_data *zstd_data = &session->zstd_data;
1432 
1433 	if (map && map->file)
1434 		zstd_data = &map->zstd_data;
1435 
1436 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1437 						     max_record_size, process_comp_header);
1438 
1439 	if (map && map->file) {
1440 		thread->bytes_transferred += src_size;
1441 		thread->bytes_compressed  += compressed;
1442 	} else {
1443 		session->bytes_transferred += src_size;
1444 		session->bytes_compressed  += compressed;
1445 	}
1446 
1447 	return compressed;
1448 }
1449 
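/*
 * Drain the (regular or overwrite) mmaps owned by the current thread,
 * pushing the data out either synchronously or via aio, read the AUX
 * area buffers, and emit a FINISHED_ROUND event if anything was written
 * (not needed in threaded/directory mode).
 */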
1450 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1451 				    bool overwrite, bool synch)
1452 {
1453 	u64 bytes_written = rec->bytes_written;
1454 	int i;
1455 	int rc = 0;
1456 	int nr_mmaps;
1457 	struct mmap **maps;
1458 	int trace_fd = rec->data.file.fd;
1459 	off_t off = 0;
1460 
1461 	if (!evlist)
1462 		return 0;
1463 
1464 	nr_mmaps = thread->nr_mmaps;
1465 	maps = overwrite ? thread->overwrite_maps : thread->maps;
1466 
1467 	if (!maps)
1468 		return 0;
1469 
1470 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1471 		return 0;
1472 
1473 	if (record__aio_enabled(rec))
1474 		off = record__aio_get_pos(trace_fd);
1475 
1476 	for (i = 0; i < nr_mmaps; i++) {
1477 		u64 flush = 0;
1478 		struct mmap *map = maps[i];
1479 
1480 		if (map->core.base) {
1481 			record__adjust_affinity(rec, map);
1482 			if (synch) {
1483 				flush = map->core.flush;
1484 				map->core.flush = 1;
1485 			}
1486 			if (!record__aio_enabled(rec)) {
1487 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1488 					if (synch)
1489 						map->core.flush = flush;
1490 					rc = -1;
1491 					goto out;
1492 				}
1493 			} else {
1494 				if (record__aio_push(rec, map, &off) < 0) {
1495 					record__aio_set_pos(trace_fd, off);
1496 					if (synch)
1497 						map->core.flush = flush;
1498 					rc = -1;
1499 					goto out;
1500 				}
1501 			}
1502 			if (synch)
1503 				map->core.flush = flush;
1504 		}
1505 
1506 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1507 		    !rec->opts.auxtrace_sample_mode &&
1508 		    record__auxtrace_mmap_read(rec, map) != 0) {
1509 			rc = -1;
1510 			goto out;
1511 		}
1512 	}
1513 
1514 	if (record__aio_enabled(rec))
1515 		record__aio_set_pos(trace_fd, off);
1516 
1517 	/*
1518 	 * Mark the round finished in case we wrote
1519 	 * at least one event.
1520 	 *
1521 	 * No need for round events in directory mode,
1522 	 * because the per-cpu maps and files have data
1523 	 * sorted by the kernel.
1524 	 */
1525 	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1526 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1527 
1528 	if (overwrite)
1529 		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1530 out:
1531 	return rc;
1532 }
1533 
1534 static int record__mmap_read_all(struct record *rec, bool synch)
1535 {
1536 	int err;
1537 
1538 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1539 	if (err)
1540 		return err;
1541 
1542 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1543 }
1544 
1545 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1546 					   void *arg __maybe_unused)
1547 {
1548 	struct perf_mmap *map = fda->priv[fd].ptr;
1549 
1550 	if (map)
1551 		perf_mmap__put(map);
1552 }
1553 
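/*
 * Body of an auxiliary reader thread: notify the main thread that it has
 * started, then loop draining its mmaps and polling its descriptors until
 * the main thread closes the message pipe (POLLHUP), and finally do a
 * synchronous flush and acknowledge termination.
 */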
1554 static void *record__thread(void *arg)
1555 {
1556 	enum thread_msg msg = THREAD_MSG__READY;
1557 	bool terminate = false;
1558 	struct fdarray *pollfd;
1559 	int err, ctlfd_pos;
1560 
1561 	thread = arg;
1562 	thread->tid = gettid();
1563 
1564 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1565 	if (err == -1)
1566 		pr_warning("threads[%d]: failed to notify on start: %s\n",
1567 			   thread->tid, strerror(errno));
1568 
1569 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1570 
1571 	pollfd = &thread->pollfd;
1572 	ctlfd_pos = thread->ctlfd_pos;
1573 
1574 	for (;;) {
1575 		unsigned long long hits = thread->samples;
1576 
1577 		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1578 			break;
1579 
1580 		if (hits == thread->samples) {
1581 
1582 			err = fdarray__poll(pollfd, -1);
1583 			/*
1584 			 * Propagate the error only if there is one. Ignore a positive
1585 			 * number of returned events and interrupt errors.
1586 			 */
1587 			if (err > 0 || (err < 0 && errno == EINTR))
1588 				err = 0;
1589 			thread->waking++;
1590 
1591 			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1592 					    record__thread_munmap_filtered, NULL) == 0)
1593 				break;
1594 		}
1595 
1596 		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1597 			terminate = true;
1598 			close(thread->pipes.msg[0]);
1599 			thread->pipes.msg[0] = -1;
1600 			pollfd->entries[ctlfd_pos].fd = -1;
1601 			pollfd->entries[ctlfd_pos].events = 0;
1602 		}
1603 
1604 		pollfd->entries[ctlfd_pos].revents = 0;
1605 	}
1606 	record__mmap_read_all(thread->rec, true);
1607 
1608 	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1609 	if (err == -1)
1610 		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1611 			   thread->tid, strerror(errno));
1612 
1613 	return NULL;
1614 }
1615 
1616 static void record__init_features(struct record *rec)
1617 {
1618 	struct perf_session *session = rec->session;
1619 	int feat;
1620 
1621 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1622 		perf_header__set_feat(&session->header, feat);
1623 
1624 	if (rec->no_buildid)
1625 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1626 
1627 	if (!have_tracepoints(&rec->evlist->core.entries))
1628 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1629 
1630 	if (!rec->opts.branch_stack)
1631 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1632 
1633 	if (!rec->opts.full_auxtrace)
1634 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1635 
1636 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1637 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1638 
1639 	if (!rec->opts.use_clockid)
1640 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1641 
1642 	if (!record__threads_enabled(rec))
1643 		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1644 
1645 	if (!record__comp_enabled(rec))
1646 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1647 
1648 	perf_header__clear_feat(&session->header, HEADER_STAT);
1649 }
1650 
1651 static void
1652 record__finish_output(struct record *rec)
1653 {
1654 	int i;
1655 	struct perf_data *data = &rec->data;
1656 	int fd = perf_data__fd(data);
1657 
1658 	if (data->is_pipe)
1659 		return;
1660 
1661 	rec->session->header.data_size += rec->bytes_written;
1662 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1663 	if (record__threads_enabled(rec)) {
1664 		for (i = 0; i < data->dir.nr; i++)
1665 			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1666 	}
1667 
1668 	if (!rec->no_buildid) {
1669 		process_buildids(rec);
1670 
1671 		if (rec->buildid_all)
1672 			dsos__hit_all(rec->session);
1673 	}
1674 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1675 
1676 	return;
1677 }
1678 
1679 static int record__synthesize_workload(struct record *rec, bool tail)
1680 {
1681 	int err;
1682 	struct perf_thread_map *thread_map;
1683 	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1684 
1685 	if (rec->opts.tail_synthesize != tail)
1686 		return 0;
1687 
1688 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1689 	if (thread_map == NULL)
1690 		return -1;
1691 
1692 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1693 						 process_synthesized_event,
1694 						 &rec->session->machines.host,
1695 						 needs_mmap,
1696 						 rec->opts.sample_address);
1697 	perf_thread_map__put(thread_map);
1698 	return err;
1699 }
1700 
1701 static int record__synthesize(struct record *rec, bool tail);
1702 
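/*
 * Finalize the current output file (tail synthesis, build-ids, header),
 * rotate perf.data to a timestamped file and, unless called at exit,
 * reset the byte counters and re-synthesize the tracking events for the
 * new file. Honors the --switch-output file count limit by recycling
 * old file names.
 */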
1703 static int
1704 record__switch_output(struct record *rec, bool at_exit)
1705 {
1706 	struct perf_data *data = &rec->data;
1707 	int fd, err;
1708 	char *new_filename;
1709 
1710 	/* Same size as a real timestamp, e.g. "2015122520103046" */
1711 	char timestamp[] = "InvalidTimestamp";
1712 
1713 	record__aio_mmap_read_sync(rec);
1714 
1715 	record__synthesize(rec, true);
1716 	if (target__none(&rec->opts.target))
1717 		record__synthesize_workload(rec, true);
1718 
1719 	rec->samples = 0;
1720 	record__finish_output(rec);
1721 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1722 	if (err) {
1723 		pr_err("Failed to get current timestamp\n");
1724 		return -EINVAL;
1725 	}
1726 
1727 	fd = perf_data__switch(data, timestamp,
1728 				    rec->session->header.data_offset,
1729 				    at_exit, &new_filename);
1730 	if (fd >= 0 && !at_exit) {
1731 		rec->bytes_written = 0;
1732 		rec->session->header.data_size = 0;
1733 	}
1734 
1735 	if (!quiet)
1736 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1737 			data->path, timestamp);
1738 
1739 	if (rec->switch_output.num_files) {
1740 		int n = rec->switch_output.cur_file + 1;
1741 
1742 		if (n >= rec->switch_output.num_files)
1743 			n = 0;
1744 		rec->switch_output.cur_file = n;
1745 		if (rec->switch_output.filenames[n]) {
1746 			remove(rec->switch_output.filenames[n]);
1747 			zfree(&rec->switch_output.filenames[n]);
1748 		}
1749 		rec->switch_output.filenames[n] = new_filename;
1750 	} else {
1751 		free(new_filename);
1752 	}
1753 
1754 	/* Output tracking events */
1755 	if (!at_exit) {
1756 		record__synthesize(rec, false);
1757 
1758 		/*
1759 		 * In 'perf record --switch-output' without -a,
1760 		 * record__synthesize() in record__switch_output() won't
1761 		 * generate tracking events because there's no thread_map
1762 		 * in the evlist, which causes the newly created perf.data
1763 		 * to lack map and comm information.
1764 		 * Create a fake thread_map and directly call
1765 		 * perf_event__synthesize_thread_map() for those events.
1766 		 */
1767 		if (target__none(&rec->opts.target))
1768 			record__synthesize_workload(rec, false);
1769 	}
1770 	return fd;
1771 }
1772 
1773 static volatile int workload_exec_errno;
1774 
1775 /*
1776  * evlist__prepare_workload will send a SIGUSR1
1777  * if the fork fails, since we asked for it by setting its
1778  * want_signal to true.
1779  */
1780 static void workload_exec_failed_signal(int signo __maybe_unused,
1781 					siginfo_t *info,
1782 					void *ucontext __maybe_unused)
1783 {
1784 	workload_exec_errno = info->si_value.sival_int;
1785 	done = 1;
1786 	child_finished = 1;
1787 }
1788 
1789 static void snapshot_sig_handler(int sig);
1790 static void alarm_sig_handler(int sig);
1791 
1792 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1793 {
1794 	if (evlist) {
1795 		if (evlist->mmap && evlist->mmap[0].core.base)
1796 			return evlist->mmap[0].core.base;
1797 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1798 			return evlist->overwrite_mmap[0].core.base;
1799 	}
1800 	return NULL;
1801 }
1802 
1803 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1804 {
1805 	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1806 	if (pc)
1807 		return pc;
1808 	return NULL;
1809 }
1810 
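/*
 * Emit the synthetic (non-sample) events describing the current system
 * state: time conversion data, auxtrace and id index info, kernel and
 * module mmaps, extra attributes, thread and CPU maps, BPF and cgroup
 * events, and existing task mmaps, optionally using several threads.
 */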
1811 static int record__synthesize(struct record *rec, bool tail)
1812 {
1813 	struct perf_session *session = rec->session;
1814 	struct machine *machine = &session->machines.host;
1815 	struct perf_data *data = &rec->data;
1816 	struct record_opts *opts = &rec->opts;
1817 	struct perf_tool *tool = &rec->tool;
1818 	int err = 0;
1819 	event_op f = process_synthesized_event;
1820 
1821 	if (rec->opts.tail_synthesize != tail)
1822 		return 0;
1823 
1824 	if (data->is_pipe) {
1825 		err = perf_event__synthesize_for_pipe(tool, session, data,
1826 						      process_synthesized_event);
1827 		if (err < 0)
1828 			goto out;
1829 
1830 		rec->bytes_written += err;
1831 	}
1832 
1833 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1834 					  process_synthesized_event, machine);
1835 	if (err)
1836 		goto out;
1837 
1838 	/* Synthesize id_index before auxtrace_info */
1839 	if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
1840 		err = perf_event__synthesize_id_index(tool,
1841 						      process_synthesized_event,
1842 						      session->evlist, machine);
1843 		if (err)
1844 			goto out;
1845 	}
1846 
1847 	if (rec->opts.full_auxtrace) {
1848 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1849 					session, process_synthesized_event);
1850 		if (err)
1851 			goto out;
1852 	}
1853 
1854 	if (!evlist__exclude_kernel(rec->evlist)) {
1855 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1856 							 machine);
1857 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1858 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1859 				   "Check /proc/kallsyms permission or run as root.\n");
1860 
1861 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1862 						     machine);
1863 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1864 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1865 				   "Check /proc/modules permission or run as root.\n");
1866 	}
1867 
1868 	if (perf_guest) {
1869 		machines__process_guests(&session->machines,
1870 					 perf_event__synthesize_guest_os, tool);
1871 	}
1872 
1873 	err = perf_event__synthesize_extra_attr(&rec->tool,
1874 						rec->evlist,
1875 						process_synthesized_event,
1876 						data->is_pipe);
1877 	if (err)
1878 		goto out;
1879 
1880 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1881 						 process_synthesized_event,
1882 						NULL);
1883 	if (err < 0) {
1884 		pr_err("Couldn't synthesize thread map.\n");
1885 		return err;
1886 	}
1887 
1888 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
1889 					     process_synthesized_event, NULL);
1890 	if (err < 0) {
1891 		pr_err("Couldn't synthesize cpu map.\n");
1892 		return err;
1893 	}
1894 
1895 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1896 						machine, opts);
1897 	if (err < 0)
1898 		pr_warning("Couldn't synthesize bpf events.\n");
1899 
1900 	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1901 		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1902 						     machine);
1903 		if (err < 0)
1904 			pr_warning("Couldn't synthesize cgroup events.\n");
1905 	}
1906 
1907 	if (rec->opts.nr_threads_synthesize > 1) {
1908 		perf_set_multithreaded();
1909 		f = process_locked_synthesized_event;
1910 	}
1911 
1912 	if (rec->opts.synth & PERF_SYNTH_TASK) {
1913 		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1914 
1915 		err = __machine__synthesize_threads(machine, tool, &opts->target,
1916 						    rec->evlist->core.threads,
1917 						    f, needs_mmap, opts->sample_address,
1918 						    rec->opts.nr_threads_synthesize);
1919 	}
1920 
1921 	if (rec->opts.nr_threads_synthesize > 1)
1922 		perf_set_singlethreaded();
1923 
1924 out:
1925 	return err;
1926 }
1927 
1928 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1929 {
1930 	struct record *rec = data;
1931 	pthread_kill(rec->thread_id, SIGUSR2);
1932 	return 0;
1933 }
1934 
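/*
 * Prepare the side band event list: if --switch-output-event populated
 * it, hook up the SIGUSR2 callback; with libbpf support, also add the
 * PERF_RECORD_BPF_EVENT side band event so that BPF programs loaded
 * while recording can be annotated later.  Finally start the side band
 * thread.
 */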
1935 static int record__setup_sb_evlist(struct record *rec)
1936 {
1937 	struct record_opts *opts = &rec->opts;
1938 
1939 	if (rec->sb_evlist != NULL) {
1940 		/*
1941 		 * We get here if --switch-output-event populated the
1942 		 * sb_evlist, so associate a callback that will send a SIGUSR2
1943 		 * to the main thread.
1944 		 */
1945 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1946 		rec->thread_id = pthread_self();
1947 	}
1948 #ifdef HAVE_LIBBPF_SUPPORT
1949 	if (!opts->no_bpf_event) {
1950 		if (rec->sb_evlist == NULL) {
1951 			rec->sb_evlist = evlist__new();
1952 
1953 			if (rec->sb_evlist == NULL) {
1954 				pr_err("Couldn't create side band evlist.\n");
1955 				return -1;
1956 			}
1957 		}
1958 
1959 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1960 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1961 			return -1;
1962 		}
1963 	}
1964 #endif
1965 	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1966 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1967 		opts->no_bpf_event = true;
1968 	}
1969 
1970 	return 0;
1971 }
1972 
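/*
 * For -k/--clockid recordings, take one reference reading of both
 * gettimeofday() and the selected clock and store them in the header
 * environment so sample timestamps can later be related to wall-clock
 * time.
 */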
1973 static int record__init_clock(struct record *rec)
1974 {
1975 	struct perf_session *session = rec->session;
1976 	struct timespec ref_clockid;
1977 	struct timeval ref_tod;
1978 	u64 ref;
1979 
1980 	if (!rec->opts.use_clockid)
1981 		return 0;
1982 
1983 	if (rec->opts.clockid_res_ns)
1984 		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1985 
1986 	session->header.env.clock.clockid = rec->opts.clockid;
1987 
1988 	if (gettimeofday(&ref_tod, NULL) != 0) {
1989 		pr_err("gettimeofday failed, cannot set reference time.\n");
1990 		return -1;
1991 	}
1992 
1993 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1994 		pr_err("clock_gettime failed, cannot set reference time.\n");
1995 		return -1;
1996 	}
1997 
1998 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1999 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2000 
2001 	session->header.env.clock.tod_ns = ref;
2002 
2003 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2004 	      (u64) ref_clockid.tv_nsec;
2005 
2006 	session->header.env.clock.clockid_ns = ref;
2007 	return 0;
2008 }
2009 
2010 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2011 {
2012 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2013 		trigger_hit(&auxtrace_snapshot_trigger);
2014 		auxtrace_record__snapshot_started = 1;
2015 		if (auxtrace_record__snapshot_start(rec->itr))
2016 			trigger_error(&auxtrace_snapshot_trigger);
2017 	}
2018 }
2019 
2020 static void record__uniquify_name(struct record *rec)
2021 {
2022 	struct evsel *pos;
2023 	struct evlist *evlist = rec->evlist;
2024 	char *new_name;
2025 	int ret;
2026 
2027 	if (!perf_pmu__has_hybrid())
2028 		return;
2029 
2030 	evlist__for_each_entry(evlist, pos) {
2031 		if (!evsel__is_hybrid(pos))
2032 			continue;
2033 
2034 		if (strchr(pos->name, '/'))
2035 			continue;
2036 
2037 		ret = asprintf(&new_name, "%s/%s/",
2038 			       pos->pmu_name, pos->name);
2039 		if (ret) {
2040 		if (ret >= 0) {
2041 			pos->name = new_name;
2042 		}
2043 	}
2044 }
2045 
2046 static int record__terminate_thread(struct record_thread *thread_data)
2047 {
2048 	int err;
2049 	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2050 	pid_t tid = thread_data->tid;
2051 
2052 	close(thread_data->pipes.msg[1]);
2053 	thread_data->pipes.msg[1] = -1;
2054 	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2055 	if (err > 0)
2056 		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2057 	else
2058 		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2059 			   thread->tid, tid);
2060 
2061 	return 0;
2062 }
2063 
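/*
 * Start one detached stream reading thread per thread_data[] entry
 * (entry 0 stays on the main thread), set its CPU affinity when
 * pthread_attr_setaffinity_np() is available and wait for the
 * THREAD_MSG__READY handshake on its ack pipe before continuing.
 */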
2064 static int record__start_threads(struct record *rec)
2065 {
2066 	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2067 	struct record_thread *thread_data = rec->thread_data;
2068 	sigset_t full, mask;
2069 	pthread_t handle;
2070 	pthread_attr_t attrs;
2071 
2072 	thread = &thread_data[0];
2073 
2074 	if (!record__threads_enabled(rec))
2075 		return 0;
2076 
2077 	sigfillset(&full);
2078 	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2079 		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2080 		return -1;
2081 	}
2082 
2083 	pthread_attr_init(&attrs);
2084 	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2085 
2086 	for (t = 1; t < nr_threads; t++) {
2087 		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2088 
2089 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2090 		pthread_attr_setaffinity_np(&attrs,
2091 					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2092 					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2093 #endif
2094 		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2095 			for (tt = 1; tt < t; tt++)
2096 				record__terminate_thread(&thread_data[tt]);
2097 			pr_err("Failed to start threads: %s\n", strerror(errno));
2098 			ret = -1;
2099 			goto out_err;
2100 		}
2101 
2102 		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2103 		if (err > 0)
2104 			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2105 				  thread_msg_tags[msg]);
2106 		else
2107 			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2108 				   thread->tid, rec->thread_data[t].tid);
2109 	}
2110 
2111 	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2112 			(cpu_set_t *)thread->mask->affinity.bits);
2113 
2114 	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2115 
2116 out_err:
2117 	pthread_attr_destroy(&attrs);
2118 
2119 	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2120 		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2121 		ret = -1;
2122 	}
2123 
2124 	return ret;
2125 }
2126 
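/*
 * Tell the auxiliary stream reading threads to terminate and fold their
 * per-thread sample, transfer and compression counters back into the
 * record and session totals.
 */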
2127 static int record__stop_threads(struct record *rec)
2128 {
2129 	int t;
2130 	struct record_thread *thread_data = rec->thread_data;
2131 
2132 	for (t = 1; t < rec->nr_threads; t++)
2133 		record__terminate_thread(&thread_data[t]);
2134 
2135 	for (t = 0; t < rec->nr_threads; t++) {
2136 		rec->samples += thread_data[t].samples;
2137 		if (!record__threads_enabled(rec))
2138 			continue;
2139 		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2140 		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2141 		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2142 			 thread_data[t].samples, thread_data[t].waking);
2143 		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2144 			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2145 				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2146 		else
2147 			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2148 	}
2149 
2150 	return 0;
2151 }
2152 
2153 static unsigned long record__waking(struct record *rec)
2154 {
2155 	int t;
2156 	unsigned long waking = 0;
2157 	struct record_thread *thread_data = rec->thread_data;
2158 
2159 	for (t = 0; t < rec->nr_threads; t++)
2160 		waking += thread_data[t].waking;
2161 
2162 	return waking;
2163 }
2164 
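/*
 * Main body of 'perf record': set up signal handling and the session,
 * prepare the workload, open and mmap the events, synthesize the
 * preliminary events, then poll and drain the ring buffers until the
 * workload exits or recording is stopped, and finally finish or switch
 * the output file.
 */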
2165 static int __cmd_record(struct record *rec, int argc, const char **argv)
2166 {
2167 	int err;
2168 	int status = 0;
2169 	const bool forks = argc > 0;
2170 	struct perf_tool *tool = &rec->tool;
2171 	struct record_opts *opts = &rec->opts;
2172 	struct perf_data *data = &rec->data;
2173 	struct perf_session *session;
2174 	bool disabled = false, draining = false;
2175 	int fd;
2176 	float ratio = 0;
2177 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2178 
2179 	atexit(record__sig_exit);
2180 	signal(SIGCHLD, sig_handler);
2181 	signal(SIGINT, sig_handler);
2182 	signal(SIGTERM, sig_handler);
2183 	signal(SIGSEGV, sigsegv_handler);
2184 
2185 	if (rec->opts.record_namespaces)
2186 		tool->namespace_events = true;
2187 
2188 	if (rec->opts.record_cgroup) {
2189 #ifdef HAVE_FILE_HANDLE
2190 		tool->cgroup_events = true;
2191 #else
2192 		pr_err("cgroup tracking is not supported\n");
2193 		return -1;
2194 #endif
2195 	}
2196 
2197 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2198 		signal(SIGUSR2, snapshot_sig_handler);
2199 		if (rec->opts.auxtrace_snapshot_mode)
2200 			trigger_on(&auxtrace_snapshot_trigger);
2201 		if (rec->switch_output.enabled)
2202 			trigger_on(&switch_output_trigger);
2203 	} else {
2204 		signal(SIGUSR2, SIG_IGN);
2205 	}
2206 
2207 	session = perf_session__new(data, tool);
2208 	if (IS_ERR(session)) {
2209 		pr_err("Perf session creation failed.\n");
2210 		return PTR_ERR(session);
2211 	}
2212 
2213 	if (record__threads_enabled(rec)) {
2214 		if (perf_data__is_pipe(&rec->data)) {
2215 			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2216 			return -1;
2217 		}
2218 		if (rec->opts.full_auxtrace) {
2219 			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2220 			return -1;
2221 		}
2222 	}
2223 
2224 	fd = perf_data__fd(data);
2225 	rec->session = session;
2226 
2227 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2228 		pr_err("Compression initialization failed.\n");
2229 		return -1;
2230 	}
2231 #ifdef HAVE_EVENTFD_SUPPORT
2232 	done_fd = eventfd(0, EFD_NONBLOCK);
2233 	if (done_fd < 0) {
2234 		pr_err("Failed to create wakeup eventfd, error: %m\n");
2235 		status = -1;
2236 		goto out_delete_session;
2237 	}
2238 	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2239 	if (err < 0) {
2240 		pr_err("Failed to add wakeup eventfd to poll list\n");
2241 		status = err;
2242 		goto out_delete_session;
2243 	}
2244 #endif // HAVE_EVENTFD_SUPPORT
2245 
2246 	session->header.env.comp_type  = PERF_COMP_ZSTD;
2247 	session->header.env.comp_level = rec->opts.comp_level;
2248 
2249 	if (rec->opts.kcore &&
2250 	    !record__kcore_readable(&session->machines.host)) {
2251 		pr_err("ERROR: kcore is not readable.\n");
2252 		return -1;
2253 	}
2254 
2255 	if (record__init_clock(rec))
2256 		return -1;
2257 
2258 	record__init_features(rec);
2259 
2260 	if (forks) {
2261 		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2262 					       workload_exec_failed_signal);
2263 		if (err < 0) {
2264 			pr_err("Couldn't run the workload!\n");
2265 			status = err;
2266 			goto out_delete_session;
2267 		}
2268 	}
2269 
2270 	/*
2271 	 * If we have just a single event and are sending data
2272 	 * through a pipe, we need to force the id allocation,
2273 	 * because we synthesize the event name through the pipe
2274 	 * and need the id for that.
2275 	 */
2276 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2277 		rec->opts.sample_id = true;
2278 
2279 	record__uniquify_name(rec);
2280 
2281 	if (record__open(rec) != 0) {
2282 		err = -1;
2283 		goto out_free_threads;
2284 	}
2285 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2286 
2287 	if (rec->opts.kcore) {
2288 		err = record__kcore_copy(&session->machines.host, data);
2289 		if (err) {
2290 			pr_err("ERROR: Failed to copy kcore\n");
2291 			goto out_free_threads;
2292 		}
2293 	}
2294 
2295 	err = bpf__apply_obj_config();
2296 	if (err) {
2297 		char errbuf[BUFSIZ];
2298 
2299 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2300 		pr_err("ERROR: Apply config to BPF failed: %s\n",
2301 			 errbuf);
2302 		goto out_free_threads;
2303 	}
2304 
2305 	/*
2306 	 * Normally perf_session__new would do this, but it doesn't have the
2307 	 * evlist.
2308 	 */
2309 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2310 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2311 		rec->tool.ordered_events = false;
2312 	}
2313 
2314 	if (!rec->evlist->core.nr_groups)
2315 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2316 
2317 	if (data->is_pipe) {
2318 		err = perf_header__write_pipe(fd);
2319 		if (err < 0)
2320 			goto out_free_threads;
2321 	} else {
2322 		err = perf_session__write_header(session, rec->evlist, fd, false);
2323 		if (err < 0)
2324 			goto out_free_threads;
2325 	}
2326 
2327 	err = -1;
2328 	if (!rec->no_buildid
2329 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2330 		pr_err("Couldn't generate buildids. "
2331 		       "Use --no-buildid to profile anyway.\n");
2332 		goto out_free_threads;
2333 	}
2334 
2335 	err = record__setup_sb_evlist(rec);
2336 	if (err)
2337 		goto out_free_threads;
2338 
2339 	err = record__synthesize(rec, false);
2340 	if (err < 0)
2341 		goto out_free_threads;
2342 
2343 	if (rec->realtime_prio) {
2344 		struct sched_param param;
2345 
2346 		param.sched_priority = rec->realtime_prio;
2347 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2348 			pr_err("Could not set realtime priority.\n");
2349 			err = -1;
2350 			goto out_free_threads;
2351 		}
2352 	}
2353 
2354 	if (record__start_threads(rec))
2355 		goto out_free_threads;
2356 
2357 	/*
2358 	 * When perf is starting the traced process, all the events
2359 	 * (apart from group members) have enable_on_exec=1 set,
2360 	 * so don't spoil it by prematurely enabling them.
2361 	 */
2362 	if (!target__none(&opts->target) && !opts->initial_delay)
2363 		evlist__enable(rec->evlist);
2364 
2365 	/*
2366 	 * Let the child rip
2367 	 */
2368 	if (forks) {
2369 		struct machine *machine = &session->machines.host;
2370 		union perf_event *event;
2371 		pid_t tgid;
2372 
2373 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2374 		if (event == NULL) {
2375 			err = -ENOMEM;
2376 			goto out_child;
2377 		}
2378 
2379 		/*
2380 		 * Some H/W events are generated before the COMM event,
2381 		 * which is emitted during exec(), so perf script
2382 		 * cannot see the correct process name for those events.
2383 		 * Synthesize a COMM event to prevent that.
2384 		 */
2385 		tgid = perf_event__synthesize_comm(tool, event,
2386 						   rec->evlist->workload.pid,
2387 						   process_synthesized_event,
2388 						   machine);
2389 		free(event);
2390 
2391 		if (tgid == -1)
2392 			goto out_child;
2393 
2394 		event = malloc(sizeof(event->namespaces) +
2395 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2396 			       machine->id_hdr_size);
2397 		if (event == NULL) {
2398 			err = -ENOMEM;
2399 			goto out_child;
2400 		}
2401 
2402 		/*
2403 		 * Synthesize NAMESPACES event for the command specified.
2404 		 */
2405 		perf_event__synthesize_namespaces(tool, event,
2406 						  rec->evlist->workload.pid,
2407 						  tgid, process_synthesized_event,
2408 						  machine);
2409 		free(event);
2410 
2411 		evlist__start_workload(rec->evlist);
2412 	}
2413 
2414 	if (opts->initial_delay) {
2415 		pr_info(EVLIST_DISABLED_MSG);
2416 		if (opts->initial_delay > 0) {
2417 			usleep(opts->initial_delay * USEC_PER_MSEC);
2418 			evlist__enable(rec->evlist);
2419 			pr_info(EVLIST_ENABLED_MSG);
2420 		}
2421 	}
2422 
2423 	trigger_ready(&auxtrace_snapshot_trigger);
2424 	trigger_ready(&switch_output_trigger);
2425 	perf_hooks__invoke_record_start();
2426 	for (;;) {
2427 		unsigned long long hits = thread->samples;
2428 
2429 		/*
2430 		 * rec->evlist->bkw_mmap_state can be
2431 		 * BKW_MMAP_EMPTY here: when done == true and
2432 		 * hits != rec->samples in the previous round.
2433 		 *
2434 		 * evlist__toggle_bkw_mmap ensures we never
2435 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2436 		 */
2437 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2438 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2439 
2440 		if (record__mmap_read_all(rec, false) < 0) {
2441 			trigger_error(&auxtrace_snapshot_trigger);
2442 			trigger_error(&switch_output_trigger);
2443 			err = -1;
2444 			goto out_child;
2445 		}
2446 
2447 		if (auxtrace_record__snapshot_started) {
2448 			auxtrace_record__snapshot_started = 0;
2449 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2450 				record__read_auxtrace_snapshot(rec, false);
2451 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2452 				pr_err("AUX area tracing snapshot failed\n");
2453 				err = -1;
2454 				goto out_child;
2455 			}
2456 		}
2457 
2458 		if (trigger_is_hit(&switch_output_trigger)) {
2459 			/*
2460 			 * If switch_output_trigger is hit, the data in the
2461 			 * overwritable ring buffer should have been collected,
2462 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2463 			 *
2464 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
2465 			 * record__mmap_read_all() may not have collected data from the
2466 			 * overwritable ring buffer. Read again.
2467 			 */
2468 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2469 				continue;
2470 			trigger_ready(&switch_output_trigger);
2471 
2472 			/*
2473 			 * Reenable events in overwrite ring buffer after
2474 			 * record__mmap_read_all(): we should have collected
2475 			 * data from it.
2476 			 */
2477 			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2478 
2479 			if (!quiet)
2480 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2481 					record__waking(rec));
2482 			thread->waking = 0;
2483 			fd = record__switch_output(rec, false);
2484 			if (fd < 0) {
2485 				pr_err("Failed to switch to new file\n");
2486 				trigger_error(&switch_output_trigger);
2487 				err = fd;
2488 				goto out_child;
2489 			}
2490 
2491 			/* re-arm the alarm */
2492 			if (rec->switch_output.time)
2493 				alarm(rec->switch_output.time);
2494 		}
2495 
2496 		if (hits == thread->samples) {
2497 			if (done || draining)
2498 				break;
2499 			err = fdarray__poll(&thread->pollfd, -1);
2500 			/*
2501 			 * Propagate the error only if there is one. Ignore a positive
2502 			 * number of returned events and interrupt errors.
2503 			 */
2504 			if (err > 0 || (err < 0 && errno == EINTR))
2505 				err = 0;
2506 			thread->waking++;
2507 
2508 			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2509 					    record__thread_munmap_filtered, NULL) == 0)
2510 				draining = true;
2511 
2512 			evlist__ctlfd_update(rec->evlist,
2513 				&thread->pollfd.entries[thread->ctlfd_pos]);
2514 		}
2515 
2516 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2517 			switch (cmd) {
2518 			case EVLIST_CTL_CMD_SNAPSHOT:
2519 				hit_auxtrace_snapshot_trigger(rec);
2520 				evlist__ctlfd_ack(rec->evlist);
2521 				break;
2522 			case EVLIST_CTL_CMD_STOP:
2523 				done = 1;
2524 				break;
2525 			case EVLIST_CTL_CMD_ACK:
2526 			case EVLIST_CTL_CMD_UNSUPPORTED:
2527 			case EVLIST_CTL_CMD_ENABLE:
2528 			case EVLIST_CTL_CMD_DISABLE:
2529 			case EVLIST_CTL_CMD_EVLIST:
2530 			case EVLIST_CTL_CMD_PING:
2531 			default:
2532 				break;
2533 			}
2534 		}
2535 
2536 		/*
2537 		 * When perf is starting the traced process, the events die with
2538 		 * the process at the end and we wait for that, so there is no
2539 		 * need to disable the events in this case.
2540 		 */
2541 		if (done && !disabled && !target__none(&opts->target)) {
2542 			trigger_off(&auxtrace_snapshot_trigger);
2543 			evlist__disable(rec->evlist);
2544 			disabled = true;
2545 		}
2546 	}
2547 
2548 	trigger_off(&auxtrace_snapshot_trigger);
2549 	trigger_off(&switch_output_trigger);
2550 
2551 	if (opts->auxtrace_snapshot_on_exit)
2552 		record__auxtrace_snapshot_exit(rec);
2553 
2554 	if (forks && workload_exec_errno) {
2555 		char msg[STRERR_BUFSIZE], strevsels[2048];
2556 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2557 
2558 		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2559 
2560 		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2561 			strevsels, argv[0], emsg);
2562 		err = -1;
2563 		goto out_child;
2564 	}
2565 
2566 	if (!quiet)
2567 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2568 			record__waking(rec));
2569 
2570 	if (target__none(&rec->opts.target))
2571 		record__synthesize_workload(rec, true);
2572 
2573 out_child:
2574 	record__stop_threads(rec);
2575 	record__mmap_read_all(rec, true);
2576 out_free_threads:
2577 	record__free_thread_data(rec);
2578 	evlist__finalize_ctlfd(rec->evlist);
2579 	record__aio_mmap_read_sync(rec);
2580 
2581 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2582 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2583 		session->header.env.comp_ratio = ratio + 0.5;
2584 	}
2585 
2586 	if (forks) {
2587 		int exit_status;
2588 
2589 		if (!child_finished)
2590 			kill(rec->evlist->workload.pid, SIGTERM);
2591 
2592 		wait(&exit_status);
2593 
2594 		if (err < 0)
2595 			status = err;
2596 		else if (WIFEXITED(exit_status))
2597 			status = WEXITSTATUS(exit_status);
2598 		else if (WIFSIGNALED(exit_status))
2599 			signr = WTERMSIG(exit_status);
2600 	} else
2601 		status = err;
2602 
2603 	record__synthesize(rec, true);
2604 	/* this will be recalculated during process_buildids() */
2605 	rec->samples = 0;
2606 
2607 	if (!err) {
2608 		if (!rec->timestamp_filename) {
2609 			record__finish_output(rec);
2610 		} else {
2611 			fd = record__switch_output(rec, true);
2612 			if (fd < 0) {
2613 				status = fd;
2614 				goto out_delete_session;
2615 			}
2616 		}
2617 	}
2618 
2619 	perf_hooks__invoke_record_end();
2620 
2621 	if (!err && !quiet) {
2622 		char samples[128];
2623 		const char *postfix = rec->timestamp_filename ?
2624 					".<timestamp>" : "";
2625 
2626 		if (rec->samples && !rec->opts.full_auxtrace)
2627 			scnprintf(samples, sizeof(samples),
2628 				  " (%" PRIu64 " samples)", rec->samples);
2629 		else
2630 			samples[0] = '\0';
2631 
2632 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2633 			perf_data__size(data) / 1024.0 / 1024.0,
2634 			data->path, postfix, samples);
2635 		if (ratio) {
2636 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2637 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2638 					ratio);
2639 		}
2640 		fprintf(stderr, " ]\n");
2641 	}
2642 
2643 out_delete_session:
2644 #ifdef HAVE_EVENTFD_SUPPORT
2645 	if (done_fd >= 0)
2646 		close(done_fd);
2647 #endif
2648 	zstd_fini(&session->zstd_data);
2649 	perf_session__delete(session);
2650 
2651 	if (!opts->no_bpf_event)
2652 		evlist__stop_sb_thread(rec->sb_evlist);
2653 	return status;
2654 }
2655 
2656 static void callchain_debug(struct callchain_param *callchain)
2657 {
2658 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2659 
2660 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2661 
2662 	if (callchain->record_mode == CALLCHAIN_DWARF)
2663 		pr_debug("callchain: stack dump size %d\n",
2664 			 callchain->dump_size);
2665 }
2666 
2667 int record_opts__parse_callchain(struct record_opts *record,
2668 				 struct callchain_param *callchain,
2669 				 const char *arg, bool unset)
2670 {
2671 	int ret;
2672 	callchain->enabled = !unset;
2673 
2674 	/* --no-call-graph */
2675 	if (unset) {
2676 		callchain->record_mode = CALLCHAIN_NONE;
2677 		pr_debug("callchain: disabled\n");
2678 		return 0;
2679 	}
2680 
2681 	ret = parse_callchain_record_opt(arg, callchain);
2682 	if (!ret) {
2683 		/* Enable data address sampling for DWARF unwind. */
2684 		if (callchain->record_mode == CALLCHAIN_DWARF)
2685 			record->sample_address = true;
2686 		callchain_debug(callchain);
2687 	}
2688 
2689 	return ret;
2690 }
2691 
2692 int record_parse_callchain_opt(const struct option *opt,
2693 			       const char *arg,
2694 			       int unset)
2695 {
2696 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2697 }
2698 
2699 int record_callchain_opt(const struct option *opt,
2700 			 const char *arg __maybe_unused,
2701 			 int unset __maybe_unused)
2702 {
2703 	struct callchain_param *callchain = opt->value;
2704 
2705 	callchain->enabled = true;
2706 
2707 	if (callchain->record_mode == CALLCHAIN_NONE)
2708 		callchain->record_mode = CALLCHAIN_FP;
2709 
2710 	callchain_debug(callchain);
2711 	return 0;
2712 }
2713 
2714 static int perf_record_config(const char *var, const char *value, void *cb)
2715 {
2716 	struct record *rec = cb;
2717 
2718 	if (!strcmp(var, "record.build-id")) {
2719 		if (!strcmp(value, "cache"))
2720 			rec->no_buildid_cache = false;
2721 		else if (!strcmp(value, "no-cache"))
2722 			rec->no_buildid_cache = true;
2723 		else if (!strcmp(value, "skip"))
2724 			rec->no_buildid = true;
2725 		else if (!strcmp(value, "mmap"))
2726 			rec->buildid_mmap = true;
2727 		else
2728 			return -1;
2729 		return 0;
2730 	}
2731 	if (!strcmp(var, "record.call-graph")) {
2732 		var = "call-graph.record-mode";
2733 		return perf_default_config(var, value, cb);
2734 	}
2735 #ifdef HAVE_AIO_SUPPORT
2736 	if (!strcmp(var, "record.aio")) {
2737 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2738 		if (!rec->opts.nr_cblocks)
2739 			rec->opts.nr_cblocks = nr_cblocks_default;
2740 	}
2741 #endif
2742 	if (!strcmp(var, "record.debuginfod")) {
2743 		rec->debuginfod.urls = strdup(value);
2744 		if (!rec->debuginfod.urls)
2745 			return -ENOMEM;
2746 		rec->debuginfod.set = true;
2747 	}
2748 
2749 	return 0;
2750 }
2751 
2752 
2753 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2754 {
2755 	struct record_opts *opts = (struct record_opts *)opt->value;
2756 
2757 	if (unset || !str)
2758 		return 0;
2759 
2760 	if (!strcasecmp(str, "node"))
2761 		opts->affinity = PERF_AFFINITY_NODE;
2762 	else if (!strcasecmp(str, "cpu"))
2763 		opts->affinity = PERF_AFFINITY_CPU;
2764 
2765 	return 0;
2766 }
2767 
2768 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2769 {
2770 	mask->nbits = nr_bits;
2771 	mask->bits = bitmap_zalloc(mask->nbits);
2772 	if (!mask->bits)
2773 		return -ENOMEM;
2774 
2775 	return 0;
2776 }
2777 
2778 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2779 {
2780 	bitmap_free(mask->bits);
2781 	mask->nbits = 0;
2782 }
2783 
2784 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2785 {
2786 	int ret;
2787 
2788 	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2789 	if (ret) {
2790 		mask->affinity.bits = NULL;
2791 		return ret;
2792 	}
2793 
2794 	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2795 	if (ret) {
2796 		record__mmap_cpu_mask_free(&mask->maps);
2797 		mask->maps.bits = NULL;
2798 	}
2799 
2800 	return ret;
2801 }
2802 
2803 static void record__thread_mask_free(struct thread_mask *mask)
2804 {
2805 	record__mmap_cpu_mask_free(&mask->maps);
2806 	record__mmap_cpu_mask_free(&mask->affinity);
2807 }
2808 
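/*
 * Parse the --threads option value: an empty value selects one thread
 * per CPU, the keywords cpu/core/package/numa select a topology based
 * layout, and anything else is kept verbatim as a user provided
 * <maps>/<affinity> specification.
 */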
2809 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2810 {
2811 	int s;
2812 	struct record_opts *opts = opt->value;
2813 
2814 	if (unset || !str || !strlen(str)) {
2815 		opts->threads_spec = THREAD_SPEC__CPU;
2816 	} else {
2817 		for (s = 1; s < THREAD_SPEC__MAX; s++) {
2818 			if (s == THREAD_SPEC__USER) {
2819 				opts->threads_user_spec = strdup(str);
2820 				if (!opts->threads_user_spec)
2821 					return -ENOMEM;
2822 				opts->threads_spec = THREAD_SPEC__USER;
2823 				break;
2824 			}
2825 			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2826 				opts->threads_spec = s;
2827 				break;
2828 			}
2829 		}
2830 	}
2831 
2832 	if (opts->threads_spec == THREAD_SPEC__USER)
2833 		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2834 	else
2835 		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2836 
2837 	return 0;
2838 }
2839 
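/* Parse --max-size values such as 512K, 100M or 2G into a byte count. */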
2840 static int parse_output_max_size(const struct option *opt,
2841 				 const char *str, int unset)
2842 {
2843 	unsigned long *s = (unsigned long *)opt->value;
2844 	static struct parse_tag tags_size[] = {
2845 		{ .tag  = 'B', .mult = 1       },
2846 		{ .tag  = 'K', .mult = 1 << 10 },
2847 		{ .tag  = 'M', .mult = 1 << 20 },
2848 		{ .tag  = 'G', .mult = 1 << 30 },
2849 		{ .tag  = 0 },
2850 	};
2851 	unsigned long val;
2852 
2853 	if (unset) {
2854 		*s = 0;
2855 		return 0;
2856 	}
2857 
2858 	val = parse_tag_value(str, tags_size);
2859 	if (val != (unsigned long) -1) {
2860 		*s = val;
2861 		return 0;
2862 	}
2863 
2864 	return -1;
2865 }
2866 
2867 static int record__parse_mmap_pages(const struct option *opt,
2868 				    const char *str,
2869 				    int unset __maybe_unused)
2870 {
2871 	struct record_opts *opts = opt->value;
2872 	char *s, *p;
2873 	unsigned int mmap_pages;
2874 	int ret;
2875 
2876 	if (!str)
2877 		return -EINVAL;
2878 
2879 	s = strdup(str);
2880 	if (!s)
2881 		return -ENOMEM;
2882 
2883 	p = strchr(s, ',');
2884 	if (p)
2885 		*p = '\0';
2886 
2887 	if (*s) {
2888 		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2889 		if (ret)
2890 			goto out_free;
2891 		opts->mmap_pages = mmap_pages;
2892 	}
2893 
2894 	if (!p) {
2895 		ret = 0;
2896 		goto out_free;
2897 	}
2898 
2899 	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2900 	if (ret)
2901 		goto out_free;
2902 
2903 	opts->auxtrace_mmap_pages = mmap_pages;
2904 
2905 out_free:
2906 	free(s);
2907 	return ret;
2908 }
2909 
2910 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2911 {
2912 }
2913 
2914 static int parse_control_option(const struct option *opt,
2915 				const char *str,
2916 				int unset __maybe_unused)
2917 {
2918 	struct record_opts *opts = opt->value;
2919 
2920 	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2921 }
2922 
2923 static void switch_output_size_warn(struct record *rec)
2924 {
2925 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2926 	struct switch_output *s = &rec->switch_output;
2927 
2928 	wakeup_size /= 2;
2929 
2930 	if (s->size < wakeup_size) {
2931 		char buf[100];
2932 
2933 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2934 		pr_warning("WARNING: switch-output data size is lower than the "
2935 			   "wakeup kernel buffer size (%s), "
2936 			   "expect bigger perf.data sizes\n", buf);
2937 	}
2938 }
2939 
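/*
 * Interpret the --switch-output argument: "signal" arms the SIGUSR2
 * trigger, a B/K/M/G suffixed value sets a size threshold and an
 * s/m/h/d suffixed value sets a time threshold.  Any of these also
 * enables timestamped output file names.
 */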
2940 static int switch_output_setup(struct record *rec)
2941 {
2942 	struct switch_output *s = &rec->switch_output;
2943 	static struct parse_tag tags_size[] = {
2944 		{ .tag  = 'B', .mult = 1       },
2945 		{ .tag  = 'K', .mult = 1 << 10 },
2946 		{ .tag  = 'M', .mult = 1 << 20 },
2947 		{ .tag  = 'G', .mult = 1 << 30 },
2948 		{ .tag  = 0 },
2949 	};
2950 	static struct parse_tag tags_time[] = {
2951 		{ .tag  = 's', .mult = 1        },
2952 		{ .tag  = 'm', .mult = 60       },
2953 		{ .tag  = 'h', .mult = 60*60    },
2954 		{ .tag  = 'd', .mult = 60*60*24 },
2955 		{ .tag  = 0 },
2956 	};
2957 	unsigned long val;
2958 
2959 	/*
2960 	 * If we're using --switch-output-event, then we imply
2961 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2962 	 * thread to its parent.
2963 	 */
2964 	if (rec->switch_output_event_set) {
2965 		if (record__threads_enabled(rec)) {
2966 			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2967 			return 0;
2968 		}
2969 		goto do_signal;
2970 	}
2971 
2972 	if (!s->set)
2973 		return 0;
2974 
2975 	if (record__threads_enabled(rec)) {
2976 		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
2977 		return 0;
2978 	}
2979 
2980 	if (!strcmp(s->str, "signal")) {
2981 do_signal:
2982 		s->signal = true;
2983 		pr_debug("switch-output with SIGUSR2 signal\n");
2984 		goto enabled;
2985 	}
2986 
2987 	val = parse_tag_value(s->str, tags_size);
2988 	if (val != (unsigned long) -1) {
2989 		s->size = val;
2990 		pr_debug("switch-output with %s size threshold\n", s->str);
2991 		goto enabled;
2992 	}
2993 
2994 	val = parse_tag_value(s->str, tags_time);
2995 	if (val != (unsigned long) -1) {
2996 		s->time = val;
2997 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2998 			 s->str, s->time);
2999 		goto enabled;
3000 	}
3001 
3002 	return -1;
3003 
3004 enabled:
3005 	rec->timestamp_filename = true;
3006 	s->enabled              = true;
3007 
3008 	if (s->size && !rec->opts.no_buffering)
3009 		switch_output_size_warn(rec);
3010 
3011 	return 0;
3012 }
3013 
3014 static const char * const __record_usage[] = {
3015 	"perf record [<options>] [<command>]",
3016 	"perf record [<options>] -- <command> [<options>]",
3017 	NULL
3018 };
3019 const char * const *record_usage = __record_usage;
3020 
3021 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3022 				  struct perf_sample *sample, struct machine *machine)
3023 {
3024 	/*
3025 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3026 	 * so there is no need to add them twice.
3027 	 */
3028 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3029 		return 0;
3030 	return perf_event__process_mmap(tool, event, sample, machine);
3031 }
3032 
3033 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3034 				   struct perf_sample *sample, struct machine *machine)
3035 {
3036 	/*
3037 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3038 	 * so there is no need to add them twice.
3039 	 */
3040 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3041 		return 0;
3042 
3043 	return perf_event__process_mmap2(tool, event, sample, machine);
3044 }
3045 
3046 static int process_timestamp_boundary(struct perf_tool *tool,
3047 				      union perf_event *event __maybe_unused,
3048 				      struct perf_sample *sample,
3049 				      struct machine *machine __maybe_unused)
3050 {
3051 	struct record *rec = container_of(tool, struct record, tool);
3052 
3053 	set_timestamp_boundary(rec, sample->time);
3054 	return 0;
3055 }
3056 
3057 static int parse_record_synth_option(const struct option *opt,
3058 				     const char *str,
3059 				     int unset __maybe_unused)
3060 {
3061 	struct record_opts *opts = opt->value;
3062 	char *p = strdup(str);
3063 
3064 	if (p == NULL)
3065 		return -1;
3066 
3067 	opts->synth = parse_synth_opt(p);
3068 	free(p);
3069 
3070 	if (opts->synth < 0) {
3071 		pr_err("Invalid synth option: %s\n", str);
3072 		return -1;
3073 	}
3074 	return 0;
3075 }
3076 
3077 /*
3078  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
3079  * because we need to have access to it in record__exit(), which is called
3080  * after cmd_record() exits, but since record_options needs to be accessible to
3081  * builtin-script, leave it here.
3082  *
3083  * At least we don't touch it in all the other functions here directly.
3084  *
3085  * Just say no to tons of global variables, sigh.
3086  */
3087 static struct record record = {
3088 	.opts = {
3089 		.sample_time	     = true,
3090 		.mmap_pages	     = UINT_MAX,
3091 		.user_freq	     = UINT_MAX,
3092 		.user_interval	     = ULLONG_MAX,
3093 		.freq		     = 4000,
3094 		.target		     = {
3095 			.uses_mmap   = true,
3096 			.default_per_cpu = true,
3097 		},
3098 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3099 		.nr_threads_synthesize = 1,
3100 		.ctl_fd              = -1,
3101 		.ctl_fd_ack          = -1,
3102 		.synth               = PERF_SYNTH_ALL,
3103 	},
3104 	.tool = {
3105 		.sample		= process_sample_event,
3106 		.fork		= perf_event__process_fork,
3107 		.exit		= perf_event__process_exit,
3108 		.comm		= perf_event__process_comm,
3109 		.namespaces	= perf_event__process_namespaces,
3110 		.mmap		= build_id__process_mmap,
3111 		.mmap2		= build_id__process_mmap2,
3112 		.itrace_start	= process_timestamp_boundary,
3113 		.aux		= process_timestamp_boundary,
3114 		.ordered_events	= true,
3115 	},
3116 };
3117 
3118 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3119 	"\n\t\t\t\tDefault: fp";
3120 
3121 static bool dry_run;
3122 
3123 /*
3124  * XXX Will stay a global variable until we fix builtin-script.c to stop messing
3125  * with it and switch to using the library functions in perf_evlist that came
3126  * from builtin-record.c, i.e. use record_opts,
3127  * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
3128  * using pipes, etc.
3129  */
3130 static struct option __record_options[] = {
3131 	OPT_CALLBACK('e', "event", &record.evlist, "event",
3132 		     "event selector. use 'perf list' to list available events",
3133 		     parse_events_option),
3134 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3135 		     "event filter", parse_filter),
3136 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3137 			   NULL, "don't record events from perf itself",
3138 			   exclude_perf),
3139 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3140 		    "record events on existing process id"),
3141 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3142 		    "record events on existing thread id"),
3143 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3144 		    "collect data with this RT SCHED_FIFO priority"),
3145 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3146 		    "collect data without buffering"),
3147 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3148 		    "collect raw sample records from all opened counters"),
3149 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3150 			    "system-wide collection from all CPUs"),
3151 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3152 		    "list of cpus to monitor"),
3153 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3154 	OPT_STRING('o', "output", &record.data.path, "file",
3155 		    "output file name"),
3156 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3157 			&record.opts.no_inherit_set,
3158 			"child tasks do not inherit counters"),
3159 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3160 		    "synthesize non-sample events at the end of output"),
3161 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3162 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3163 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3164 		    "Fail if the specified frequency can't be used"),
3165 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3166 		     "profile at this frequency",
3167 		      record__parse_freq),
3168 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3169 		     "number of mmap data pages and AUX area tracing mmap pages",
3170 		     record__parse_mmap_pages),
3171 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3172 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3173 		     record__mmap_flush_parse),
3174 	OPT_BOOLEAN(0, "group", &record.opts.group,
3175 		    "put the counters into a counter group"),
3176 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3177 			   NULL, "enables call-graph recording" ,
3178 			   &record_callchain_opt),
3179 	OPT_CALLBACK(0, "call-graph", &record.opts,
3180 		     "record_mode[,record_size]", record_callchain_help,
3181 		     &record_parse_callchain_opt),
3182 	OPT_INCR('v', "verbose", &verbose,
3183 		    "be more verbose (show counter open errors, etc)"),
3184 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3185 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3186 		    "per thread counts"),
3187 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3188 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3189 		    "Record the sample physical addresses"),
3190 	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3191 		    "Record the sampled data address data page size"),
3192 	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3193 		    "Record the sampled code address (ip) page size"),
3194 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3195 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3196 			&record.opts.sample_time_set,
3197 			"Record the sample timestamps"),
3198 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3199 			"Record the sample period"),
3200 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3201 		    "don't sample"),
3202 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3203 			&record.no_buildid_cache_set,
3204 			"do not update the buildid cache"),
3205 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3206 			&record.no_buildid_set,
3207 			"do not collect buildids in perf.data"),
3208 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3209 		     "monitor event in cgroup name only",
3210 		     parse_cgroups),
3211 	OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3212 		  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3213 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3214 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3215 		   "user to profile"),
3216 
3217 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3218 		     "branch any", "sample any taken branches",
3219 		     parse_branch_stack),
3220 
3221 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3222 		     "branch filter mask", "branch stack filter modes",
3223 		     parse_branch_stack),
3224 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3225 		    "sample by weight (on special events only)"),
3226 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3227 		    "sample transaction flags (special events only)"),
3228 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3229 		    "use per-thread mmaps"),
3230 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3231 		    "sample selected machine registers on interrupt,"
3232 		    " use '-I?' to list register names", parse_intr_regs),
3233 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3234 		    "sample selected machine registers on interrupt,"
3235 		    " use '--user-regs=?' to list register names", parse_user_regs),
3236 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3237 		    "Record running/enabled time of read (:S) events"),
3238 	OPT_CALLBACK('k', "clockid", &record.opts,
3239 	"clockid", "clockid to use for events, see clock_gettime()",
3240 	parse_clockid),
3241 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3242 			  "opts", "AUX area tracing Snapshot Mode", ""),
3243 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3244 			  "opts", "sample AUX area", ""),
3245 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3246 			"per thread proc mmap processing timeout in ms"),
3247 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3248 		    "Record namespaces events"),
3249 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3250 		    "Record cgroup events"),
3251 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3252 			&record.opts.record_switch_events_set,
3253 			"Record context switch events"),
3254 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3255 			 "Configure all used events to run in kernel space.",
3256 			 PARSE_OPT_EXCLUSIVE),
3257 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3258 			 "Configure all used events to run in user space.",
3259 			 PARSE_OPT_EXCLUSIVE),
3260 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3261 		    "collect kernel callchains"),
3262 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3263 		    "collect user callchains"),
3264 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3265 		   "clang binary to use for compiling BPF scriptlets"),
3266 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3267 		   "options passed to clang when compiling BPF scriptlets"),
3268 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3269 		   "file", "vmlinux pathname"),
3270 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3271 		    "Record build-id of all DSOs regardless of hits"),
3272 	OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3273 		    "Record build-id in map events"),
3274 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3275 		    "append timestamp to output filename"),
3276 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3277 		    "Record timestamp boundary (time of first/last samples)"),
3278 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3279 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3280 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3281 			  "signal"),
3282 	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3283 			 "switch output event selector. use 'perf list' to list available events",
3284 			 parse_events_option_new_evlist),
3285 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3286 		   "Limit number of switch output generated files"),
3287 	OPT_BOOLEAN(0, "dry-run", &dry_run,
3288 		    "Parse options then exit"),
3289 #ifdef HAVE_AIO_SUPPORT
3290 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3291 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3292 		     record__aio_parse),
3293 #endif
3294 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3295 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3296 		     record__parse_affinity),
3297 #ifdef HAVE_ZSTD_SUPPORT
3298 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3299 			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3300 			    record__parse_comp_level),
3301 #endif
3302 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3303 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3304 	OPT_UINTEGER(0, "num-thread-synthesize",
3305 		     &record.opts.nr_threads_synthesize,
3306 		     "number of threads to run for event synthesis"),
3307 #ifdef HAVE_LIBPFM
3308 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3309 		"libpfm4 event selector. use 'perf list' to list available events",
3310 		parse_libpfm_events_option),
3311 #endif
3312 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3313 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3314 		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3315 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3316 		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3317 		      parse_control_option),
3318 	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3319 		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3320 	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3321 			  &record.debuginfod.set, "debuginfod urls",
3322 			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3323 			  "system"),
3324 	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3325 			    "write collected trace data into several data files using parallel threads",
3326 			    record__parse_threads),
3327 	OPT_END()
3328 };
3329 
3330 struct option *record_options = __record_options;
3331 
3332 static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3333 {
3334 	struct perf_cpu cpu;
3335 	int idx;
3336 
3337 	if (cpu_map__is_dummy(cpus))
3338 		return;
3339 
3340 	perf_cpu_map__for_each_cpu(cpu, idx, cpus)
3341 		set_bit(cpu.cpu, mask->bits);
3342 }
3343 
3344 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3345 {
3346 	struct perf_cpu_map *cpus;
3347 
3348 	cpus = perf_cpu_map__new(mask_spec);
3349 	if (!cpus)
3350 		return -ENOMEM;
3351 
3352 	bitmap_zero(mask->bits, mask->nbits);
3353 	record__mmap_cpu_mask_init(mask, cpus);
3354 	perf_cpu_map__put(cpus);
3355 
3356 	return 0;
3357 }
3358 
3359 static void record__free_thread_masks(struct record *rec, int nr_threads)
3360 {
3361 	int t;
3362 
3363 	if (rec->thread_masks)
3364 		for (t = 0; t < nr_threads; t++)
3365 			record__thread_mask_free(&rec->thread_masks[t]);
3366 
3367 	zfree(&rec->thread_masks);
3368 }
3369 
3370 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3371 {
3372 	int t, ret;
3373 
3374 	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3375 	if (!rec->thread_masks) {
3376 		pr_err("Failed to allocate thread masks\n");
3377 		return -ENOMEM;
3378 	}
3379 
3380 	for (t = 0; t < nr_threads; t++) {
3381 		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3382 		if (ret) {
3383 			pr_err("Failed to allocate thread masks[%d]\n", t);
3384 			goto out_free;
3385 		}
3386 	}
3387 
3388 	return 0;
3389 
3390 out_free:
3391 	record__free_thread_masks(rec, nr_threads);
3392 
3393 	return ret;
3394 }
3395 
3396 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3397 {
3398 	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3399 
3400 	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3401 	if (ret)
3402 		return ret;
3403 
3404 	rec->nr_threads = nr_cpus;
3405 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3406 
3407 	for (t = 0; t < rec->nr_threads; t++) {
3408 		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3409 		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3410 		if (verbose) {
3411 			pr_debug("thread_masks[%d]: ", t);
3412 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3413 			pr_debug("thread_masks[%d]: ", t);
3414 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3415 		}
3416 	}
3417 
3418 	return 0;
3419 }
3420 
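/*
 * Build rec->thread_masks[] from parallel arrays of maps/affinity CPU
 * list strings: each spec is intersected with the CPUs being recorded,
 * must not end up empty and must not overlap any previously accepted
 * spec.
 */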
3421 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3422 					  const char **maps_spec, const char **affinity_spec,
3423 					  u32 nr_spec)
3424 {
3425 	u32 s;
3426 	int ret = 0, t = 0;
3427 	struct mmap_cpu_mask cpus_mask;
3428 	struct thread_mask thread_mask, full_mask, *thread_masks;
3429 
3430 	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3431 	if (ret) {
3432 		pr_err("Failed to allocate CPUs mask\n");
3433 		return ret;
3434 	}
3435 	record__mmap_cpu_mask_init(&cpus_mask, cpus);
3436 
3437 	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3438 	if (ret) {
3439 		pr_err("Failed to allocate full mask\n");
3440 		goto out_free_cpu_mask;
3441 	}
3442 
3443 	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3444 	if (ret) {
3445 		pr_err("Failed to allocate thread mask\n");
3446 		goto out_free_full_and_cpu_masks;
3447 	}
3448 
3449 	for (s = 0; s < nr_spec; s++) {
3450 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3451 		if (ret) {
3452 			pr_err("Failed to initialize maps thread mask\n");
3453 			goto out_free;
3454 		}
3455 		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3456 		if (ret) {
3457 			pr_err("Failed to initialize affinity thread mask\n");
3458 			goto out_free;
3459 		}
3460 
3461 		/* ignore invalid CPUs but do not allow empty masks */
3462 		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3463 				cpus_mask.bits, thread_mask.maps.nbits)) {
3464 			pr_err("Empty maps mask: %s\n", maps_spec[s]);
3465 			ret = -EINVAL;
3466 			goto out_free;
3467 		}
3468 		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3469 				cpus_mask.bits, thread_mask.affinity.nbits)) {
3470 			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3471 			ret = -EINVAL;
3472 			goto out_free;
3473 		}
3474 
3475 		/* do not allow intersection with other masks (full_mask) */
3476 		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3477 				      thread_mask.maps.nbits)) {
3478 			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3479 			ret = -EINVAL;
3480 			goto out_free;
3481 		}
3482 		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3483 				      thread_mask.affinity.nbits)) {
3484 			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3485 			ret = -EINVAL;
3486 			goto out_free;
3487 		}
3488 
3489 		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3490 			  thread_mask.maps.bits, full_mask.maps.nbits);
3491 		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3492 			  thread_mask.affinity.bits, full_mask.affinity.nbits);
3493 
3494 		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3495 		if (!thread_masks) {
3496 			pr_err("Failed to reallocate thread masks\n");
3497 			ret = -ENOMEM;
3498 			goto out_free;
3499 		}
3500 		rec->thread_masks = thread_masks;
3501 		rec->thread_masks[t] = thread_mask;
3502 		if (verbose) {
3503 			pr_debug("thread_masks[%d]: ", t);
3504 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3505 			pr_debug("thread_masks[%d]: ", t);
3506 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3507 		}
3508 		t++;
3509 		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3510 		if (ret) {
3511 			pr_err("Failed to allocate thread mask\n");
3512 			goto out_free_full_and_cpu_masks;
3513 		}
3514 	}
3515 	rec->nr_threads = t;
3516 	pr_debug("nr_threads: %d\n", rec->nr_threads);
3517 	if (!rec->nr_threads)
3518 		ret = -EINVAL;
3519 
3520 out_free:
3521 	record__thread_mask_free(&thread_mask);
3522 out_free_full_and_cpu_masks:
3523 	record__thread_mask_free(&full_mask);
3524 out_free_cpu_mask:
3525 	record__mmap_cpu_mask_free(&cpus_mask);
3526 
3527 	return ret;
3528 }
3529 
3530 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3531 {
3532 	int ret;
3533 	struct cpu_topology *topo;
3534 
3535 	topo = cpu_topology__new();
3536 	if (!topo) {
3537 		pr_err("Failed to allocate CPU topology\n");
3538 		return -ENOMEM;
3539 	}
3540 
3541 	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3542 					     topo->core_cpus_list, topo->core_cpus_lists);
3543 	cpu_topology__delete(topo);
3544 
3545 	return ret;
3546 }
3547 
3548 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3549 {
3550 	int ret;
3551 	struct cpu_topology *topo;
3552 
3553 	topo = cpu_topology__new();
3554 	if (!topo) {
3555 		pr_err("Failed to allocate CPU topology\n");
3556 		return -ENOMEM;
3557 	}
3558 
3559 	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3560 					     topo->package_cpus_list, topo->package_cpus_lists);
3561 	cpu_topology__delete(topo);
3562 
3563 	return ret;
3564 }
3565 
3566 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3567 {
3568 	u32 s;
3569 	int ret;
3570 	const char **spec;
3571 	struct numa_topology *topo;
3572 
3573 	topo = numa_topology__new();
3574 	if (!topo) {
3575 		pr_err("Failed to allocate NUMA topology\n");
3576 		return -ENOMEM;
3577 	}
3578 
3579 	spec = zalloc(topo->nr * sizeof(char *));
3580 	if (!spec) {
3581 		pr_err("Failed to allocate NUMA spec\n");
3582 		ret = -ENOMEM;
3583 		goto out_delete_topo;
3584 	}
3585 	for (s = 0; s < topo->nr; s++)
3586 		spec[s] = topo->nodes[s].cpus;
3587 
3588 	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3589 
3590 	zfree(&spec);
3591 
3592 out_delete_topo:
3593 	numa_topology__delete(topo);
3594 
3595 	return ret;
3596 }
3597 
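/*
 * Parse the user supplied --threads specification: a ':' separated list
 * of <maps cpus>/<affinity cpus> pairs. The two CPU lists of every pair
 * are duplicated into maps_spec[] and affinity_spec[] and then passed to
 * record__init_thread_masks_spec().
 */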
3598 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3599 {
3600 	int t, ret;
3601 	u32 s, nr_spec = 0;
3602 	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3603 	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3604 
3605 	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3606 		spec = strtok_r(user_spec, ":", &spec_ptr);
3607 		if (spec == NULL)
3608 			break;
3609 		pr_debug2("threads_spec[%d]: %s\n", t, spec);
3610 		mask = strtok_r(spec, "/", &mask_ptr);
3611 		if (mask == NULL)
3612 			break;
3613 		pr_debug2("  maps mask: %s\n", mask);
3614 		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3615 		if (!tmp_spec) {
3616 			pr_err("Failed to reallocate maps spec\n");
3617 			ret = -ENOMEM;
3618 			goto out_free;
3619 		}
3620 		maps_spec = tmp_spec;
3621 		maps_spec[nr_spec] = dup_mask = strdup(mask);
3622 		if (!maps_spec[nr_spec]) {
3623 			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3624 			ret = -ENOMEM;
3625 			goto out_free;
3626 		}
3627 		mask = strtok_r(NULL, "/", &mask_ptr);
3628 		if (mask == NULL) {
3629 			pr_err("Invalid thread maps or affinity specs\n");
3630 			ret = -EINVAL;
3631 			goto out_free;
3632 		}
3633 		pr_debug2("  affinity mask: %s\n", mask);
3634 		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3635 		if (!tmp_spec) {
3636 			pr_err("Failed to reallocate affinity spec\n");
3637 			ret = -ENOMEM;
3638 			goto out_free;
3639 		}
3640 		affinity_spec = tmp_spec;
3641 		affinity_spec[nr_spec] = strdup(mask);
3642 		if (!affinity_spec[nr_spec]) {
3643 			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3644 			ret = -ENOMEM;
3645 			goto out_free;
3646 		}
3647 		dup_mask = NULL;
3648 		nr_spec++;
3649 	}
3650 
3651 	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3652 					     (const char **)affinity_spec, nr_spec);
3653 
3654 out_free:
3655 	free(dup_mask);
3656 	for (s = 0; s < nr_spec; s++) {
3657 		if (maps_spec)
3658 			free(maps_spec[s]);
3659 		if (affinity_spec)
3660 			free(affinity_spec[s]);
3661 	}
3662 	free(affinity_spec);
3663 	free(maps_spec);
3664 
3665 	return ret;
3666 }
3667 
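/*
 * Default, non-parallel case: a single thread mask is allocated and its
 * mmap mask is initialized from the requested CPUs, so recording runs
 * with one data streaming thread.
 */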
3668 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3669 {
3670 	int ret;
3671 
3672 	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3673 	if (ret)
3674 		return ret;
3675 
3676 	record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus);
3677 
3678 	rec->nr_threads = 1;
3679 
3680 	return 0;
3681 }
3682 
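/*
 * Pick the thread mask layout: without parallel streaming fall back to
 * the single default mask; otherwise reject dummy (--per-thread) CPU
 * maps and dispatch on the --threads specification (cpu, core, package,
 * numa or a user defined mask list).
 */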
3683 static int record__init_thread_masks(struct record *rec)
3684 {
3685 	int ret = 0;
3686 	struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
3687 
3688 	if (!record__threads_enabled(rec))
3689 		return record__init_thread_default_masks(rec, cpus);
3690 
3691 	if (cpu_map__is_dummy(cpus)) {
3692 		pr_err("--per-thread option is mutually exclusive with parallel streaming mode.\n");
3693 		return -EINVAL;
3694 	}
3695 
3696 	switch (rec->opts.threads_spec) {
3697 	case THREAD_SPEC__CPU:
3698 		ret = record__init_thread_cpu_masks(rec, cpus);
3699 		break;
3700 	case THREAD_SPEC__CORE:
3701 		ret = record__init_thread_core_masks(rec, cpus);
3702 		break;
3703 	case THREAD_SPEC__PACKAGE:
3704 		ret = record__init_thread_package_masks(rec, cpus);
3705 		break;
3706 	case THREAD_SPEC__NUMA:
3707 		ret = record__init_thread_numa_masks(rec, cpus);
3708 		break;
3709 	case THREAD_SPEC__USER:
3710 		ret = record__init_thread_user_masks(rec, cpus);
3711 		break;
3712 	default:
3713 		break;
3714 	}
3715 
3716 	return ret;
3717 }
3718 
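/*
 * Entry point of 'perf record': parse and validate options and targets,
 * set up the event list, threading, auxtrace and build-id handling, then
 * hand over to __cmd_record() to do the actual recording.
 */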
3719 int cmd_record(int argc, const char **argv)
3720 {
3721 	int err;
3722 	struct record *rec = &record;
3723 	char errbuf[BUFSIZ];
3724 
3725 	setlocale(LC_ALL, "");
3726 
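	/*
	 * Options that depend on features not compiled into this binary
	 * (libbpf, DWARF based BPF prologue) are marked as unavailable so
	 * that using them reports the missing build prerequisite.
	 */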
3727 #ifndef HAVE_LIBBPF_SUPPORT
3728 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3729 	set_nobuild('\0', "clang-path", true);
3730 	set_nobuild('\0', "clang-opt", true);
3731 # undef set_nobuild
3732 #endif
3733 
3734 #ifndef HAVE_BPF_PROLOGUE
3735 # if !defined (HAVE_DWARF_SUPPORT)
3736 #  define REASON  "NO_DWARF=1"
3737 # elif !defined (HAVE_LIBBPF_SUPPORT)
3738 #  define REASON  "NO_LIBBPF=1"
3739 # else
3740 #  define REASON  "this architecture doesn't support BPF prologue"
3741 # endif
3742 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3743 	set_nobuild('\0', "vmlinux", true);
3744 # undef set_nobuild
3745 # undef REASON
3746 #endif
3747 
3748 	rec->opts.affinity = PERF_AFFINITY_SYS;
3749 
3750 	rec->evlist = evlist__new();
3751 	if (rec->evlist == NULL)
3752 		return -ENOMEM;
3753 
3754 	err = perf_config(perf_record_config, rec);
3755 	if (err)
3756 		return err;
3757 
3758 	argc = parse_options(argc, argv, record_options, record_usage,
3759 			    PARSE_OPT_STOP_AT_NON_OPTION);
3760 	if (quiet)
3761 		perf_quiet_option();
3762 
3763 	err = symbol__validate_sym_arguments();
3764 	if (err)
3765 		return err;
3766 
3767 	perf_debuginfod_setup(&record.debuginfod);
3768 
3769 	/* Make system wide (-a) the default target. */
3770 	if (!argc && target__none(&rec->opts.target))
3771 		rec->opts.target.system_wide = true;
3772 
3773 	if (nr_cgroups && !rec->opts.target.system_wide) {
3774 		usage_with_options_msg(record_usage, record_options,
3775 			"cgroup monitoring only available in system-wide mode");
3777 	}
3778 
3779 	if (rec->buildid_mmap) {
3780 		if (!perf_can_record_build_id()) {
3781 			pr_err("Failed: no support for recording build ids in mmap events, update your kernel.\n");
3782 			err = -EINVAL;
3783 			goto out_opts;
3784 		}
3785 		pr_debug("Enabling build id in mmap2 events.\n");
3786 		/* Enable mmap build id synthesizing. */
3787 		symbol_conf.buildid_mmap2 = true;
3788 		/* Enable perf_event_attr::build_id bit. */
3789 		rec->opts.build_id = true;
3790 		/* Disable build id cache. */
3791 		rec->no_buildid = true;
3792 	}
3793 
3794 	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3795 		pr_err("Kernel has no cgroup sampling support.\n");
3796 		err = -EINVAL;
3797 		goto out_opts;
3798 	}
3799 
3800 	if (rec->opts.kcore || record__threads_enabled(rec))
3801 		rec->data.is_dir = true;
3802 
3803 	if (record__threads_enabled(rec)) {
3804 		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3805 			pr_err("--affinity option is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
3806 			goto out_opts;
3807 		}
3808 		if (record__aio_enabled(rec)) {
3809 			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive with parallel streaming mode.\n");
			err = -EINVAL;
3810 			goto out_opts;
3811 		}
3812 	}
3813 
3814 	if (rec->opts.comp_level != 0) {
3815 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3816 		rec->no_buildid = true;
3817 	}
3818 
3819 	if (rec->opts.record_switch_events &&
3820 	    !perf_can_record_switch_events()) {
3821 		ui__error("kernel does not support recording context switch events\n");
3822 		parse_options_usage(record_usage, record_options, "switch-events", 0);
3823 		err = -EINVAL;
3824 		goto out_opts;
3825 	}
3826 
3827 	if (switch_output_setup(rec)) {
3828 		parse_options_usage(record_usage, record_options, "switch-output", 0);
3829 		err = -EINVAL;
3830 		goto out_opts;
3831 	}
3832 
3833 	if (rec->switch_output.time) {
3834 		signal(SIGALRM, alarm_sig_handler);
3835 		alarm(rec->switch_output.time);
3836 	}
3837 
3838 	if (rec->switch_output.num_files) {
3839 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
3840 						      sizeof(char *));
3841 		if (!rec->switch_output.filenames) {
3842 			err = -ENOMEM;
3843 			goto out_opts;
3844 		}
3845 	}
3846 
3847 	if (rec->timestamp_filename && record__threads_enabled(rec)) {
3848 		rec->timestamp_filename = false;
3849 		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3850 	}
3851 
3852 	/*
3853 	 * Allow aliases to facilitate the lookup of symbols for address
3854 	 * filters. Refer to auxtrace_parse_filters().
3855 	 */
3856 	symbol_conf.allow_aliases = true;
3857 
3858 	symbol__init(NULL);
3859 
3860 	err = record__auxtrace_init(rec);
3861 	if (err)
3862 		goto out;
3863 
3864 	if (dry_run)
3865 		goto out;
3866 
3867 	err = bpf__setup_stdout(rec->evlist);
3868 	if (err) {
3869 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3870 		pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf);
3872 		goto out;
3873 	}
3874 
3875 	err = -ENOMEM;
3876 
3877 	if (rec->no_buildid_cache || rec->no_buildid) {
3878 		disable_buildid_cache();
3879 	} else if (rec->switch_output.enabled) {
3880 		/*
3881 		 * In 'perf record --switch-output', disable buildid
3882 		 * generation by default to reduce data file switching
3883 		 * overhead. Still generate build ids if they are explicitly
3884 		 * required, using
3885 		 *
3886 		 *  perf record --switch-output --no-no-buildid \
3887 		 *              --no-no-buildid-cache
3888 		 *
3889 		 * The following code is equivalent to:
3890 		 *
3891 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
3892 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3893 		 *         disable_buildid_cache();
3894 		 */
3895 		bool disable = true;
3896 
3897 		if (rec->no_buildid_set && !rec->no_buildid)
3898 			disable = false;
3899 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3900 			disable = false;
3901 		if (disable) {
3902 			rec->no_buildid = true;
3903 			rec->no_buildid_cache = true;
3904 			disable_buildid_cache();
3905 		}
3906 	}
3907 
3908 	if (record.opts.overwrite)
3909 		record.opts.tail_synthesize = true;
3910 
3911 	if (rec->evlist->core.nr_entries == 0) {
3912 		if (perf_pmu__has_hybrid()) {
3913 			err = evlist__add_default_hybrid(rec->evlist,
3914 							 !record.opts.no_samples);
3915 		} else {
3916 			err = __evlist__add_default(rec->evlist,
3917 						    !record.opts.no_samples);
3918 		}
3919 
3920 		if (err < 0) {
3921 			pr_err("Not enough memory for event selector list\n");
3922 			goto out;
3923 		}
3924 	}
3925 
3926 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3927 		rec->opts.no_inherit = true;
3928 
3929 	err = target__validate(&rec->opts.target);
3930 	if (err) {
3931 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3932 		ui__warning("%s\n", errbuf);
3933 	}
3934 
3935 	err = target__parse_uid(&rec->opts.target);
3936 	if (err) {
3937 		int saved_errno = errno;
3938 
3939 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3940 		ui__error("%s", errbuf);
3941 
3942 		err = -saved_errno;
3943 		goto out;
3944 	}
3945 
3946 	/* Enable ignoring missing threads when -u/-p option is defined. */
3947 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
3948 
3949 	if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
3950 		pr_err("failed to use cpu list %s\n",
3951 		       rec->opts.target.cpu_list);
		err = -EINVAL;
3952 		goto out;
3953 	}
3954 
3955 	rec->opts.target.hybrid = perf_pmu__has_hybrid();
3956 
3957 	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
3958 		arch__add_leaf_frame_record_opts(&rec->opts);
3959 
3960 	err = -ENOMEM;
3961 	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
3962 		usage_with_options(record_usage, record_options);
3963 
3964 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
3965 	if (err)
3966 		goto out;
3967 
3968 	/*
3969 	 * We take all buildids when the file contains AUX area
3970 	 * tracing data, because we do not decode the trace: decoding
3971 	 * it would take too long.
3972 	 */
3973 	if (rec->opts.full_auxtrace)
3974 		rec->buildid_all = true;
3975 
3976 	if (rec->opts.text_poke) {
3977 		err = record__config_text_poke(rec->evlist);
3978 		if (err) {
3979 			pr_err("record__config_text_poke failed, error %d\n", err);
3980 			goto out;
3981 		}
3982 	}
3983 
3984 	if (record_opts__config(&rec->opts)) {
3985 		err = -EINVAL;
3986 		goto out;
3987 	}
3988 
3989 	err = record__init_thread_masks(rec);
3990 	if (err) {
3991 		pr_err("Failed to initialize parallel data streaming masks\n");
3992 		goto out;
3993 	}
3994 
3995 	if (rec->opts.nr_cblocks > nr_cblocks_max)
3996 		rec->opts.nr_cblocks = nr_cblocks_max;
3997 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
3998 
3999 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4000 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4001 
4002 	if (rec->opts.comp_level > comp_level_max)
4003 		rec->opts.comp_level = comp_level_max;
4004 	pr_debug("comp level: %d\n", rec->opts.comp_level);
4005 
4006 	err = __cmd_record(&record, argc, argv);
4007 out:
4008 	evlist__delete(rec->evlist);
4009 	symbol__exit();
4010 	auxtrace_record__free(rec->itr);
4011 out_opts:
4012 	record__free_thread_masks(rec, rec->nr_threads);
4013 	rec->nr_threads = 0;
4014 	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4015 	return err;
4016 }
4017 
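/*
 * Handles the snapshot signal: take an AUX area tracing snapshot and,
 * if --switch-output is signal driven, arm the trigger that makes the
 * recording loop switch to a new output file.
 */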
4018 static void snapshot_sig_handler(int sig __maybe_unused)
4019 {
4020 	struct record *rec = &record;
4021 
4022 	hit_auxtrace_snapshot_trigger(rec);
4023 
4024 	if (switch_output_signal(rec))
4025 		trigger_hit(&switch_output_trigger);
4026 }
4027 
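/*
 * SIGALRM handler armed by alarm() for time based --switch-output: when
 * the interval expires, arm the trigger so the recording loop switches
 * to a new output file.
 */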
4028 static void alarm_sig_handler(int sig __maybe_unused)
4029 {
4030 	struct record *rec = &record;
4031 
4032 	if (switch_output_time(rec))
4033 		trigger_hit(&switch_output_trigger);
4034 }
4035