xref: /openbmc/linux/tools/perf/builtin-record.c (revision 3381df09)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/synthetic-events.h"
43 #include "util/time-utils.h"
44 #include "util/units.h"
45 #include "util/bpf-event.h"
46 #include "asm/bug.h"
47 #include "perf.h"
48 
49 #include <errno.h>
50 #include <inttypes.h>
51 #include <locale.h>
52 #include <poll.h>
53 #include <unistd.h>
54 #include <sched.h>
55 #include <signal.h>
56 #include <sys/mman.h>
57 #include <sys/wait.h>
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 #include <fcntl.h>
61 #include <linux/err.h>
62 #include <linux/string.h>
63 #include <linux/time64.h>
64 #include <linux/zalloc.h>
65 #include <linux/bitmap.h>
66 
/*
 * Settings and bookkeeping for switching the output file while recording.
 * The signal/size/time members are consulted by the switch_output_*()
 * predicates below; the remaining members are used outside this part of
 * the file.
 */
struct switch_output {
	bool enabled;
	bool signal;		/* non-zero: switch on signal, see switch_output_signal() */
	unsigned long size;	/* non-zero: byte threshold, see switch_output_size() */
	unsigned long time;	/* non-zero: time based switch, see switch_output_time() */
	const char *str;
	bool set;
	char **filenames;
	int num_files;
	int cur_file;
};
78 
79 struct record {
80 	struct perf_tool	tool;
81 	struct record_opts	opts;
82 	u64			bytes_written;
83 	struct perf_data	data;
84 	struct auxtrace_record	*itr;
85 	struct evlist	*evlist;
86 	struct perf_session	*session;
87 	int			realtime_prio;
88 	bool			no_buildid;
89 	bool			no_buildid_set;
90 	bool			no_buildid_cache;
91 	bool			no_buildid_cache_set;
92 	bool			buildid_all;
93 	bool			timestamp_filename;
94 	bool			timestamp_boundary;
95 	struct switch_output	switch_output;
96 	unsigned long long	samples;
97 	struct mmap_cpu_mask	affinity_mask;
98 	unsigned long		output_max_size;	/* = 0: unlimited */
99 };
100 
/*
 * Set to 1 by sig_handler() and by record__write() once the output size
 * limit has been reached.
 */
static volatile int done;

/* Checked by record__auxtrace_snapshot_exit() before starting a snapshot. */
static volatile int auxtrace_record__snapshot_started;

/* Triggers driving AUX area snapshots and output file switching. */
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
106 
107 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
108 	"SYS", "NODE", "CPU"
109 };
110 
111 static bool switch_output_signal(struct record *rec)
112 {
113 	return rec->switch_output.signal &&
114 	       trigger_is_ready(&switch_output_trigger);
115 }
116 
117 static bool switch_output_size(struct record *rec)
118 {
119 	return rec->switch_output.size &&
120 	       trigger_is_ready(&switch_output_trigger) &&
121 	       (rec->bytes_written >= rec->switch_output.size);
122 }
123 
124 static bool switch_output_time(struct record *rec)
125 {
126 	return rec->switch_output.time &&
127 	       trigger_is_ready(&switch_output_trigger);
128 }
129 
130 static bool record__output_max_size_exceeded(struct record *rec)
131 {
132 	return rec->output_max_size &&
133 	       (rec->bytes_written >= rec->output_max_size);
134 }
135 
136 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
137 			 void *bf, size_t size)
138 {
139 	struct perf_data_file *file = &rec->session->data->file;
140 
141 	if (perf_data_file__write(file, bf, size) < 0) {
142 		pr_err("failed to write perf data, error: %m\n");
143 		return -1;
144 	}
145 
146 	rec->bytes_written += size;
147 
148 	if (record__output_max_size_exceeded(rec) && !done) {
149 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
150 				" stopping session ]\n",
151 				rec->bytes_written >> 10);
152 		done = 1;
153 	}
154 
155 	if (switch_output_size(rec))
156 		trigger_hit(&switch_output_trigger);
157 
158 	return 0;
159 }
160 
161 static int record__aio_enabled(struct record *rec);
162 static int record__comp_enabled(struct record *rec);
163 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
164 			    void *src, size_t src_size);
165 
166 #ifdef HAVE_AIO_SUPPORT
/*
 * Fill in @cblock and queue an asynchronous write of @size bytes from @buf
 * to @trace_fd at file offset @off.
 *
 * Queueing is retried for as long as aio_write() fails with EAGAIN. On any
 * other failure the control block is marked unused (aio_fildes = -1) and
 * the aio_write() return value is handed back.
 *
 * Returns 0 when the request was queued.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			return rc;

		if (errno == EAGAIN)
			continue;	/* queue is full for now, try again */

		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		return rc;
	}
}
191 
192 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
193 {
194 	void *rem_buf;
195 	off_t rem_off;
196 	size_t rem_size;
197 	int rc, aio_errno;
198 	ssize_t aio_ret, written;
199 
200 	aio_errno = aio_error(cblock);
201 	if (aio_errno == EINPROGRESS)
202 		return 0;
203 
204 	written = aio_ret = aio_return(cblock);
205 	if (aio_ret < 0) {
206 		if (aio_errno != EINTR)
207 			pr_err("failed to write perf data, error: %m\n");
208 		written = 0;
209 	}
210 
211 	rem_size = cblock->aio_nbytes - written;
212 
213 	if (rem_size == 0) {
214 		cblock->aio_fildes = -1;
215 		/*
216 		 * md->refcount is incremented in record__aio_pushfn() for
217 		 * every aio write request started in record__aio_push() so
218 		 * decrement it because the request is now complete.
219 		 */
220 		perf_mmap__put(&md->core);
221 		rc = 1;
222 	} else {
223 		/*
224 		 * aio write request may require restart with the
225 		 * reminder if the kernel didn't write whole
226 		 * chunk at once.
227 		 */
228 		rem_off = cblock->aio_offset + written;
229 		rem_buf = (void *)(cblock->aio_buf + written);
230 		record__aio_write(cblock, cblock->aio_fildes,
231 				rem_buf, rem_size, rem_off);
232 		rc = 0;
233 	}
234 
235 	return rc;
236 }
237 
238 static int record__aio_sync(struct mmap *md, bool sync_all)
239 {
240 	struct aiocb **aiocb = md->aio.aiocb;
241 	struct aiocb *cblocks = md->aio.cblocks;
242 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
243 	int i, do_suspend;
244 
245 	do {
246 		do_suspend = 0;
247 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
248 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
249 				if (sync_all)
250 					aiocb[i] = NULL;
251 				else
252 					return i;
253 			} else {
254 				/*
255 				 * Started aio write is not complete yet
256 				 * so it has to be waited before the
257 				 * next allocation.
258 				 */
259 				aiocb[i] = &cblocks[i];
260 				do_suspend = 1;
261 			}
262 		}
263 		if (!do_suspend)
264 			return -1;
265 
266 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
267 			if (!(errno == EAGAIN || errno == EINTR))
268 				pr_err("failed to sync perf data, error: %m\n");
269 		}
270 	} while (1);
271 }
272 
/* Context handed to record__aio_pushfn() through perf_mmap__push(). */
struct record_aio {
	struct record *rec;	/* owning record session */
	void *data;		/* destination buffer for the copied/compressed data */
	size_t size;		/* bytes accumulated in @data so far */
};
278 
279 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
280 {
281 	struct record_aio *aio = to;
282 
283 	/*
284 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
285 	 * to release space in the kernel buffer as fast as possible, calling
286 	 * perf_mmap__consume() from perf_mmap__push() function.
287 	 *
288 	 * That lets the kernel to proceed with storing more profiling data into
289 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
290 	 *
291 	 * Coping can be done in two steps in case the chunk of profiling data
292 	 * crosses the upper bound of the kernel buffer. In this case we first move
293 	 * part of data from map->start till the upper bound and then the reminder
294 	 * from the beginning of the kernel buffer till the end of the data chunk.
295 	 */
296 
297 	if (record__comp_enabled(aio->rec)) {
298 		size = zstd_compress(aio->rec->session, aio->data + aio->size,
299 				     mmap__mmap_len(map) - aio->size,
300 				     buf, size);
301 	} else {
302 		memcpy(aio->data + aio->size, buf, size);
303 	}
304 
305 	if (!aio->size) {
306 		/*
307 		 * Increment map->refcount to guard map->aio.data[] buffer
308 		 * from premature deallocation because map object can be
309 		 * released earlier than aio write request started on
310 		 * map->aio.data[] buffer is complete.
311 		 *
312 		 * perf_mmap__put() is done at record__aio_complete()
313 		 * after started aio request completion or at record__aio_push()
314 		 * if the request failed to start.
315 		 */
316 		perf_mmap__get(&map->core);
317 	}
318 
319 	aio->size += size;
320 
321 	return size;
322 }
323 
324 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
325 {
326 	int ret, idx;
327 	int trace_fd = rec->session->data->file.fd;
328 	struct record_aio aio = { .rec = rec, .size = 0 };
329 
330 	/*
331 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
332 	 * becomes available after previous aio write operation.
333 	 */
334 
335 	idx = record__aio_sync(map, false);
336 	aio.data = map->aio.data[idx];
337 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
338 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
339 		return ret;
340 
341 	rec->samples++;
342 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
343 	if (!ret) {
344 		*off += aio.size;
345 		rec->bytes_written += aio.size;
346 		if (switch_output_size(rec))
347 			trigger_hit(&switch_output_trigger);
348 	} else {
349 		/*
350 		 * Decrement map->refcount incremented in record__aio_pushfn()
351 		 * back if record__aio_write() operation failed to start, otherwise
352 		 * map->refcount is decremented in record__aio_complete() after
353 		 * aio write operation finishes successfully.
354 		 */
355 		perf_mmap__put(&map->core);
356 	}
357 
358 	return ret;
359 }
360 
/* Return the current file offset of @trace_fd as reported by lseek(). */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t cur = lseek(trace_fd, 0, SEEK_CUR);

	return cur;
}
365 
/* Move the file offset of @trace_fd to @pos; the lseek() result is not checked. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
370 
371 static void record__aio_mmap_read_sync(struct record *rec)
372 {
373 	int i;
374 	struct evlist *evlist = rec->evlist;
375 	struct mmap *maps = evlist->mmap;
376 
377 	if (!record__aio_enabled(rec))
378 		return;
379 
380 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
381 		struct mmap *map = &maps[i];
382 
383 		if (map->core.base)
384 			record__aio_sync(map, true);
385 	}
386 }
387 
388 static int nr_cblocks_default = 1;
389 static int nr_cblocks_max = 4;
390 
391 static int record__aio_parse(const struct option *opt,
392 			     const char *str,
393 			     int unset)
394 {
395 	struct record_opts *opts = (struct record_opts *)opt->value;
396 
397 	if (unset) {
398 		opts->nr_cblocks = 0;
399 	} else {
400 		if (str)
401 			opts->nr_cblocks = strtol(str, NULL, 0);
402 		if (!opts->nr_cblocks)
403 			opts->nr_cblocks = nr_cblocks_default;
404 	}
405 
406 	return 0;
407 }
408 #else /* HAVE_AIO_SUPPORT */
409 static int nr_cblocks_max = 0;
410 
411 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
412 			    off_t *off __maybe_unused)
413 {
414 	return -1;
415 }
416 
417 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
418 {
419 	return -1;
420 }
421 
422 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
423 {
424 }
425 
426 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
427 {
428 }
429 #endif
430 
431 static int record__aio_enabled(struct record *rec)
432 {
433 	return rec->opts.nr_cblocks > 0;
434 }
435 
436 #define MMAP_FLUSH_DEFAULT 1
437 static int record__mmap_flush_parse(const struct option *opt,
438 				    const char *str,
439 				    int unset)
440 {
441 	int flush_max;
442 	struct record_opts *opts = (struct record_opts *)opt->value;
443 	static struct parse_tag tags[] = {
444 			{ .tag  = 'B', .mult = 1       },
445 			{ .tag  = 'K', .mult = 1 << 10 },
446 			{ .tag  = 'M', .mult = 1 << 20 },
447 			{ .tag  = 'G', .mult = 1 << 30 },
448 			{ .tag  = 0 },
449 	};
450 
451 	if (unset)
452 		return 0;
453 
454 	if (str) {
455 		opts->mmap_flush = parse_tag_value(str, tags);
456 		if (opts->mmap_flush == (int)-1)
457 			opts->mmap_flush = strtol(str, NULL, 0);
458 	}
459 
460 	if (!opts->mmap_flush)
461 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
462 
463 	flush_max = evlist__mmap_size(opts->mmap_pages);
464 	flush_max /= 4;
465 	if (opts->mmap_flush > flush_max)
466 		opts->mmap_flush = flush_max;
467 
468 	return 0;
469 }
470 
471 #ifdef HAVE_ZSTD_SUPPORT
472 static unsigned int comp_level_default = 1;
473 
474 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
475 {
476 	struct record_opts *opts = opt->value;
477 
478 	if (unset) {
479 		opts->comp_level = 0;
480 	} else {
481 		if (str)
482 			opts->comp_level = strtol(str, NULL, 0);
483 		if (!opts->comp_level)
484 			opts->comp_level = comp_level_default;
485 	}
486 
487 	return 0;
488 }
489 #endif
490 static unsigned int comp_level_max = 22;
491 
492 static int record__comp_enabled(struct record *rec)
493 {
494 	return rec->opts.comp_level > 0;
495 }
496 
497 static int process_synthesized_event(struct perf_tool *tool,
498 				     union perf_event *event,
499 				     struct perf_sample *sample __maybe_unused,
500 				     struct machine *machine __maybe_unused)
501 {
502 	struct record *rec = container_of(tool, struct record, tool);
503 	return record__write(rec, NULL, event, event->header.size);
504 }
505 
506 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
507 {
508 	struct record *rec = to;
509 
510 	if (record__comp_enabled(rec)) {
511 		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
512 		bf   = map->data;
513 	}
514 
515 	rec->samples++;
516 	return record__write(rec, map, bf, size);
517 }
518 
519 static volatile int signr = -1;
520 static volatile int child_finished;
521 
522 static void sig_handler(int sig)
523 {
524 	if (sig == SIGCHLD)
525 		child_finished = 1;
526 	else
527 		signr = sig;
528 
529 	done = 1;
530 }
531 
/* SIGSEGV handler: run perf hooks recovery first, then dump the stack. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
537 
538 static void record__sig_exit(void)
539 {
540 	if (signr == -1)
541 		return;
542 
543 	signal(signr, SIG_DFL);
544 	raise(signr);
545 }
546 
547 #ifdef HAVE_AUXTRACE_SUPPORT
548 
549 static int record__process_auxtrace(struct perf_tool *tool,
550 				    struct mmap *map,
551 				    union perf_event *event, void *data1,
552 				    size_t len1, void *data2, size_t len2)
553 {
554 	struct record *rec = container_of(tool, struct record, tool);
555 	struct perf_data *data = &rec->data;
556 	size_t padding;
557 	u8 pad[8] = {0};
558 
559 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
560 		off_t file_offset;
561 		int fd = perf_data__fd(data);
562 		int err;
563 
564 		file_offset = lseek(fd, 0, SEEK_CUR);
565 		if (file_offset == -1)
566 			return -1;
567 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
568 						     event, file_offset);
569 		if (err)
570 			return err;
571 	}
572 
573 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
574 	padding = (len1 + len2) & 7;
575 	if (padding)
576 		padding = 8 - padding;
577 
578 	record__write(rec, map, event, event->header.size);
579 	record__write(rec, map, data1, len1);
580 	if (len2)
581 		record__write(rec, map, data2, len2);
582 	record__write(rec, map, &pad, padding);
583 
584 	return 0;
585 }
586 
587 static int record__auxtrace_mmap_read(struct record *rec,
588 				      struct mmap *map)
589 {
590 	int ret;
591 
592 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
593 				  record__process_auxtrace);
594 	if (ret < 0)
595 		return ret;
596 
597 	if (ret)
598 		rec->samples++;
599 
600 	return 0;
601 }
602 
603 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
604 					       struct mmap *map)
605 {
606 	int ret;
607 
608 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
609 					   record__process_auxtrace,
610 					   rec->opts.auxtrace_snapshot_size);
611 	if (ret < 0)
612 		return ret;
613 
614 	if (ret)
615 		rec->samples++;
616 
617 	return 0;
618 }
619 
620 static int record__auxtrace_read_snapshot_all(struct record *rec)
621 {
622 	int i;
623 	int rc = 0;
624 
625 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
626 		struct mmap *map = &rec->evlist->mmap[i];
627 
628 		if (!map->auxtrace_mmap.base)
629 			continue;
630 
631 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
632 			rc = -1;
633 			goto out;
634 		}
635 	}
636 out:
637 	return rc;
638 }
639 
640 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
641 {
642 	pr_debug("Recording AUX area tracing snapshot\n");
643 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
644 		trigger_error(&auxtrace_snapshot_trigger);
645 	} else {
646 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
647 			trigger_error(&auxtrace_snapshot_trigger);
648 		else
649 			trigger_ready(&auxtrace_snapshot_trigger);
650 	}
651 }
652 
653 static int record__auxtrace_snapshot_exit(struct record *rec)
654 {
655 	if (trigger_is_error(&auxtrace_snapshot_trigger))
656 		return 0;
657 
658 	if (!auxtrace_record__snapshot_started &&
659 	    auxtrace_record__snapshot_start(rec->itr))
660 		return -1;
661 
662 	record__read_auxtrace_snapshot(rec, true);
663 	if (trigger_is_error(&auxtrace_snapshot_trigger))
664 		return -1;
665 
666 	return 0;
667 }
668 
669 static int record__auxtrace_init(struct record *rec)
670 {
671 	int err;
672 
673 	if (!rec->itr) {
674 		rec->itr = auxtrace_record__init(rec->evlist, &err);
675 		if (err)
676 			return err;
677 	}
678 
679 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
680 					      rec->opts.auxtrace_snapshot_opts);
681 	if (err)
682 		return err;
683 
684 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
685 					    rec->opts.auxtrace_sample_opts);
686 	if (err)
687 		return err;
688 
689 	return auxtrace_parse_filters(rec->evlist);
690 }
691 
692 #else
693 
694 static inline
695 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
696 			       struct mmap *map __maybe_unused)
697 {
698 	return 0;
699 }
700 
701 static inline
702 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
703 				    bool on_exit __maybe_unused)
704 {
705 }
706 
707 static inline
708 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
709 {
710 	return 0;
711 }
712 
713 static inline
714 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
715 {
716 	return 0;
717 }
718 
719 static int record__auxtrace_init(struct record *rec __maybe_unused)
720 {
721 	return 0;
722 }
723 
724 #endif
725 
726 static bool record__kcore_readable(struct machine *machine)
727 {
728 	char kcore[PATH_MAX];
729 	int fd;
730 
731 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
732 
733 	fd = open(kcore, O_RDONLY);
734 	if (fd < 0)
735 		return false;
736 
737 	close(fd);
738 
739 	return true;
740 }
741 
742 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
743 {
744 	char from_dir[PATH_MAX];
745 	char kcore_dir[PATH_MAX];
746 	int ret;
747 
748 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
749 
750 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
751 	if (ret)
752 		return ret;
753 
754 	return kcore_copy(from_dir, kcore_dir);
755 }
756 
757 static int record__mmap_evlist(struct record *rec,
758 			       struct evlist *evlist)
759 {
760 	struct record_opts *opts = &rec->opts;
761 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
762 				  opts->auxtrace_sample_mode;
763 	char msg[512];
764 
765 	if (opts->affinity != PERF_AFFINITY_SYS)
766 		cpu__setup_cpunode_map();
767 
768 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
769 				 opts->auxtrace_mmap_pages,
770 				 auxtrace_overwrite,
771 				 opts->nr_cblocks, opts->affinity,
772 				 opts->mmap_flush, opts->comp_level) < 0) {
773 		if (errno == EPERM) {
774 			pr_err("Permission error mapping pages.\n"
775 			       "Consider increasing "
776 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
777 			       "or try again with a smaller value of -m/--mmap_pages.\n"
778 			       "(current value: %u,%u)\n",
779 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
780 			return -errno;
781 		} else {
782 			pr_err("failed to mmap with %d (%s)\n", errno,
783 				str_error_r(errno, msg, sizeof(msg)));
784 			if (errno)
785 				return -errno;
786 			else
787 				return -EINVAL;
788 		}
789 	}
790 	return 0;
791 }
792 
793 static int record__mmap(struct record *rec)
794 {
795 	return record__mmap_evlist(rec, rec->evlist);
796 }
797 
798 static int record__open(struct record *rec)
799 {
800 	char msg[BUFSIZ];
801 	struct evsel *pos;
802 	struct evlist *evlist = rec->evlist;
803 	struct perf_session *session = rec->session;
804 	struct record_opts *opts = &rec->opts;
805 	int rc = 0;
806 
807 	/*
808 	 * For initial_delay we need to add a dummy event so that we can track
809 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
810 	 * real events, the ones asked by the user.
811 	 */
812 	if (opts->initial_delay) {
813 		if (perf_evlist__add_dummy(evlist))
814 			return -ENOMEM;
815 
816 		pos = evlist__first(evlist);
817 		pos->tracking = 0;
818 		pos = evlist__last(evlist);
819 		pos->tracking = 1;
820 		pos->core.attr.enable_on_exec = 1;
821 	}
822 
823 	perf_evlist__config(evlist, opts, &callchain_param);
824 
825 	evlist__for_each_entry(evlist, pos) {
826 try_again:
827 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
828 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
829 				if (verbose > 0)
830 					ui__warning("%s\n", msg);
831 				goto try_again;
832 			}
833 			if ((errno == EINVAL || errno == EBADF) &&
834 			    pos->leader != pos &&
835 			    pos->weak_group) {
836 			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
837 				goto try_again;
838 			}
839 			rc = -errno;
840 			perf_evsel__open_strerror(pos, &opts->target,
841 						  errno, msg, sizeof(msg));
842 			ui__error("%s\n", msg);
843 			goto out;
844 		}
845 
846 		pos->supported = true;
847 	}
848 
849 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
850 		pr_warning(
851 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
852 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
853 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
854 "file is not found in the buildid cache or in the vmlinux path.\n\n"
855 "Samples in kernel modules won't be resolved at all.\n\n"
856 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
857 "even with a suitable vmlinux or kallsyms file.\n\n");
858 	}
859 
860 	if (perf_evlist__apply_filters(evlist, &pos)) {
861 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
862 			pos->filter, perf_evsel__name(pos), errno,
863 			str_error_r(errno, msg, sizeof(msg)));
864 		rc = -1;
865 		goto out;
866 	}
867 
868 	rc = record__mmap(rec);
869 	if (rc)
870 		goto out;
871 
872 	session->evlist = evlist;
873 	perf_session__set_id_hdr_size(session);
874 out:
875 	return rc;
876 }
877 
878 static int process_sample_event(struct perf_tool *tool,
879 				union perf_event *event,
880 				struct perf_sample *sample,
881 				struct evsel *evsel,
882 				struct machine *machine)
883 {
884 	struct record *rec = container_of(tool, struct record, tool);
885 
886 	if (rec->evlist->first_sample_time == 0)
887 		rec->evlist->first_sample_time = sample->time;
888 
889 	rec->evlist->last_sample_time = sample->time;
890 
891 	if (rec->buildid_all)
892 		return 0;
893 
894 	rec->samples++;
895 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
896 }
897 
898 static int process_buildids(struct record *rec)
899 {
900 	struct perf_session *session = rec->session;
901 
902 	if (perf_data__size(&rec->data) == 0)
903 		return 0;
904 
905 	/*
906 	 * During this process, it'll load kernel map and replace the
907 	 * dso->long_name to a real pathname it found.  In this case
908 	 * we prefer the vmlinux path like
909 	 *   /lib/modules/3.16.4/build/vmlinux
910 	 *
911 	 * rather than build-id path (in debug directory).
912 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
913 	 */
914 	symbol_conf.ignore_vmlinux_buildid = true;
915 
916 	/*
917 	 * If --buildid-all is given, it marks all DSO regardless of hits,
918 	 * so no need to process samples. But if timestamp_boundary is enabled,
919 	 * it still needs to walk on all samples to get the timestamps of
920 	 * first/last samples.
921 	 */
922 	if (rec->buildid_all && !rec->timestamp_boundary)
923 		rec->tool.sample = NULL;
924 
925 	return perf_session__process_events(session);
926 }
927 
928 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
929 {
930 	int err;
931 	struct perf_tool *tool = data;
932 	/*
933 	 *As for guest kernel when processing subcommand record&report,
934 	 *we arrange module mmap prior to guest kernel mmap and trigger
935 	 *a preload dso because default guest module symbols are loaded
936 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
937 	 *method is used to avoid symbol missing when the first addr is
938 	 *in module instead of in guest kernel.
939 	 */
940 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
941 					     machine);
942 	if (err < 0)
943 		pr_err("Couldn't record guest kernel [%d]'s reference"
944 		       " relocation symbol.\n", machine->pid);
945 
946 	/*
947 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
948 	 * have no _text sometimes.
949 	 */
950 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
951 						 machine);
952 	if (err < 0)
953 		pr_err("Couldn't record guest kernel [%d]'s reference"
954 		       " relocation symbol.\n", machine->pid);
955 }
956 
957 static struct perf_event_header finished_round_event = {
958 	.size = sizeof(struct perf_event_header),
959 	.type = PERF_RECORD_FINISHED_ROUND,
960 };
961 
962 static void record__adjust_affinity(struct record *rec, struct mmap *map)
963 {
964 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
965 	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
966 			  rec->affinity_mask.nbits)) {
967 		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
968 		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
969 			  map->affinity_mask.bits, rec->affinity_mask.nbits);
970 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
971 				  (cpu_set_t *)rec->affinity_mask.bits);
972 		if (verbose == 2)
973 			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
974 	}
975 }
976 
977 static size_t process_comp_header(void *record, size_t increment)
978 {
979 	struct perf_record_compressed *event = record;
980 	size_t size = sizeof(*event);
981 
982 	if (increment) {
983 		event->header.size += increment;
984 		return increment;
985 	}
986 
987 	event->header.type = PERF_RECORD_COMPRESSED;
988 	event->header.size = size;
989 
990 	return size;
991 }
992 
993 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
994 			    void *src, size_t src_size)
995 {
996 	size_t compressed;
997 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
998 
999 	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1000 						     max_record_size, process_comp_header);
1001 
1002 	session->bytes_transferred += src_size;
1003 	session->bytes_compressed  += compressed;
1004 
1005 	return compressed;
1006 }
1007 
1008 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1009 				    bool overwrite, bool synch)
1010 {
1011 	u64 bytes_written = rec->bytes_written;
1012 	int i;
1013 	int rc = 0;
1014 	struct mmap *maps;
1015 	int trace_fd = rec->data.file.fd;
1016 	off_t off = 0;
1017 
1018 	if (!evlist)
1019 		return 0;
1020 
1021 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1022 	if (!maps)
1023 		return 0;
1024 
1025 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1026 		return 0;
1027 
1028 	if (record__aio_enabled(rec))
1029 		off = record__aio_get_pos(trace_fd);
1030 
1031 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
1032 		u64 flush = 0;
1033 		struct mmap *map = &maps[i];
1034 
1035 		if (map->core.base) {
1036 			record__adjust_affinity(rec, map);
1037 			if (synch) {
1038 				flush = map->core.flush;
1039 				map->core.flush = 1;
1040 			}
1041 			if (!record__aio_enabled(rec)) {
1042 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1043 					if (synch)
1044 						map->core.flush = flush;
1045 					rc = -1;
1046 					goto out;
1047 				}
1048 			} else {
1049 				if (record__aio_push(rec, map, &off) < 0) {
1050 					record__aio_set_pos(trace_fd, off);
1051 					if (synch)
1052 						map->core.flush = flush;
1053 					rc = -1;
1054 					goto out;
1055 				}
1056 			}
1057 			if (synch)
1058 				map->core.flush = flush;
1059 		}
1060 
1061 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1062 		    !rec->opts.auxtrace_sample_mode &&
1063 		    record__auxtrace_mmap_read(rec, map) != 0) {
1064 			rc = -1;
1065 			goto out;
1066 		}
1067 	}
1068 
1069 	if (record__aio_enabled(rec))
1070 		record__aio_set_pos(trace_fd, off);
1071 
1072 	/*
1073 	 * Mark the round finished in case we wrote
1074 	 * at least one event.
1075 	 */
1076 	if (bytes_written != rec->bytes_written)
1077 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1078 
1079 	if (overwrite)
1080 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1081 out:
1082 	return rc;
1083 }
1084 
1085 static int record__mmap_read_all(struct record *rec, bool synch)
1086 {
1087 	int err;
1088 
1089 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1090 	if (err)
1091 		return err;
1092 
1093 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1094 }
1095 
1096 static void record__init_features(struct record *rec)
1097 {
1098 	struct perf_session *session = rec->session;
1099 	int feat;
1100 
1101 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1102 		perf_header__set_feat(&session->header, feat);
1103 
1104 	if (rec->no_buildid)
1105 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1106 
1107 	if (!have_tracepoints(&rec->evlist->core.entries))
1108 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1109 
1110 	if (!rec->opts.branch_stack)
1111 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1112 
1113 	if (!rec->opts.full_auxtrace)
1114 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1115 
1116 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1117 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1118 
1119 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1120 	if (!record__comp_enabled(rec))
1121 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1122 
1123 	perf_header__clear_feat(&session->header, HEADER_STAT);
1124 }
1125 
1126 static void
1127 record__finish_output(struct record *rec)
1128 {
1129 	struct perf_data *data = &rec->data;
1130 	int fd = perf_data__fd(data);
1131 
1132 	if (data->is_pipe)
1133 		return;
1134 
1135 	rec->session->header.data_size += rec->bytes_written;
1136 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1137 
1138 	if (!rec->no_buildid) {
1139 		process_buildids(rec);
1140 
1141 		if (rec->buildid_all)
1142 			dsos__hit_all(rec->session);
1143 	}
1144 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1145 
1146 	return;
1147 }
1148 
1149 static int record__synthesize_workload(struct record *rec, bool tail)
1150 {
1151 	int err;
1152 	struct perf_thread_map *thread_map;
1153 
1154 	if (rec->opts.tail_synthesize != tail)
1155 		return 0;
1156 
1157 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1158 	if (thread_map == NULL)
1159 		return -1;
1160 
1161 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1162 						 process_synthesized_event,
1163 						 &rec->session->machines.host,
1164 						 rec->opts.sample_address);
1165 	perf_thread_map__put(thread_map);
1166 	return err;
1167 }
1168 
1169 static int record__synthesize(struct record *rec, bool tail);
1170 
1171 static int
1172 record__switch_output(struct record *rec, bool at_exit)
1173 {
1174 	struct perf_data *data = &rec->data;
1175 	int fd, err;
1176 	char *new_filename;
1177 
1178 	/* Same Size:      "2015122520103046"*/
1179 	char timestamp[] = "InvalidTimestamp";
1180 
1181 	record__aio_mmap_read_sync(rec);
1182 
1183 	record__synthesize(rec, true);
1184 	if (target__none(&rec->opts.target))
1185 		record__synthesize_workload(rec, true);
1186 
1187 	rec->samples = 0;
1188 	record__finish_output(rec);
1189 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1190 	if (err) {
1191 		pr_err("Failed to get current timestamp\n");
1192 		return -EINVAL;
1193 	}
1194 
1195 	fd = perf_data__switch(data, timestamp,
1196 				    rec->session->header.data_offset,
1197 				    at_exit, &new_filename);
1198 	if (fd >= 0 && !at_exit) {
1199 		rec->bytes_written = 0;
1200 		rec->session->header.data_size = 0;
1201 	}
1202 
1203 	if (!quiet)
1204 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1205 			data->path, timestamp);
1206 
1207 	if (rec->switch_output.num_files) {
1208 		int n = rec->switch_output.cur_file + 1;
1209 
1210 		if (n >= rec->switch_output.num_files)
1211 			n = 0;
1212 		rec->switch_output.cur_file = n;
1213 		if (rec->switch_output.filenames[n]) {
1214 			remove(rec->switch_output.filenames[n]);
1215 			zfree(&rec->switch_output.filenames[n]);
1216 		}
1217 		rec->switch_output.filenames[n] = new_filename;
1218 	} else {
1219 		free(new_filename);
1220 	}
1221 
1222 	/* Output tracking events */
1223 	if (!at_exit) {
1224 		record__synthesize(rec, false);
1225 
1226 		/*
1227 		 * In 'perf record --switch-output' without -a,
1228 		 * record__synthesize() in record__switch_output() won't
1229 		 * generate tracking events because there's no thread_map
1230 		 * in evlist. Which causes newly created perf.data doesn't
1231 		 * contain map and comm information.
1232 		 * Create a fake thread_map and directly call
1233 		 * perf_event__synthesize_thread_map() for those events.
1234 		 */
1235 		if (target__none(&rec->opts.target))
1236 			record__synthesize_workload(rec, false);
1237 	}
1238 	return fd;
1239 }
1240 
1241 static volatile int workload_exec_errno;
1242 
1243 /*
1244  * perf_evlist__prepare_workload will send a SIGUSR1
1245  * if the fork fails, since we asked by setting its
1246  * want_signal to true.
1247  */
1248 static void workload_exec_failed_signal(int signo __maybe_unused,
1249 					siginfo_t *info,
1250 					void *ucontext __maybe_unused)
1251 {
1252 	workload_exec_errno = info->si_value.sival_int;
1253 	done = 1;
1254 	child_finished = 1;
1255 }
1256 
1257 static void snapshot_sig_handler(int sig);
1258 static void alarm_sig_handler(int sig);
1259 
1260 static const struct perf_event_mmap_page *
1261 perf_evlist__pick_pc(struct evlist *evlist)
1262 {
1263 	if (evlist) {
1264 		if (evlist->mmap && evlist->mmap[0].core.base)
1265 			return evlist->mmap[0].core.base;
1266 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1267 			return evlist->overwrite_mmap[0].core.base;
1268 	}
1269 	return NULL;
1270 }
1271 
1272 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1273 {
1274 	const struct perf_event_mmap_page *pc;
1275 
1276 	pc = perf_evlist__pick_pc(rec->evlist);
1277 	if (pc)
1278 		return pc;
1279 	return NULL;
1280 }
1281 
1282 static int record__synthesize(struct record *rec, bool tail)
1283 {
1284 	struct perf_session *session = rec->session;
1285 	struct machine *machine = &session->machines.host;
1286 	struct perf_data *data = &rec->data;
1287 	struct record_opts *opts = &rec->opts;
1288 	struct perf_tool *tool = &rec->tool;
1289 	int fd = perf_data__fd(data);
1290 	int err = 0;
1291 
1292 	if (rec->opts.tail_synthesize != tail)
1293 		return 0;
1294 
1295 	if (data->is_pipe) {
1296 		/*
1297 		 * We need to synthesize events first, because some
1298 		 * features works on top of them (on report side).
1299 		 */
1300 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1301 						   process_synthesized_event);
1302 		if (err < 0) {
1303 			pr_err("Couldn't synthesize attrs.\n");
1304 			goto out;
1305 		}
1306 
1307 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1308 						      process_synthesized_event);
1309 		if (err < 0) {
1310 			pr_err("Couldn't synthesize features.\n");
1311 			return err;
1312 		}
1313 
1314 		if (have_tracepoints(&rec->evlist->core.entries)) {
1315 			/*
1316 			 * FIXME err <= 0 here actually means that
1317 			 * there were no tracepoints so its not really
1318 			 * an error, just that we don't need to
1319 			 * synthesize anything.  We really have to
1320 			 * return this more properly and also
1321 			 * propagate errors that now are calling die()
1322 			 */
1323 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1324 								  process_synthesized_event);
1325 			if (err <= 0) {
1326 				pr_err("Couldn't record tracing data.\n");
1327 				goto out;
1328 			}
1329 			rec->bytes_written += err;
1330 		}
1331 	}
1332 
1333 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1334 					  process_synthesized_event, machine);
1335 	if (err)
1336 		goto out;
1337 
1338 	/* Synthesize id_index before auxtrace_info */
1339 	if (rec->opts.auxtrace_sample_mode) {
1340 		err = perf_event__synthesize_id_index(tool,
1341 						      process_synthesized_event,
1342 						      session->evlist, machine);
1343 		if (err)
1344 			goto out;
1345 	}
1346 
1347 	if (rec->opts.full_auxtrace) {
1348 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1349 					session, process_synthesized_event);
1350 		if (err)
1351 			goto out;
1352 	}
1353 
1354 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1355 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1356 							 machine);
1357 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1358 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1359 				   "Check /proc/kallsyms permission or run as root.\n");
1360 
1361 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1362 						     machine);
1363 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1364 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1365 				   "Check /proc/modules permission or run as root.\n");
1366 	}
1367 
1368 	if (perf_guest) {
1369 		machines__process_guests(&session->machines,
1370 					 perf_event__synthesize_guest_os, tool);
1371 	}
1372 
1373 	err = perf_event__synthesize_extra_attr(&rec->tool,
1374 						rec->evlist,
1375 						process_synthesized_event,
1376 						data->is_pipe);
1377 	if (err)
1378 		goto out;
1379 
1380 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1381 						 process_synthesized_event,
1382 						NULL);
1383 	if (err < 0) {
1384 		pr_err("Couldn't synthesize thread map.\n");
1385 		return err;
1386 	}
1387 
1388 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1389 					     process_synthesized_event, NULL);
1390 	if (err < 0) {
1391 		pr_err("Couldn't synthesize cpu map.\n");
1392 		return err;
1393 	}
1394 
1395 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1396 						machine, opts);
1397 	if (err < 0)
1398 		pr_warning("Couldn't synthesize bpf events.\n");
1399 
1400 	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1401 					     machine);
1402 	if (err < 0)
1403 		pr_warning("Couldn't synthesize cgroup events.\n");
1404 
1405 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1406 					    process_synthesized_event, opts->sample_address,
1407 					    1);
1408 out:
1409 	return err;
1410 }
1411 
1412 static int __cmd_record(struct record *rec, int argc, const char **argv)
1413 {
1414 	int err;
1415 	int status = 0;
1416 	unsigned long waking = 0;
1417 	const bool forks = argc > 0;
1418 	struct perf_tool *tool = &rec->tool;
1419 	struct record_opts *opts = &rec->opts;
1420 	struct perf_data *data = &rec->data;
1421 	struct perf_session *session;
1422 	bool disabled = false, draining = false;
1423 	struct evlist *sb_evlist = NULL;
1424 	int fd;
1425 	float ratio = 0;
1426 
1427 	atexit(record__sig_exit);
1428 	signal(SIGCHLD, sig_handler);
1429 	signal(SIGINT, sig_handler);
1430 	signal(SIGTERM, sig_handler);
1431 	signal(SIGSEGV, sigsegv_handler);
1432 
1433 	if (rec->opts.record_namespaces)
1434 		tool->namespace_events = true;
1435 
1436 	if (rec->opts.record_cgroup) {
1437 #ifdef HAVE_FILE_HANDLE
1438 		tool->cgroup_events = true;
1439 #else
1440 		pr_err("cgroup tracking is not supported\n");
1441 		return -1;
1442 #endif
1443 	}
1444 
1445 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1446 		signal(SIGUSR2, snapshot_sig_handler);
1447 		if (rec->opts.auxtrace_snapshot_mode)
1448 			trigger_on(&auxtrace_snapshot_trigger);
1449 		if (rec->switch_output.enabled)
1450 			trigger_on(&switch_output_trigger);
1451 	} else {
1452 		signal(SIGUSR2, SIG_IGN);
1453 	}
1454 
1455 	session = perf_session__new(data, false, tool);
1456 	if (IS_ERR(session)) {
1457 		pr_err("Perf session creation failed.\n");
1458 		return PTR_ERR(session);
1459 	}
1460 
1461 	fd = perf_data__fd(data);
1462 	rec->session = session;
1463 
1464 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1465 		pr_err("Compression initialization failed.\n");
1466 		return -1;
1467 	}
1468 
1469 	session->header.env.comp_type  = PERF_COMP_ZSTD;
1470 	session->header.env.comp_level = rec->opts.comp_level;
1471 
1472 	if (rec->opts.kcore &&
1473 	    !record__kcore_readable(&session->machines.host)) {
1474 		pr_err("ERROR: kcore is not readable.\n");
1475 		return -1;
1476 	}
1477 
1478 	record__init_features(rec);
1479 
1480 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1481 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1482 
1483 	if (forks) {
1484 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1485 						    argv, data->is_pipe,
1486 						    workload_exec_failed_signal);
1487 		if (err < 0) {
1488 			pr_err("Couldn't run the workload!\n");
1489 			status = err;
1490 			goto out_delete_session;
1491 		}
1492 	}
1493 
1494 	/*
1495 	 * If we have just single event and are sending data
1496 	 * through pipe, we need to force the ids allocation,
1497 	 * because we synthesize event name through the pipe
1498 	 * and need the id for that.
1499 	 */
1500 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1501 		rec->opts.sample_id = true;
1502 
1503 	if (record__open(rec) != 0) {
1504 		err = -1;
1505 		goto out_child;
1506 	}
1507 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1508 
1509 	if (rec->opts.kcore) {
1510 		err = record__kcore_copy(&session->machines.host, data);
1511 		if (err) {
1512 			pr_err("ERROR: Failed to copy kcore\n");
1513 			goto out_child;
1514 		}
1515 	}
1516 
1517 	err = bpf__apply_obj_config();
1518 	if (err) {
1519 		char errbuf[BUFSIZ];
1520 
1521 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1522 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1523 			 errbuf);
1524 		goto out_child;
1525 	}
1526 
1527 	/*
1528 	 * Normally perf_session__new would do this, but it doesn't have the
1529 	 * evlist.
1530 	 */
1531 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1532 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1533 		rec->tool.ordered_events = false;
1534 	}
1535 
1536 	if (!rec->evlist->nr_groups)
1537 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1538 
1539 	if (data->is_pipe) {
1540 		err = perf_header__write_pipe(fd);
1541 		if (err < 0)
1542 			goto out_child;
1543 	} else {
1544 		err = perf_session__write_header(session, rec->evlist, fd, false);
1545 		if (err < 0)
1546 			goto out_child;
1547 	}
1548 
1549 	if (!rec->no_buildid
1550 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1551 		pr_err("Couldn't generate buildids. "
1552 		       "Use --no-buildid to profile anyway.\n");
1553 		err = -1;
1554 		goto out_child;
1555 	}
1556 
1557 	if (!opts->no_bpf_event)
1558 		bpf_event__add_sb_event(&sb_evlist, &session->header.env);
1559 
1560 	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
1561 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1562 		opts->no_bpf_event = true;
1563 	}
1564 
1565 	err = record__synthesize(rec, false);
1566 	if (err < 0)
1567 		goto out_child;
1568 
1569 	if (rec->realtime_prio) {
1570 		struct sched_param param;
1571 
1572 		param.sched_priority = rec->realtime_prio;
1573 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1574 			pr_err("Could not set realtime priority.\n");
1575 			err = -1;
1576 			goto out_child;
1577 		}
1578 	}
1579 
1580 	/*
1581 	 * When perf is starting the traced process, all the events
1582 	 * (apart from group members) have enable_on_exec=1 set,
1583 	 * so don't spoil it by prematurely enabling them.
1584 	 */
1585 	if (!target__none(&opts->target) && !opts->initial_delay)
1586 		evlist__enable(rec->evlist);
1587 
1588 	/*
1589 	 * Let the child rip
1590 	 */
1591 	if (forks) {
1592 		struct machine *machine = &session->machines.host;
1593 		union perf_event *event;
1594 		pid_t tgid;
1595 
1596 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1597 		if (event == NULL) {
1598 			err = -ENOMEM;
1599 			goto out_child;
1600 		}
1601 
1602 		/*
1603 		 * Some H/W events are generated before COMM event
1604 		 * which is emitted during exec(), so perf script
1605 		 * cannot see a correct process name for those events.
1606 		 * Synthesize COMM event to prevent it.
1607 		 */
1608 		tgid = perf_event__synthesize_comm(tool, event,
1609 						   rec->evlist->workload.pid,
1610 						   process_synthesized_event,
1611 						   machine);
1612 		free(event);
1613 
1614 		if (tgid == -1)
1615 			goto out_child;
1616 
1617 		event = malloc(sizeof(event->namespaces) +
1618 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1619 			       machine->id_hdr_size);
1620 		if (event == NULL) {
1621 			err = -ENOMEM;
1622 			goto out_child;
1623 		}
1624 
1625 		/*
1626 		 * Synthesize NAMESPACES event for the command specified.
1627 		 */
1628 		perf_event__synthesize_namespaces(tool, event,
1629 						  rec->evlist->workload.pid,
1630 						  tgid, process_synthesized_event,
1631 						  machine);
1632 		free(event);
1633 
1634 		perf_evlist__start_workload(rec->evlist);
1635 	}
1636 
1637 	if (opts->initial_delay) {
1638 		usleep(opts->initial_delay * USEC_PER_MSEC);
1639 		evlist__enable(rec->evlist);
1640 	}
1641 
1642 	trigger_ready(&auxtrace_snapshot_trigger);
1643 	trigger_ready(&switch_output_trigger);
1644 	perf_hooks__invoke_record_start();
1645 	for (;;) {
1646 		unsigned long long hits = rec->samples;
1647 
1648 		/*
1649 		 * rec->evlist->bkw_mmap_state is possible to be
1650 		 * BKW_MMAP_EMPTY here: when done == true and
1651 		 * hits != rec->samples in previous round.
1652 		 *
1653 		 * perf_evlist__toggle_bkw_mmap ensure we never
1654 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1655 		 */
1656 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1657 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1658 
1659 		if (record__mmap_read_all(rec, false) < 0) {
1660 			trigger_error(&auxtrace_snapshot_trigger);
1661 			trigger_error(&switch_output_trigger);
1662 			err = -1;
1663 			goto out_child;
1664 		}
1665 
1666 		if (auxtrace_record__snapshot_started) {
1667 			auxtrace_record__snapshot_started = 0;
1668 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1669 				record__read_auxtrace_snapshot(rec, false);
1670 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1671 				pr_err("AUX area tracing snapshot failed\n");
1672 				err = -1;
1673 				goto out_child;
1674 			}
1675 		}
1676 
1677 		if (trigger_is_hit(&switch_output_trigger)) {
1678 			/*
1679 			 * If switch_output_trigger is hit, the data in
1680 			 * overwritable ring buffer should have been collected,
1681 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1682 			 *
1683 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
1684 			 * record__mmap_read_all() didn't collect data from
1685 			 * overwritable ring buffer. Read again.
1686 			 */
1687 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1688 				continue;
1689 			trigger_ready(&switch_output_trigger);
1690 
1691 			/*
1692 			 * Reenable events in overwrite ring buffer after
1693 			 * record__mmap_read_all(): we should have collected
1694 			 * data from it.
1695 			 */
1696 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1697 
1698 			if (!quiet)
1699 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1700 					waking);
1701 			waking = 0;
1702 			fd = record__switch_output(rec, false);
1703 			if (fd < 0) {
1704 				pr_err("Failed to switch to new file\n");
1705 				trigger_error(&switch_output_trigger);
1706 				err = fd;
1707 				goto out_child;
1708 			}
1709 
1710 			/* re-arm the alarm */
1711 			if (rec->switch_output.time)
1712 				alarm(rec->switch_output.time);
1713 		}
1714 
1715 		if (hits == rec->samples) {
1716 			if (done || draining)
1717 				break;
1718 			err = evlist__poll(rec->evlist, -1);
1719 			/*
1720 			 * Propagate error, only if there's any. Ignore positive
1721 			 * number of returned events and interrupt error.
1722 			 */
1723 			if (err > 0 || (err < 0 && errno == EINTR))
1724 				err = 0;
1725 			waking++;
1726 
1727 			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1728 				draining = true;
1729 		}
1730 
1731 		/*
1732 		 * When perf is starting the traced process, at the end events
1733 		 * die with the process and we wait for that. Thus no need to
1734 		 * disable events in this case.
1735 		 */
1736 		if (done && !disabled && !target__none(&opts->target)) {
1737 			trigger_off(&auxtrace_snapshot_trigger);
1738 			evlist__disable(rec->evlist);
1739 			disabled = true;
1740 		}
1741 	}
1742 
1743 	trigger_off(&auxtrace_snapshot_trigger);
1744 	trigger_off(&switch_output_trigger);
1745 
1746 	if (opts->auxtrace_snapshot_on_exit)
1747 		record__auxtrace_snapshot_exit(rec);
1748 
1749 	if (forks && workload_exec_errno) {
1750 		char msg[STRERR_BUFSIZE];
1751 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1752 		pr_err("Workload failed: %s\n", emsg);
1753 		err = -1;
1754 		goto out_child;
1755 	}
1756 
1757 	if (!quiet)
1758 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1759 
1760 	if (target__none(&rec->opts.target))
1761 		record__synthesize_workload(rec, true);
1762 
1763 out_child:
1764 	record__mmap_read_all(rec, true);
1765 	record__aio_mmap_read_sync(rec);
1766 
1767 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1768 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1769 		session->header.env.comp_ratio = ratio + 0.5;
1770 	}
1771 
1772 	if (forks) {
1773 		int exit_status;
1774 
1775 		if (!child_finished)
1776 			kill(rec->evlist->workload.pid, SIGTERM);
1777 
1778 		wait(&exit_status);
1779 
1780 		if (err < 0)
1781 			status = err;
1782 		else if (WIFEXITED(exit_status))
1783 			status = WEXITSTATUS(exit_status);
1784 		else if (WIFSIGNALED(exit_status))
1785 			signr = WTERMSIG(exit_status);
1786 	} else
1787 		status = err;
1788 
1789 	record__synthesize(rec, true);
1790 	/* this will be recalculated during process_buildids() */
1791 	rec->samples = 0;
1792 
1793 	if (!err) {
1794 		if (!rec->timestamp_filename) {
1795 			record__finish_output(rec);
1796 		} else {
1797 			fd = record__switch_output(rec, true);
1798 			if (fd < 0) {
1799 				status = fd;
1800 				goto out_delete_session;
1801 			}
1802 		}
1803 	}
1804 
1805 	perf_hooks__invoke_record_end();
1806 
1807 	if (!err && !quiet) {
1808 		char samples[128];
1809 		const char *postfix = rec->timestamp_filename ?
1810 					".<timestamp>" : "";
1811 
1812 		if (rec->samples && !rec->opts.full_auxtrace)
1813 			scnprintf(samples, sizeof(samples),
1814 				  " (%" PRIu64 " samples)", rec->samples);
1815 		else
1816 			samples[0] = '\0';
1817 
1818 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
1819 			perf_data__size(data) / 1024.0 / 1024.0,
1820 			data->path, postfix, samples);
1821 		if (ratio) {
1822 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
1823 					rec->session->bytes_transferred / 1024.0 / 1024.0,
1824 					ratio);
1825 		}
1826 		fprintf(stderr, " ]\n");
1827 	}
1828 
1829 out_delete_session:
1830 	zstd_fini(&session->zstd_data);
1831 	perf_session__delete(session);
1832 
1833 	if (!opts->no_bpf_event)
1834 		perf_evlist__stop_sb_thread(sb_evlist);
1835 	return status;
1836 }
1837 
1838 static void callchain_debug(struct callchain_param *callchain)
1839 {
1840 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1841 
1842 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1843 
1844 	if (callchain->record_mode == CALLCHAIN_DWARF)
1845 		pr_debug("callchain: stack dump size %d\n",
1846 			 callchain->dump_size);
1847 }
1848 
1849 int record_opts__parse_callchain(struct record_opts *record,
1850 				 struct callchain_param *callchain,
1851 				 const char *arg, bool unset)
1852 {
1853 	int ret;
1854 	callchain->enabled = !unset;
1855 
1856 	/* --no-call-graph */
1857 	if (unset) {
1858 		callchain->record_mode = CALLCHAIN_NONE;
1859 		pr_debug("callchain: disabled\n");
1860 		return 0;
1861 	}
1862 
1863 	ret = parse_callchain_record_opt(arg, callchain);
1864 	if (!ret) {
1865 		/* Enable data address sampling for DWARF unwind. */
1866 		if (callchain->record_mode == CALLCHAIN_DWARF)
1867 			record->sample_address = true;
1868 		callchain_debug(callchain);
1869 	}
1870 
1871 	return ret;
1872 }
1873 
1874 int record_parse_callchain_opt(const struct option *opt,
1875 			       const char *arg,
1876 			       int unset)
1877 {
1878 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1879 }
1880 
1881 int record_callchain_opt(const struct option *opt,
1882 			 const char *arg __maybe_unused,
1883 			 int unset __maybe_unused)
1884 {
1885 	struct callchain_param *callchain = opt->value;
1886 
1887 	callchain->enabled = true;
1888 
1889 	if (callchain->record_mode == CALLCHAIN_NONE)
1890 		callchain->record_mode = CALLCHAIN_FP;
1891 
1892 	callchain_debug(callchain);
1893 	return 0;
1894 }
1895 
1896 static int perf_record_config(const char *var, const char *value, void *cb)
1897 {
1898 	struct record *rec = cb;
1899 
1900 	if (!strcmp(var, "record.build-id")) {
1901 		if (!strcmp(value, "cache"))
1902 			rec->no_buildid_cache = false;
1903 		else if (!strcmp(value, "no-cache"))
1904 			rec->no_buildid_cache = true;
1905 		else if (!strcmp(value, "skip"))
1906 			rec->no_buildid = true;
1907 		else
1908 			return -1;
1909 		return 0;
1910 	}
1911 	if (!strcmp(var, "record.call-graph")) {
1912 		var = "call-graph.record-mode";
1913 		return perf_default_config(var, value, cb);
1914 	}
1915 #ifdef HAVE_AIO_SUPPORT
1916 	if (!strcmp(var, "record.aio")) {
1917 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1918 		if (!rec->opts.nr_cblocks)
1919 			rec->opts.nr_cblocks = nr_cblocks_default;
1920 	}
1921 #endif
1922 
1923 	return 0;
1924 }
1925 
1926 struct clockid_map {
1927 	const char *name;
1928 	int clockid;
1929 };
1930 
1931 #define CLOCKID_MAP(n, c)	\
1932 	{ .name = n, .clockid = (c), }
1933 
1934 #define CLOCKID_END	{ .name = NULL, }
1935 
1936 
1937 /*
1938  * Add the missing ones, we need to build on many distros...
1939  */
1940 #ifndef CLOCK_MONOTONIC_RAW
1941 #define CLOCK_MONOTONIC_RAW 4
1942 #endif
1943 #ifndef CLOCK_BOOTTIME
1944 #define CLOCK_BOOTTIME 7
1945 #endif
1946 #ifndef CLOCK_TAI
1947 #define CLOCK_TAI 11
1948 #endif
1949 
1950 static const struct clockid_map clockids[] = {
1951 	/* available for all events, NMI safe */
1952 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1953 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1954 
1955 	/* available for some events */
1956 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1957 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1958 	CLOCKID_MAP("tai", CLOCK_TAI),
1959 
1960 	/* available for the lazy */
1961 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1962 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1963 	CLOCKID_MAP("real", CLOCK_REALTIME),
1964 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1965 
1966 	CLOCKID_END,
1967 };
1968 
1969 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1970 {
1971 	struct timespec res;
1972 
1973 	*res_ns = 0;
1974 	if (!clock_getres(clk_id, &res))
1975 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1976 	else
1977 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1978 
1979 	return 0;
1980 }
1981 
1982 static int parse_clockid(const struct option *opt, const char *str, int unset)
1983 {
1984 	struct record_opts *opts = (struct record_opts *)opt->value;
1985 	const struct clockid_map *cm;
1986 	const char *ostr = str;
1987 
1988 	if (unset) {
1989 		opts->use_clockid = 0;
1990 		return 0;
1991 	}
1992 
1993 	/* no arg passed */
1994 	if (!str)
1995 		return 0;
1996 
1997 	/* no setting it twice */
1998 	if (opts->use_clockid)
1999 		return -1;
2000 
2001 	opts->use_clockid = true;
2002 
2003 	/* if its a number, we're done */
2004 	if (sscanf(str, "%d", &opts->clockid) == 1)
2005 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2006 
2007 	/* allow a "CLOCK_" prefix to the name */
2008 	if (!strncasecmp(str, "CLOCK_", 6))
2009 		str += 6;
2010 
2011 	for (cm = clockids; cm->name; cm++) {
2012 		if (!strcasecmp(str, cm->name)) {
2013 			opts->clockid = cm->clockid;
2014 			return get_clockid_res(opts->clockid,
2015 					       &opts->clockid_res_ns);
2016 		}
2017 	}
2018 
2019 	opts->use_clockid = false;
2020 	ui__warning("unknown clockid %s, check man page\n", ostr);
2021 	return -1;
2022 }
2023 
2024 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2025 {
2026 	struct record_opts *opts = (struct record_opts *)opt->value;
2027 
2028 	if (unset || !str)
2029 		return 0;
2030 
2031 	if (!strcasecmp(str, "node"))
2032 		opts->affinity = PERF_AFFINITY_NODE;
2033 	else if (!strcasecmp(str, "cpu"))
2034 		opts->affinity = PERF_AFFINITY_CPU;
2035 
2036 	return 0;
2037 }
2038 
2039 static int parse_output_max_size(const struct option *opt,
2040 				 const char *str, int unset)
2041 {
2042 	unsigned long *s = (unsigned long *)opt->value;
2043 	static struct parse_tag tags_size[] = {
2044 		{ .tag  = 'B', .mult = 1       },
2045 		{ .tag  = 'K', .mult = 1 << 10 },
2046 		{ .tag  = 'M', .mult = 1 << 20 },
2047 		{ .tag  = 'G', .mult = 1 << 30 },
2048 		{ .tag  = 0 },
2049 	};
2050 	unsigned long val;
2051 
2052 	if (unset) {
2053 		*s = 0;
2054 		return 0;
2055 	}
2056 
2057 	val = parse_tag_value(str, tags_size);
2058 	if (val != (unsigned long) -1) {
2059 		*s = val;
2060 		return 0;
2061 	}
2062 
2063 	return -1;
2064 }
2065 
2066 static int record__parse_mmap_pages(const struct option *opt,
2067 				    const char *str,
2068 				    int unset __maybe_unused)
2069 {
2070 	struct record_opts *opts = opt->value;
2071 	char *s, *p;
2072 	unsigned int mmap_pages;
2073 	int ret;
2074 
2075 	if (!str)
2076 		return -EINVAL;
2077 
2078 	s = strdup(str);
2079 	if (!s)
2080 		return -ENOMEM;
2081 
2082 	p = strchr(s, ',');
2083 	if (p)
2084 		*p = '\0';
2085 
2086 	if (*s) {
2087 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2088 		if (ret)
2089 			goto out_free;
2090 		opts->mmap_pages = mmap_pages;
2091 	}
2092 
2093 	if (!p) {
2094 		ret = 0;
2095 		goto out_free;
2096 	}
2097 
2098 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2099 	if (ret)
2100 		goto out_free;
2101 
2102 	opts->auxtrace_mmap_pages = mmap_pages;
2103 
2104 out_free:
2105 	free(s);
2106 	return ret;
2107 }
2108 
2109 static void switch_output_size_warn(struct record *rec)
2110 {
2111 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2112 	struct switch_output *s = &rec->switch_output;
2113 
2114 	wakeup_size /= 2;
2115 
2116 	if (s->size < wakeup_size) {
2117 		char buf[100];
2118 
2119 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2120 		pr_warning("WARNING: switch-output data size lower than "
2121 			   "wakeup kernel buffer size (%s) "
2122 			   "expect bigger perf.data sizes\n", buf);
2123 	}
2124 }
2125 
2126 static int switch_output_setup(struct record *rec)
2127 {
2128 	struct switch_output *s = &rec->switch_output;
2129 	static struct parse_tag tags_size[] = {
2130 		{ .tag  = 'B', .mult = 1       },
2131 		{ .tag  = 'K', .mult = 1 << 10 },
2132 		{ .tag  = 'M', .mult = 1 << 20 },
2133 		{ .tag  = 'G', .mult = 1 << 30 },
2134 		{ .tag  = 0 },
2135 	};
2136 	static struct parse_tag tags_time[] = {
2137 		{ .tag  = 's', .mult = 1        },
2138 		{ .tag  = 'm', .mult = 60       },
2139 		{ .tag  = 'h', .mult = 60*60    },
2140 		{ .tag  = 'd', .mult = 60*60*24 },
2141 		{ .tag  = 0 },
2142 	};
2143 	unsigned long val;
2144 
2145 	if (!s->set)
2146 		return 0;
2147 
2148 	if (!strcmp(s->str, "signal")) {
2149 		s->signal = true;
2150 		pr_debug("switch-output with SIGUSR2 signal\n");
2151 		goto enabled;
2152 	}
2153 
2154 	val = parse_tag_value(s->str, tags_size);
2155 	if (val != (unsigned long) -1) {
2156 		s->size = val;
2157 		pr_debug("switch-output with %s size threshold\n", s->str);
2158 		goto enabled;
2159 	}
2160 
2161 	val = parse_tag_value(s->str, tags_time);
2162 	if (val != (unsigned long) -1) {
2163 		s->time = val;
2164 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2165 			 s->str, s->time);
2166 		goto enabled;
2167 	}
2168 
2169 	return -1;
2170 
2171 enabled:
2172 	rec->timestamp_filename = true;
2173 	s->enabled              = true;
2174 
2175 	if (s->size && !rec->opts.no_buffering)
2176 		switch_output_size_warn(rec);
2177 
2178 	return 0;
2179 }
2180 
/* Usage lines of 'perf record', NULL terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Non-static alias of the usage lines above. */
const char * const *record_usage = __record_usage;
2187 
2188 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2189 				  struct perf_sample *sample, struct machine *machine)
2190 {
2191 	/*
2192 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2193 	 * no need to add them twice.
2194 	 */
2195 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2196 		return 0;
2197 	return perf_event__process_mmap(tool, event, sample, machine);
2198 }
2199 
2200 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2201 				   struct perf_sample *sample, struct machine *machine)
2202 {
2203 	/*
2204 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2205 	 * no need to add them twice.
2206 	 */
2207 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2208 		return 0;
2209 
2210 	return perf_event__process_mmap2(tool, event, sample, machine);
2211 }
2212 
2213 /*
2214  * XXX Ideally would be local to cmd_record() and passed to a record__new
2215  * because we need to have access to it in record__exit, that is called
2216  * after cmd_record() exits, but since record_options need to be accessible to
2217  * builtin-script, leave it here.
2218  *
 * At least we don't touch it in all the other functions here directly.
2220  *
2221  * Just say no to tons of global variables, sigh.
2222  */
/* The single 'perf record' state object; see the XXX note above. */
static struct record record = {
	/* Defaults in effect before the config file and command line are parsed. */
	.opts = {
		.sample_time	     = true,
		/* NOTE(review): the *_MAX values look like "not set by the user" sentinels - confirm */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
	},
	/* Event callbacks installed on the tool. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		/* The mmap wrappers only forward user space mappings. */
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};
2247 
/*
 * Help text shown for --call-graph: the shared CALLCHAIN_RECORD_HELP text
 * followed by a line naming the default mode.
 */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";
2250 
/* Set by --dry-run: cmd_record() stops after option handling without recording. */
static bool dry_run;
2252 
2253 /*
2254  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2255  * with it and switch to use the library functions in perf_evlist that came
2256  * from builtin-record.c, i.e. use record_opts,
2257  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
2258  * using pipes, etc.
2259  */
2260 static struct option __record_options[] = {
2261 	OPT_CALLBACK('e', "event", &record.evlist, "event",
2262 		     "event selector. use 'perf list' to list available events",
2263 		     parse_events_option),
2264 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2265 		     "event filter", parse_filter),
2266 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2267 			   NULL, "don't record events from perf itself",
2268 			   exclude_perf),
2269 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2270 		    "record events on existing process id"),
2271 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2272 		    "record events on existing thread id"),
2273 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
2274 		    "collect data with this RT SCHED_FIFO priority"),
2275 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2276 		    "collect data without buffering"),
2277 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2278 		    "collect raw sample records from all opened counters"),
2279 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2280 			    "system-wide collection from all CPUs"),
2281 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2282 		    "list of cpus to monitor"),
2283 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2284 	OPT_STRING('o', "output", &record.data.path, "file",
2285 		    "output file name"),
2286 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2287 			&record.opts.no_inherit_set,
2288 			"child tasks do not inherit counters"),
2289 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2290 		    "synthesize non-sample events at the end of output"),
2291 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2292 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
2293 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2294 		    "Fail if the specified frequency can't be used"),
2295 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2296 		     "profile at this frequency",
2297 		      record__parse_freq),
2298 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2299 		     "number of mmap data pages and AUX area tracing mmap pages",
2300 		     record__parse_mmap_pages),
2301 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2302 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2303 		     record__mmap_flush_parse),
2304 	OPT_BOOLEAN(0, "group", &record.opts.group,
2305 		    "put the counters into a counter group"),
2306 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2307 			   NULL, "enables call-graph recording" ,
2308 			   &record_callchain_opt),
2309 	OPT_CALLBACK(0, "call-graph", &record.opts,
2310 		     "record_mode[,record_size]", record_callchain_help,
2311 		     &record_parse_callchain_opt),
2312 	OPT_INCR('v', "verbose", &verbose,
2313 		    "be more verbose (show counter open errors, etc)"),
2314 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2315 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2316 		    "per thread counts"),
2317 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2318 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2319 		    "Record the sample physical addresses"),
2320 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2321 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2322 			&record.opts.sample_time_set,
2323 			"Record the sample timestamps"),
2324 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2325 			"Record the sample period"),
2326 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2327 		    "don't sample"),
2328 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2329 			&record.no_buildid_cache_set,
2330 			"do not update the buildid cache"),
2331 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2332 			&record.no_buildid_set,
2333 			"do not collect buildids in perf.data"),
2334 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2335 		     "monitor event in cgroup name only",
2336 		     parse_cgroups),
2337 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2338 		  "ms to wait before starting measurement after program start"),
2339 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2340 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2341 		   "user to profile"),
2342 
2343 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2344 		     "branch any", "sample any taken branches",
2345 		     parse_branch_stack),
2346 
2347 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2348 		     "branch filter mask", "branch stack filter modes",
2349 		     parse_branch_stack),
2350 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2351 		    "sample by weight (on special events only)"),
2352 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2353 		    "sample transaction flags (special events only)"),
2354 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2355 		    "use per-thread mmaps"),
2356 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2357 		    "sample selected machine registers on interrupt,"
2358 		    " use '-I?' to list register names", parse_intr_regs),
2359 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2360 		    "sample selected machine registers on interrupt,"
2361 		    " use '--user-regs=?' to list register names", parse_user_regs),
2362 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2363 		    "Record running/enabled time of read (:S) events"),
2364 	OPT_CALLBACK('k', "clockid", &record.opts,
2365 	"clockid", "clockid to use for events, see clock_gettime()",
2366 	parse_clockid),
2367 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2368 			  "opts", "AUX area tracing Snapshot Mode", ""),
2369 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2370 			  "opts", "sample AUX area", ""),
2371 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2372 			"per thread proc mmap processing timeout in ms"),
2373 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2374 		    "Record namespaces events"),
2375 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2376 		    "Record cgroup events"),
2377 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2378 		    "Record context switch events"),
2379 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2380 			 "Configure all used events to run in kernel space.",
2381 			 PARSE_OPT_EXCLUSIVE),
2382 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2383 			 "Configure all used events to run in user space.",
2384 			 PARSE_OPT_EXCLUSIVE),
2385 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2386 		    "collect kernel callchains"),
2387 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2388 		    "collect user callchains"),
2389 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2390 		   "clang binary to use for compiling BPF scriptlets"),
2391 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2392 		   "options passed to clang when compiling BPF scriptlets"),
2393 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2394 		   "file", "vmlinux pathname"),
2395 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2396 		    "Record build-id of all DSOs regardless of hits"),
2397 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2398 		    "append timestamp to output filename"),
2399 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2400 		    "Record timestamp boundary (time of first/last samples)"),
2401 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2402 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2403 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2404 			  "signal"),
2405 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2406 		   "Limit number of switch output generated files"),
2407 	OPT_BOOLEAN(0, "dry-run", &dry_run,
2408 		    "Parse options then exit"),
2409 #ifdef HAVE_AIO_SUPPORT
2410 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2411 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2412 		     record__aio_parse),
2413 #endif
2414 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2415 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2416 		     record__parse_affinity),
2417 #ifdef HAVE_ZSTD_SUPPORT
2418 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2419 			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2420 			    record__parse_comp_level),
2421 #endif
2422 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
2423 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
2424 	OPT_END()
2425 };
2426 
/* Non-static handle on the option table above (see the XXX note on it). */
struct option *record_options = __record_options;
2428 
2429 int cmd_record(int argc, const char **argv)
2430 {
2431 	int err;
2432 	struct record *rec = &record;
2433 	char errbuf[BUFSIZ];
2434 
2435 	setlocale(LC_ALL, "");
2436 
2437 #ifndef HAVE_LIBBPF_SUPPORT
2438 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2439 	set_nobuild('\0', "clang-path", true);
2440 	set_nobuild('\0', "clang-opt", true);
2441 # undef set_nobuild
2442 #endif
2443 
2444 #ifndef HAVE_BPF_PROLOGUE
2445 # if !defined (HAVE_DWARF_SUPPORT)
2446 #  define REASON  "NO_DWARF=1"
2447 # elif !defined (HAVE_LIBBPF_SUPPORT)
2448 #  define REASON  "NO_LIBBPF=1"
2449 # else
2450 #  define REASON  "this architecture doesn't support BPF prologue"
2451 # endif
2452 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2453 	set_nobuild('\0', "vmlinux", true);
2454 # undef set_nobuild
2455 # undef REASON
2456 #endif
2457 
2458 	rec->opts.affinity = PERF_AFFINITY_SYS;
2459 
2460 	rec->evlist = evlist__new();
2461 	if (rec->evlist == NULL)
2462 		return -ENOMEM;
2463 
2464 	err = perf_config(perf_record_config, rec);
2465 	if (err)
2466 		return err;
2467 
2468 	argc = parse_options(argc, argv, record_options, record_usage,
2469 			    PARSE_OPT_STOP_AT_NON_OPTION);
2470 	if (quiet)
2471 		perf_quiet_option();
2472 
2473 	/* Make system wide (-a) the default target. */
2474 	if (!argc && target__none(&rec->opts.target))
2475 		rec->opts.target.system_wide = true;
2476 
2477 	if (nr_cgroups && !rec->opts.target.system_wide) {
2478 		usage_with_options_msg(record_usage, record_options,
2479 			"cgroup monitoring only available in system-wide mode");
2480 
2481 	}
2482 
2483 	if (rec->opts.kcore)
2484 		rec->data.is_dir = true;
2485 
2486 	if (rec->opts.comp_level != 0) {
2487 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2488 		rec->no_buildid = true;
2489 	}
2490 
2491 	if (rec->opts.record_switch_events &&
2492 	    !perf_can_record_switch_events()) {
2493 		ui__error("kernel does not support recording context switch events\n");
2494 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2495 		return -EINVAL;
2496 	}
2497 
2498 	if (switch_output_setup(rec)) {
2499 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2500 		return -EINVAL;
2501 	}
2502 
2503 	if (rec->switch_output.time) {
2504 		signal(SIGALRM, alarm_sig_handler);
2505 		alarm(rec->switch_output.time);
2506 	}
2507 
2508 	if (rec->switch_output.num_files) {
2509 		rec->switch_output.filenames = calloc(sizeof(char *),
2510 						      rec->switch_output.num_files);
2511 		if (!rec->switch_output.filenames)
2512 			return -EINVAL;
2513 	}
2514 
2515 	/*
2516 	 * Allow aliases to facilitate the lookup of symbols for address
2517 	 * filters. Refer to auxtrace_parse_filters().
2518 	 */
2519 	symbol_conf.allow_aliases = true;
2520 
2521 	symbol__init(NULL);
2522 
2523 	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2524 		rec->affinity_mask.nbits = cpu__max_cpu();
2525 		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2526 		if (!rec->affinity_mask.bits) {
2527 			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2528 			return -ENOMEM;
2529 		}
2530 		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2531 	}
2532 
2533 	err = record__auxtrace_init(rec);
2534 	if (err)
2535 		goto out;
2536 
2537 	if (dry_run)
2538 		goto out;
2539 
2540 	err = bpf__setup_stdout(rec->evlist);
2541 	if (err) {
2542 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2543 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2544 			 errbuf);
2545 		goto out;
2546 	}
2547 
2548 	err = -ENOMEM;
2549 
2550 	if (rec->no_buildid_cache || rec->no_buildid) {
2551 		disable_buildid_cache();
2552 	} else if (rec->switch_output.enabled) {
2553 		/*
2554 		 * In 'perf record --switch-output', disable buildid
2555 		 * generation by default to reduce data file switching
2556 		 * overhead. Still generate buildid if they are required
2557 		 * explicitly using
2558 		 *
2559 		 *  perf record --switch-output --no-no-buildid \
2560 		 *              --no-no-buildid-cache
2561 		 *
2562 		 * Following code equals to:
2563 		 *
2564 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2565 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2566 		 *         disable_buildid_cache();
2567 		 */
2568 		bool disable = true;
2569 
2570 		if (rec->no_buildid_set && !rec->no_buildid)
2571 			disable = false;
2572 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2573 			disable = false;
2574 		if (disable) {
2575 			rec->no_buildid = true;
2576 			rec->no_buildid_cache = true;
2577 			disable_buildid_cache();
2578 		}
2579 	}
2580 
2581 	if (record.opts.overwrite)
2582 		record.opts.tail_synthesize = true;
2583 
2584 	if (rec->evlist->core.nr_entries == 0 &&
2585 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2586 		pr_err("Not enough memory for event selector list\n");
2587 		goto out;
2588 	}
2589 
2590 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2591 		rec->opts.no_inherit = true;
2592 
2593 	err = target__validate(&rec->opts.target);
2594 	if (err) {
2595 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2596 		ui__warning("%s\n", errbuf);
2597 	}
2598 
2599 	err = target__parse_uid(&rec->opts.target);
2600 	if (err) {
2601 		int saved_errno = errno;
2602 
2603 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2604 		ui__error("%s", errbuf);
2605 
2606 		err = -saved_errno;
2607 		goto out;
2608 	}
2609 
2610 	/* Enable ignoring missing threads when -u/-p option is defined. */
2611 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2612 
2613 	err = -ENOMEM;
2614 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2615 		usage_with_options(record_usage, record_options);
2616 
2617 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2618 	if (err)
2619 		goto out;
2620 
2621 	/*
2622 	 * We take all buildids when the file contains
2623 	 * AUX area tracing data because we do not decode the
2624 	 * trace because it would take too long.
2625 	 */
2626 	if (rec->opts.full_auxtrace)
2627 		rec->buildid_all = true;
2628 
2629 	if (record_opts__config(&rec->opts)) {
2630 		err = -EINVAL;
2631 		goto out;
2632 	}
2633 
2634 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2635 		rec->opts.nr_cblocks = nr_cblocks_max;
2636 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2637 
2638 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2639 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2640 
2641 	if (rec->opts.comp_level > comp_level_max)
2642 		rec->opts.comp_level = comp_level_max;
2643 	pr_debug("comp level: %d\n", rec->opts.comp_level);
2644 
2645 	err = __cmd_record(&record, argc, argv);
2646 out:
2647 	bitmap_free(rec->affinity_mask.bits);
2648 	evlist__delete(rec->evlist);
2649 	symbol__exit();
2650 	auxtrace_record__free(rec->itr);
2651 	return err;
2652 }
2653 
2654 static void snapshot_sig_handler(int sig __maybe_unused)
2655 {
2656 	struct record *rec = &record;
2657 
2658 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2659 		trigger_hit(&auxtrace_snapshot_trigger);
2660 		auxtrace_record__snapshot_started = 1;
2661 		if (auxtrace_record__snapshot_start(record.itr))
2662 			trigger_error(&auxtrace_snapshot_trigger);
2663 	}
2664 
2665 	if (switch_output_signal(rec))
2666 		trigger_hit(&switch_output_trigger);
2667 }
2668 
2669 static void alarm_sig_handler(int sig __maybe_unused)
2670 {
2671 	struct record *rec = &record;
2672 
2673 	if (switch_output_time(rec))
2674 		trigger_hit(&switch_output_trigger);
2675 }
2676