xref: /openbmc/linux/tools/perf/builtin-record.c (revision 015d239a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/synthetic-events.h"
43 #include "util/time-utils.h"
44 #include "util/units.h"
45 #include "util/bpf-event.h"
46 #include "asm/bug.h"
47 #include "perf.h"
48 
49 #include <errno.h>
50 #include <inttypes.h>
51 #include <locale.h>
52 #include <poll.h>
53 #include <unistd.h>
54 #include <sched.h>
55 #include <signal.h>
56 #include <sys/mman.h>
57 #include <sys/wait.h>
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 #include <fcntl.h>
61 #include <linux/err.h>
62 #include <linux/string.h>
63 #include <linux/time64.h>
64 #include <linux/zalloc.h>
65 #include <linux/bitmap.h>
66 
/*
 * State for the --switch-output option: rotate the perf.data output
 * file on a signal, when a written-size threshold is crossed, or
 * periodically (time based).
 */
struct switch_output {
	bool		 enabled;	/* any switch-output mode requested */
	bool		 signal;	/* rotate on signal delivery */
	unsigned long	 size;		/* rotate after this many bytes (0 = off) */
	unsigned long	 time;		/* rotate periodically (0 = off) */
	const char	*str;		/* raw option argument as given */
	bool		 set;		/* option present on the command line */
	char		 **filenames;	/* ring of generated output file names */
	int		 num_files;	/* capacity of filenames[] */
	int		 cur_file;	/* index of current entry in filenames[] */
};
78 
/* Per-invocation state of the 'perf record' command. */
struct record {
	struct perf_tool	tool;		/* event processing callbacks */
	struct record_opts	opts;		/* parsed command line options */
	u64			bytes_written;	/* payload bytes written so far */
	struct perf_data	data;		/* output file descriptor/state */
	struct auxtrace_record	*itr;		/* AUX area tracing state, if any */
	struct evlist	*evlist;		/* events being recorded */
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;	/* mark all DSOs, not just hit ones */
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;	/* output rotation state */
	unsigned long long	samples;
	struct mmap_cpu_mask	affinity_mask;	/* last mask set by record__adjust_affinity() */
	unsigned long		output_max_size;	/* = 0: unlimited */
};
100 
/* Set (by signal handlers or the size limit) to stop the record loop. */
static volatile int done;

/* Set when a signal has requested an AUX area tracing snapshot. */
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

/* Human-readable names for the PERF_AFFINITY_* modes, indexed by mode. */
static const char *affinity_tags[PERF_AFFINITY_MAX] = {
	"SYS", "NODE", "CPU"
};
110 
111 static bool switch_output_signal(struct record *rec)
112 {
113 	return rec->switch_output.signal &&
114 	       trigger_is_ready(&switch_output_trigger);
115 }
116 
117 static bool switch_output_size(struct record *rec)
118 {
119 	return rec->switch_output.size &&
120 	       trigger_is_ready(&switch_output_trigger) &&
121 	       (rec->bytes_written >= rec->switch_output.size);
122 }
123 
124 static bool switch_output_time(struct record *rec)
125 {
126 	return rec->switch_output.time &&
127 	       trigger_is_ready(&switch_output_trigger);
128 }
129 
130 static bool record__output_max_size_exceeded(struct record *rec)
131 {
132 	return rec->output_max_size &&
133 	       (rec->bytes_written >= rec->output_max_size);
134 }
135 
/*
 * Write @size bytes at @bf to the output file and account them in
 * rec->bytes_written, which drives both the output size cap (stops the
 * session) and the size-based switch-output trigger.  @map is unused
 * here; the aio write path has its own accounting.  Returns 0 on
 * success, -1 on write failure.
 */
static int record__write(struct record *rec, struct mmap *map __maybe_unused,
			 void *bf, size_t size)
{
	struct perf_data_file *file = &rec->session->data->file;

	if (perf_data_file__write(file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (record__output_max_size_exceeded(rec) && !done) {
		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
				" stopping session ]\n",
				rec->bytes_written >> 10);
		done = 1;
	}

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}
160 
/* Forward declarations: defined below, used by the write fast paths. */
static int record__aio_enabled(struct record *rec);
static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
			    void *src, size_t src_size);
165 
166 #ifdef HAVE_AIO_SUPPORT
/*
 * Queue an asynchronous write of @size bytes at @buf to @trace_fd at
 * file offset @off, retrying while the queue is full (EAGAIN).  On a
 * hard failure the control block is marked free (aio_fildes == -1).
 * Returns 0 when queued, -1 on failure.
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	while ((rc = aio_write(cblock)) != 0) {
		if (errno == EAGAIN)
			continue;	/* queue full: retry */
		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		break;
	}

	return rc;
}
191 
192 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
193 {
194 	void *rem_buf;
195 	off_t rem_off;
196 	size_t rem_size;
197 	int rc, aio_errno;
198 	ssize_t aio_ret, written;
199 
200 	aio_errno = aio_error(cblock);
201 	if (aio_errno == EINPROGRESS)
202 		return 0;
203 
204 	written = aio_ret = aio_return(cblock);
205 	if (aio_ret < 0) {
206 		if (aio_errno != EINTR)
207 			pr_err("failed to write perf data, error: %m\n");
208 		written = 0;
209 	}
210 
211 	rem_size = cblock->aio_nbytes - written;
212 
213 	if (rem_size == 0) {
214 		cblock->aio_fildes = -1;
215 		/*
216 		 * md->refcount is incremented in record__aio_pushfn() for
217 		 * every aio write request started in record__aio_push() so
218 		 * decrement it because the request is now complete.
219 		 */
220 		perf_mmap__put(&md->core);
221 		rc = 1;
222 	} else {
223 		/*
224 		 * aio write request may require restart with the
225 		 * reminder if the kernel didn't write whole
226 		 * chunk at once.
227 		 */
228 		rem_off = cblock->aio_offset + written;
229 		rem_buf = (void *)(cblock->aio_buf + written);
230 		record__aio_write(cblock, cblock->aio_fildes,
231 				rem_buf, rem_size, rem_off);
232 		rc = 0;
233 	}
234 
235 	return rc;
236 }
237 
/*
 * Reap completed aio write requests of @md.
 *
 * With @sync_all false, return the index of the first free control
 * block so record__aio_push() can reuse it.  With @sync_all true, wait
 * until every in-flight request has completed, then return -1.  While
 * all control blocks are busy, block in aio_suspend() with a 1ms
 * timeout per round.
 */
static int record__aio_sync(struct mmap *md, bool sync_all)
{
	struct aiocb **aiocb = md->aio.aiocb;
	struct aiocb *cblocks = md->aio.cblocks;
	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
	int i, do_suspend;

	do {
		do_suspend = 0;
		for (i = 0; i < md->aio.nr_cblocks; ++i) {
			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				if (sync_all)
					aiocb[i] = NULL;
				else
					return i;
			} else {
				/*
				 * Started aio write is not complete yet
				 * so it has to be waited before the
				 * next allocation.
				 */
				aiocb[i] = &cblocks[i];
				do_suspend = 1;
			}
		}
		if (!do_suspend)
			return -1;

		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
			if (!(errno == EAGAIN || errno == EINTR))
				pr_err("failed to sync perf data, error: %m\n");
		}
	} while (1);
}
272 
/* Cursor passed to record__aio_pushfn(): destination buffer and fill level. */
struct record_aio {
	struct record	*rec;
	void		*data;	/* map->aio.data[] buffer being filled */
	size_t		size;	/* bytes accumulated in data so far */
};
278 
/*
 * perf_mmap__push() callback for aio mode: copy (or compress) one chunk
 * of ring-buffer data into the per-mmap aio buffer described by @to
 * (a struct record_aio), taking a map reference on the first chunk.
 * Returns the number of bytes appended to the aio buffer.
 */
static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
{
	struct record_aio *aio = to;

	/*
	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
	 * to release space in the kernel buffer as fast as possible, calling
	 * perf_mmap__consume() from perf_mmap__push() function.
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * part of data from map->start till the upper bound and then the remainder
	 * from the beginning of the kernel buffer till the end of the data chunk.
	 */

	if (record__comp_enabled(aio->rec)) {
		size = zstd_compress(aio->rec->session, aio->data + aio->size,
				     mmap__mmap_len(map) - aio->size,
				     buf, size);
	} else {
		memcpy(aio->data + aio->size, buf, size);
	}

	if (!aio->size) {
		/*
		 * Increment map->refcount to guard map->aio.data[] buffer
		 * from premature deallocation because map object can be
		 * released earlier than aio write request started on
		 * map->aio.data[] buffer is complete.
		 *
		 * perf_mmap__put() is done at record__aio_complete()
		 * after started aio request completion or at record__aio_push()
		 * if the request failed to start.
		 */
		perf_mmap__get(&map->core);
	}

	aio->size += size;

	return size;
}
323 
/*
 * Fill a free aio buffer of @map from its ring buffer and queue an
 * asynchronous write of it at file offset *@off.  On success *@off is
 * advanced past the queued data.  Returns 0 on success, > 0 when the
 * ring buffer held no data, < 0 on error.
 */
static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
{
	int ret, idx;
	int trace_fd = rec->session->data->file.fd;
	struct record_aio aio = { .rec = rec, .size = 0 };

	/*
	 * Call record__aio_sync() to wait till map->aio.data[] buffer
	 * becomes available after previous aio write operation.
	 */

	idx = record__aio_sync(map, false);
	aio.data = map->aio.data[idx];
	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
		return ret;

	rec->samples++;
	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
	if (!ret) {
		*off += aio.size;
		rec->bytes_written += aio.size;
		if (switch_output_size(rec))
			trigger_hit(&switch_output_trigger);
	} else {
		/*
		 * Decrement map->refcount incremented in record__aio_pushfn()
		 * back if record__aio_write() operation failed to start, otherwise
		 * map->refcount is decremented in record__aio_complete() after
		 * aio write operation finishes successfully.
		 */
		perf_mmap__put(&map->core);
	}

	return ret;
}
360 
/* Current write position of the trace file, or (off_t)-1 on error. */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
365 
/* Reposition the trace file write offset to @pos (errors are ignored). */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
370 
371 static void record__aio_mmap_read_sync(struct record *rec)
372 {
373 	int i;
374 	struct evlist *evlist = rec->evlist;
375 	struct mmap *maps = evlist->mmap;
376 
377 	if (!record__aio_enabled(rec))
378 		return;
379 
380 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
381 		struct mmap *map = &maps[i];
382 
383 		if (map->core.base)
384 			record__aio_sync(map, true);
385 	}
386 }
387 
/* Default and upper bound for the number of aio control blocks per mmap. */
static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

/*
 * Option callback for the aio control-block count.  An unset (negated)
 * option disables aio writes; a bare option or a non-numeric value
 * (strtol() yields 0) selects the default.
 */
static int record__aio_parse(const struct option *opt,
			     const char *str,
			     int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset) {
		opts->nr_cblocks = 0;
	} else {
		if (str)
			opts->nr_cblocks = strtol(str, NULL, 0);
		if (!opts->nr_cblocks)
			opts->nr_cblocks = nr_cblocks_default;
	}

	return 0;
}
408 #else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

/*
 * Stubs used when perf is built without aio support: pushes and
 * position queries fail, synchronization is a no-op.
 */
static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
			    off_t *off __maybe_unused)
{
	return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
	return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
429 #endif
430 
431 static int record__aio_enabled(struct record *rec)
432 {
433 	return rec->opts.nr_cblocks > 0;
434 }
435 
/* Minimum bytes that must accumulate in a ring buffer before a flush. */
#define MMAP_FLUSH_DEFAULT 1
/*
 * Option callback for the mmap flush threshold.  Accepts a plain
 * number or a value with a B/K/M/G suffix.  The result is clamped to a
 * quarter of the mmap buffer size so the threshold can always be
 * reached.
 */
static int record__mmap_flush_parse(const struct option *opt,
				    const char *str,
				    int unset)
{
	int flush_max;
	struct record_opts *opts = (struct record_opts *)opt->value;
	static struct parse_tag tags[] = {
			{ .tag  = 'B', .mult = 1       },
			{ .tag  = 'K', .mult = 1 << 10 },
			{ .tag  = 'M', .mult = 1 << 20 },
			{ .tag  = 'G', .mult = 1 << 30 },
			{ .tag  = 0 },
	};

	if (unset)
		return 0;

	if (str) {
		opts->mmap_flush = parse_tag_value(str, tags);
		/* no suffix recognized: fall back to a plain number */
		if (opts->mmap_flush == (int)-1)
			opts->mmap_flush = strtol(str, NULL, 0);
	}

	if (!opts->mmap_flush)
		opts->mmap_flush = MMAP_FLUSH_DEFAULT;

	flush_max = evlist__mmap_size(opts->mmap_pages);
	flush_max /= 4;
	if (opts->mmap_flush > flush_max)
		opts->mmap_flush = flush_max;

	return 0;
}
470 
471 #ifdef HAVE_ZSTD_SUPPORT
472 static unsigned int comp_level_default = 1;
473 
474 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
475 {
476 	struct record_opts *opts = opt->value;
477 
478 	if (unset) {
479 		opts->comp_level = 0;
480 	} else {
481 		if (str)
482 			opts->comp_level = strtol(str, NULL, 0);
483 		if (!opts->comp_level)
484 			opts->comp_level = comp_level_default;
485 	}
486 
487 	return 0;
488 }
489 #endif
490 static unsigned int comp_level_max = 22;
491 
492 static int record__comp_enabled(struct record *rec)
493 {
494 	return rec->opts.comp_level > 0;
495 }
496 
/*
 * perf_tool callback: write a synthesized event straight to the output
 * file; sample/machine context is not needed for raw pass-through.
 */
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, NULL, event, event->header.size);
}
505 
506 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
507 {
508 	struct record *rec = to;
509 
510 	if (record__comp_enabled(rec)) {
511 		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
512 		bf   = map->data;
513 	}
514 
515 	rec->samples++;
516 	return record__write(rec, map, bf, size);
517 }
518 
static volatile int signr = -1;		/* fatal signal to re-raise at exit, -1 = none */
static volatile int child_finished;	/* set on SIGCHLD: the workload exited */

/*
 * Generic termination handler: remember which signal fired (SIGCHLD is
 * tracked separately) and ask the main loop to stop.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

/* SIGSEGV handler: run perf-hook recovery and dump a stack trace. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
537 
538 static void record__sig_exit(void)
539 {
540 	if (signr == -1)
541 		return;
542 
543 	signal(signr, SIG_DFL);
544 	raise(signr);
545 }
546 
547 #ifdef HAVE_AUXTRACE_SUPPORT
548 
/*
 * Write one AUX area trace event plus its payload (possibly split into
 * two pieces by the ring-buffer wrap) to the output, padded to an
 * 8-byte boundary.  For single-file, non-pipe output the event's file
 * offset is recorded in the auxtrace index first.
 */
static int record__process_auxtrace(struct perf_tool *tool,
				    struct mmap *map,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, map, event, event->header.size);
	record__write(rec, map, data1, len1);
	if (len2)
		record__write(rec, map, data2, len2);
	record__write(rec, map, &pad, padding);

	return 0;
}
586 
587 static int record__auxtrace_mmap_read(struct record *rec,
588 				      struct mmap *map)
589 {
590 	int ret;
591 
592 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
593 				  record__process_auxtrace);
594 	if (ret < 0)
595 		return ret;
596 
597 	if (ret)
598 		rec->samples++;
599 
600 	return 0;
601 }
602 
603 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
604 					       struct mmap *map)
605 {
606 	int ret;
607 
608 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
609 					   record__process_auxtrace,
610 					   rec->opts.auxtrace_snapshot_size);
611 	if (ret < 0)
612 		return ret;
613 
614 	if (ret)
615 		rec->samples++;
616 
617 	return 0;
618 }
619 
620 static int record__auxtrace_read_snapshot_all(struct record *rec)
621 {
622 	int i;
623 	int rc = 0;
624 
625 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
626 		struct mmap *map = &rec->evlist->mmap[i];
627 
628 		if (!map->auxtrace_mmap.base)
629 			continue;
630 
631 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
632 			rc = -1;
633 			goto out;
634 		}
635 	}
636 out:
637 	return rc;
638 }
639 
640 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
641 {
642 	pr_debug("Recording AUX area tracing snapshot\n");
643 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
644 		trigger_error(&auxtrace_snapshot_trigger);
645 	} else {
646 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
647 			trigger_error(&auxtrace_snapshot_trigger);
648 		else
649 			trigger_ready(&auxtrace_snapshot_trigger);
650 	}
651 }
652 
653 static int record__auxtrace_snapshot_exit(struct record *rec)
654 {
655 	if (trigger_is_error(&auxtrace_snapshot_trigger))
656 		return 0;
657 
658 	if (!auxtrace_record__snapshot_started &&
659 	    auxtrace_record__snapshot_start(rec->itr))
660 		return -1;
661 
662 	record__read_auxtrace_snapshot(rec, true);
663 	if (trigger_is_error(&auxtrace_snapshot_trigger))
664 		return -1;
665 
666 	return 0;
667 }
668 
/*
 * Initialize AUX area tracing: create the itr (instruction tracing
 * record) state if not done yet, then parse the snapshot, sampling and
 * filter options.  Returns 0 or the first error encountered.
 */
static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
					    rec->opts.auxtrace_sample_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}
691 
692 #else
693 
/*
 * Stubs used when perf is built without AUX area tracing support:
 * reads do nothing and snapshot/init control always succeeds.
 */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct mmap *map __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
				    bool on_exit __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static inline
int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}
723 
724 #endif
725 
726 static bool record__kcore_readable(struct machine *machine)
727 {
728 	char kcore[PATH_MAX];
729 	int fd;
730 
731 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
732 
733 	fd = open(kcore, O_RDONLY);
734 	if (fd < 0)
735 		return false;
736 
737 	close(fd);
738 
739 	return true;
740 }
741 
742 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
743 {
744 	char from_dir[PATH_MAX];
745 	char kcore_dir[PATH_MAX];
746 	int ret;
747 
748 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
749 
750 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
751 	if (ret)
752 		return ret;
753 
754 	return kcore_copy(from_dir, kcore_dir);
755 }
756 
/*
 * mmap the ring buffers (and optional AUX area buffers) for @evlist.
 * An EPERM failure is translated into a hint about
 * perf_event_mlock_kb; other errors are reported verbatim.  Returns 0
 * or a negative errno-style value.
 */
static int record__mmap_evlist(struct record *rec,
			       struct evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
				  opts->auxtrace_sample_mode;
	char msg[512];

	/* Per-node/per-cpu affinity needs the cpu -> node mapping. */
	if (opts->affinity != PERF_AFFINITY_SYS)
		cpu__setup_cpunode_map();

	if (evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 auxtrace_overwrite,
				 opts->nr_cblocks, opts->affinity,
				 opts->mmap_flush, opts->comp_level) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}
792 
/* Convenience wrapper: mmap the ring buffers of the session's evlist. */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
797 
/*
 * Open all events on the configured CPUs/threads, falling back to more
 * conservative event encodings when the precise ones are rejected,
 * then apply event filters and mmap the ring buffers.  Returns 0 on
 * success or a negative error code.
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct evsel *pos;
	struct evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = evlist__first(evlist);
		pos->tracking = 0;
		pos = evlist__last(evlist);
		pos->tracking = 1;
		pos->core.attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			/* Retry with a more conservative event encoding, if any. */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* A failing weak group member is retried ungrouped. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
877 
878 static int process_sample_event(struct perf_tool *tool,
879 				union perf_event *event,
880 				struct perf_sample *sample,
881 				struct evsel *evsel,
882 				struct machine *machine)
883 {
884 	struct record *rec = container_of(tool, struct record, tool);
885 
886 	if (rec->evlist->first_sample_time == 0)
887 		rec->evlist->first_sample_time = sample->time;
888 
889 	rec->evlist->last_sample_time = sample->time;
890 
891 	if (rec->buildid_all)
892 		return 0;
893 
894 	rec->samples++;
895 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
896 }
897 
/*
 * Second pass over the freshly written data file to collect build-ids
 * (and, via process_sample_event(), the first/last sample times).
 * A zero-sized output is skipped entirely.
 */
static int process_buildids(struct record *rec)
{
	struct perf_session *session = rec->session;

	if (perf_data__size(&rec->data) == 0)
		return 0;

	/*
	 * During this process, it'll load kernel map and replace the
	 * dso->long_name to a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than build-id path (in debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSO regardless of hits,
	 * so no need to process samples. But if timestamp_boundary is enabled,
	 * it still needs to walk on all samples to get the timestamps of
	 * first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}
927 
/*
 * Machine iterator callback: synthesize module and kernel mmap events
 * for a guest machine so its samples can be resolved at report time.
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
956 
/*
 * Marker event appended after each full pass over the ring buffers
 * when anything was written (see record__mmap_read_evlist()).
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
961 
/*
 * With per-buffer affinity enabled (mode other than PERF_AFFINITY_SYS),
 * migrate the recording thread to @map's CPU mask before draining it.
 * rec->affinity_mask caches the last mask set, so the syscall is only
 * made when the target mask actually differs.
 */
static void record__adjust_affinity(struct record *rec, struct mmap *map)
{
	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
			  rec->affinity_mask.nbits)) {
		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
			  map->affinity_mask.bits, rec->affinity_mask.nbits);
		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
				  (cpu_set_t *)rec->affinity_mask.bits);
		if (verbose == 2)
			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
	}
}
976 
977 static size_t process_comp_header(void *record, size_t increment)
978 {
979 	struct perf_record_compressed *event = record;
980 	size_t size = sizeof(*event);
981 
982 	if (increment) {
983 		event->header.size += increment;
984 		return increment;
985 	}
986 
987 	event->header.type = PERF_RECORD_COMPRESSED;
988 	event->header.size = size;
989 
990 	return size;
991 }
992 
993 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
994 			    void *src, size_t src_size)
995 {
996 	size_t compressed;
997 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
998 
999 	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1000 						     max_record_size, process_comp_header);
1001 
1002 	session->bytes_transferred += src_size;
1003 	session->bytes_compressed  += compressed;
1004 
1005 	return compressed;
1006 }
1007 
/*
 * Drain every ring buffer of @evlist into the output, using the
 * regular or the overwrite maps depending on @overwrite.  With @synch
 * set, the flush threshold is temporarily forced to 1 so the buffers
 * are emptied completely.  AUX area buffers are drained inline unless
 * snapshot/sample mode handles them separately.  A FINISHED_ROUND
 * marker is appended when anything was written.
 */
static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
				    bool overwrite, bool synch)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct mmap *maps;
	int trace_fd = rec->data.file.fd;
	off_t off = 0;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	/* aio writes bypass the fd position, so track it explicitly. */
	if (record__aio_enabled(rec))
		off = record__aio_get_pos(trace_fd);

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		u64 flush = 0;
		struct mmap *map = &maps[i];

		if (map->core.base) {
			record__adjust_affinity(rec, map);
			if (synch) {
				flush = map->core.flush;
				map->core.flush = 1;
			}
			if (!record__aio_enabled(rec)) {
				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			} else {
				if (record__aio_push(rec, map, &off) < 0) {
					record__aio_set_pos(trace_fd, off);
					if (synch)
						map->core.flush = flush;
					rc = -1;
					goto out;
				}
			}
			if (synch)
				map->core.flush = flush;
		}

		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
		    !rec->opts.auxtrace_sample_mode &&
		    record__auxtrace_mmap_read(rec, map) != 0) {
			rc = -1;
			goto out;
		}
	}

	if (record__aio_enabled(rec))
		record__aio_set_pos(trace_fd, off);

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}
1084 
1085 static int record__mmap_read_all(struct record *rec, bool synch)
1086 {
1087 	int err;
1088 
1089 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1090 	if (err)
1091 		return err;
1092 
1093 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1094 }
1095 
/*
 * Start from "all header features enabled" and clear the ones that do
 * not apply to this session (no build-ids, no tracepoints, no branch
 * stack, no AUX trace, no clockid, no compression, ...).
 */
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->core.entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
		perf_header__clear_feat(&session->header, HEADER_CLOCKID);

	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
	if (!record__comp_enabled(rec))
		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}
1125 
1126 static void
1127 record__finish_output(struct record *rec)
1128 {
1129 	struct perf_data *data = &rec->data;
1130 	int fd = perf_data__fd(data);
1131 
1132 	if (data->is_pipe)
1133 		return;
1134 
1135 	rec->session->header.data_size += rec->bytes_written;
1136 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1137 
1138 	if (!rec->no_buildid) {
1139 		process_buildids(rec);
1140 
1141 		if (rec->buildid_all)
1142 			dsos__hit_all(rec->session);
1143 	}
1144 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1145 
1146 	return;
1147 }
1148 
1149 static int record__synthesize_workload(struct record *rec, bool tail)
1150 {
1151 	int err;
1152 	struct perf_thread_map *thread_map;
1153 
1154 	if (rec->opts.tail_synthesize != tail)
1155 		return 0;
1156 
1157 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1158 	if (thread_map == NULL)
1159 		return -1;
1160 
1161 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1162 						 process_synthesized_event,
1163 						 &rec->session->machines.host,
1164 						 rec->opts.sample_address);
1165 	perf_thread_map__put(thread_map);
1166 	return err;
1167 }
1168 
1169 static int record__synthesize(struct record *rec, bool tail);
1170 
/*
 * Finish the current output file and switch to a new timestamped one.
 * Returns the new output file descriptor (negative on error). When
 * @at_exit is true this is the final rotation at shutdown: the byte
 * counters are left alone and no new tracking events are emitted.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;
	char *new_filename;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	/* Flush any in-flight AIO writes before finishing the file. */
	record__aio_mmap_read_sync(rec);

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit, &new_filename);
	if (fd >= 0 && !at_exit) {
		/* Fresh file: restart the write accounting. */
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->path, timestamp);

	/*
	 * With --switch-output limited to N files, recycle the filename
	 * slots ring-buffer style, deleting the file being displaced.
	 */
	if (rec->switch_output.num_files) {
		int n = rec->switch_output.cur_file + 1;

		if (n >= rec->switch_output.num_files)
			n = 0;
		rec->switch_output.cur_file = n;
		if (rec->switch_output.filenames[n]) {
			remove(rec->switch_output.filenames[n]);
			zfree(&rec->switch_output.filenames[n]);
		}
		/* Slot takes ownership of new_filename. */
		rec->switch_output.filenames[n] = new_filename;
	} else {
		free(new_filename);
	}

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
1240 
/*
 * errno delivered by a failed workload fork via SIGUSR1's sival_int.
 * NOTE(review): volatile int is the historical idiom here; consider
 * volatile sig_atomic_t for strict async-signal safety.
 */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	/* Remember why the workload failed and stop the record loop. */
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
1256 
1257 static void snapshot_sig_handler(int sig);
1258 static void alarm_sig_handler(int sig);
1259 
1260 static const struct perf_event_mmap_page *
1261 perf_evlist__pick_pc(struct evlist *evlist)
1262 {
1263 	if (evlist) {
1264 		if (evlist->mmap && evlist->mmap[0].core.base)
1265 			return evlist->mmap[0].core.base;
1266 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1267 			return evlist->overwrite_mmap[0].core.base;
1268 	}
1269 	return NULL;
1270 }
1271 
1272 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1273 {
1274 	const struct perf_event_mmap_page *pc;
1275 
1276 	pc = perf_evlist__pick_pc(rec->evlist);
1277 	if (pc)
1278 		return pc;
1279 	return NULL;
1280 }
1281 
1282 static int record__synthesize(struct record *rec, bool tail)
1283 {
1284 	struct perf_session *session = rec->session;
1285 	struct machine *machine = &session->machines.host;
1286 	struct perf_data *data = &rec->data;
1287 	struct record_opts *opts = &rec->opts;
1288 	struct perf_tool *tool = &rec->tool;
1289 	int fd = perf_data__fd(data);
1290 	int err = 0;
1291 
1292 	if (rec->opts.tail_synthesize != tail)
1293 		return 0;
1294 
1295 	if (data->is_pipe) {
1296 		/*
1297 		 * We need to synthesize events first, because some
1298 		 * features works on top of them (on report side).
1299 		 */
1300 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1301 						   process_synthesized_event);
1302 		if (err < 0) {
1303 			pr_err("Couldn't synthesize attrs.\n");
1304 			goto out;
1305 		}
1306 
1307 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1308 						      process_synthesized_event);
1309 		if (err < 0) {
1310 			pr_err("Couldn't synthesize features.\n");
1311 			return err;
1312 		}
1313 
1314 		if (have_tracepoints(&rec->evlist->core.entries)) {
1315 			/*
1316 			 * FIXME err <= 0 here actually means that
1317 			 * there were no tracepoints so its not really
1318 			 * an error, just that we don't need to
1319 			 * synthesize anything.  We really have to
1320 			 * return this more properly and also
1321 			 * propagate errors that now are calling die()
1322 			 */
1323 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1324 								  process_synthesized_event);
1325 			if (err <= 0) {
1326 				pr_err("Couldn't record tracing data.\n");
1327 				goto out;
1328 			}
1329 			rec->bytes_written += err;
1330 		}
1331 	}
1332 
1333 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1334 					  process_synthesized_event, machine);
1335 	if (err)
1336 		goto out;
1337 
1338 	/* Synthesize id_index before auxtrace_info */
1339 	if (rec->opts.auxtrace_sample_mode) {
1340 		err = perf_event__synthesize_id_index(tool,
1341 						      process_synthesized_event,
1342 						      session->evlist, machine);
1343 		if (err)
1344 			goto out;
1345 	}
1346 
1347 	if (rec->opts.full_auxtrace) {
1348 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1349 					session, process_synthesized_event);
1350 		if (err)
1351 			goto out;
1352 	}
1353 
1354 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1355 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1356 							 machine);
1357 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1358 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1359 				   "Check /proc/kallsyms permission or run as root.\n");
1360 
1361 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1362 						     machine);
1363 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1364 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1365 				   "Check /proc/modules permission or run as root.\n");
1366 	}
1367 
1368 	if (perf_guest) {
1369 		machines__process_guests(&session->machines,
1370 					 perf_event__synthesize_guest_os, tool);
1371 	}
1372 
1373 	err = perf_event__synthesize_extra_attr(&rec->tool,
1374 						rec->evlist,
1375 						process_synthesized_event,
1376 						data->is_pipe);
1377 	if (err)
1378 		goto out;
1379 
1380 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1381 						 process_synthesized_event,
1382 						NULL);
1383 	if (err < 0) {
1384 		pr_err("Couldn't synthesize thread map.\n");
1385 		return err;
1386 	}
1387 
1388 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1389 					     process_synthesized_event, NULL);
1390 	if (err < 0) {
1391 		pr_err("Couldn't synthesize cpu map.\n");
1392 		return err;
1393 	}
1394 
1395 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1396 						machine, opts);
1397 	if (err < 0)
1398 		pr_warning("Couldn't synthesize bpf events.\n");
1399 
1400 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1401 					    process_synthesized_event, opts->sample_address,
1402 					    1);
1403 out:
1404 	return err;
1405 }
1406 
/*
 * Main record driver: set up signal handling and the perf session,
 * synthesize the initial side-band events, start the workload (if one
 * was given on the command line), then loop draining the mmap ring
 * buffers into the output until the workload exits or the user stops
 * the recording, and finally finish/rotate the output file and report
 * the session statistics.
 */
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;	/* leftover argv == workload to run */
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	struct evlist *sb_evlist = NULL;
	int fd;
	float ratio = 0;

	/* Handlers are defined elsewhere in this file. */
	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	/* SIGUSR2 drives auxtrace snapshots and/or output switching. */
	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (IS_ERR(session)) {
		pr_err("Perf session creation failed.\n");
		return PTR_ERR(session);
	}

	fd = perf_data__fd(data);
	rec->session = session;

	/*
	 * NOTE(review): the early "return -1" paths below exit without
	 * perf_session__delete() — confirm whether that leak is accepted.
	 */
	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
		pr_err("Compression initialization failed.\n");
		return -1;
	}

	session->header.env.comp_type  = PERF_COMP_ZSTD;
	session->header.env.comp_level = rec->opts.comp_level;

	if (rec->opts.kcore &&
	    !record__kcore_readable(&session->machines.host)) {
		pr_err("ERROR: kcore is not readable.\n");
		return -1;
	}

	record__init_features(rec);

	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

	/* Fork (but don't exec yet) the workload given on the command line. */
	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just single event and are sending data
	 * through pipe, we need to force the ids allocation,
	 * because we synthesize event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}
	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;

	if (rec->opts.kcore) {
		err = record__kcore_copy(&session->machines.host, data);
		if (err) {
			pr_err("ERROR: Failed to copy kcore\n");
			goto out_child;
		}
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/* Pipe output gets a minimal header; files get the full one. */
	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	if (!opts->no_bpf_event)
		bpf_event__add_sb_event(&sb_evlist, &session->header.env);

	if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
		opts->no_bpf_event = true;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize COMM event to prevent it.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	/*
	 * Main loop: drain ring buffers, service snapshot/switch-output
	 * triggers, and poll for new data until done/draining with no
	 * fresh samples.
	 */
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state is possible to be
		 * BKW_MMAP_EMPTY here: when done == true and
		 * hits != rec->samples in previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensure we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec, false) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec, false);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * overwritable ring buffer should have been collected,
			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 raise after or during record__mmap_read_all(),
			 * record__mmap_read_all() didn't collect data from
			 * overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (opts->auxtrace_snapshot_on_exit)
		record__auxtrace_snapshot_exit(rec);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	/* Final drain + AIO sync so no buffered data is lost. */
	record__mmap_read_all(rec, true);
	record__aio_mmap_read_sync(rec);

	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
		session->header.env.comp_ratio = ratio + 0.5;
	}

	/* Reap the workload and translate its exit into our status. */
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->path, postfix, samples);
		if (ratio) {
			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
					rec->session->bytes_transferred / 1024.0 / 1024.0,
					ratio);
		}
		fprintf(stderr, " ]\n");
	}

out_delete_session:
	zstd_fini(&session->zstd_data);
	perf_session__delete(session);

	if (!opts->no_bpf_event)
		perf_evlist__stop_sb_thread(sb_evlist);
	return status;
}
1823 
/* Log the selected callchain mode (and dump size for DWARF) at debug level. */
static void callchain_debug(struct callchain_param *callchain)
{
	/* Indexed by record_mode: must stay in sync with the CALLCHAIN_* enum order. */
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}
1834 
1835 int record_opts__parse_callchain(struct record_opts *record,
1836 				 struct callchain_param *callchain,
1837 				 const char *arg, bool unset)
1838 {
1839 	int ret;
1840 	callchain->enabled = !unset;
1841 
1842 	/* --no-call-graph */
1843 	if (unset) {
1844 		callchain->record_mode = CALLCHAIN_NONE;
1845 		pr_debug("callchain: disabled\n");
1846 		return 0;
1847 	}
1848 
1849 	ret = parse_callchain_record_opt(arg, callchain);
1850 	if (!ret) {
1851 		/* Enable data address sampling for DWARF unwind. */
1852 		if (callchain->record_mode == CALLCHAIN_DWARF)
1853 			record->sample_address = true;
1854 		callchain_debug(callchain);
1855 	}
1856 
1857 	return ret;
1858 }
1859 
1860 int record_parse_callchain_opt(const struct option *opt,
1861 			       const char *arg,
1862 			       int unset)
1863 {
1864 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1865 }
1866 
1867 int record_callchain_opt(const struct option *opt,
1868 			 const char *arg __maybe_unused,
1869 			 int unset __maybe_unused)
1870 {
1871 	struct callchain_param *callchain = opt->value;
1872 
1873 	callchain->enabled = true;
1874 
1875 	if (callchain->record_mode == CALLCHAIN_NONE)
1876 		callchain->record_mode = CALLCHAIN_FP;
1877 
1878 	callchain_debug(callchain);
1879 	return 0;
1880 }
1881 
1882 static int perf_record_config(const char *var, const char *value, void *cb)
1883 {
1884 	struct record *rec = cb;
1885 
1886 	if (!strcmp(var, "record.build-id")) {
1887 		if (!strcmp(value, "cache"))
1888 			rec->no_buildid_cache = false;
1889 		else if (!strcmp(value, "no-cache"))
1890 			rec->no_buildid_cache = true;
1891 		else if (!strcmp(value, "skip"))
1892 			rec->no_buildid = true;
1893 		else
1894 			return -1;
1895 		return 0;
1896 	}
1897 	if (!strcmp(var, "record.call-graph")) {
1898 		var = "call-graph.record-mode";
1899 		return perf_default_config(var, value, cb);
1900 	}
1901 #ifdef HAVE_AIO_SUPPORT
1902 	if (!strcmp(var, "record.aio")) {
1903 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1904 		if (!rec->opts.nr_cblocks)
1905 			rec->opts.nr_cblocks = nr_cblocks_default;
1906 	}
1907 #endif
1908 
1909 	return 0;
1910 }
1911 
/* Maps a user-supplied --clockid name onto its clockid_t value. */
struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Sentinel entry terminating the clockids[] table. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Name table scanned (case-insensitively) by parse_clockid(). */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1954 
1955 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1956 {
1957 	struct timespec res;
1958 
1959 	*res_ns = 0;
1960 	if (!clock_getres(clk_id, &res))
1961 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1962 	else
1963 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1964 
1965 	return 0;
1966 }
1967 
/*
 * parse-options callback for --clockid. Accepts either a raw clockid
 * number or a (case-insensitive, optionally CLOCK_-prefixed) name from
 * the clockids[] table; on success also records the clock resolution.
 * Returns 0 on success, -1 on an unknown name or repeated use.
 */
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;	/* original string, kept for the warning */

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if its a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return get_clockid_res(opts->clockid,
					       &opts->clockid_res_ns);
		}
	}

	/* Unknown name: roll back the use_clockid flag set above. */
	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
2009 
/*
 * parse-options callback for --affinity. Recognizes "node" and "cpu"
 * (case-insensitive); anything else silently leaves the current
 * affinity mode untouched. Always returns 0.
 */
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;

	if (unset || !str)
		return 0;

	if (!strcasecmp(str, "node"))
		opts->affinity = PERF_AFFINITY_NODE;
	else if (!strcasecmp(str, "cpu"))
		opts->affinity = PERF_AFFINITY_CPU;

	return 0;
}
2024 
2025 static int parse_output_max_size(const struct option *opt,
2026 				 const char *str, int unset)
2027 {
2028 	unsigned long *s = (unsigned long *)opt->value;
2029 	static struct parse_tag tags_size[] = {
2030 		{ .tag  = 'B', .mult = 1       },
2031 		{ .tag  = 'K', .mult = 1 << 10 },
2032 		{ .tag  = 'M', .mult = 1 << 20 },
2033 		{ .tag  = 'G', .mult = 1 << 30 },
2034 		{ .tag  = 0 },
2035 	};
2036 	unsigned long val;
2037 
2038 	if (unset) {
2039 		*s = 0;
2040 		return 0;
2041 	}
2042 
2043 	val = parse_tag_value(str, tags_size);
2044 	if (val != (unsigned long) -1) {
2045 		*s = val;
2046 		return 0;
2047 	}
2048 
2049 	return -1;
2050 }
2051 
2052 static int record__parse_mmap_pages(const struct option *opt,
2053 				    const char *str,
2054 				    int unset __maybe_unused)
2055 {
2056 	struct record_opts *opts = opt->value;
2057 	char *s, *p;
2058 	unsigned int mmap_pages;
2059 	int ret;
2060 
2061 	if (!str)
2062 		return -EINVAL;
2063 
2064 	s = strdup(str);
2065 	if (!s)
2066 		return -ENOMEM;
2067 
2068 	p = strchr(s, ',');
2069 	if (p)
2070 		*p = '\0';
2071 
2072 	if (*s) {
2073 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2074 		if (ret)
2075 			goto out_free;
2076 		opts->mmap_pages = mmap_pages;
2077 	}
2078 
2079 	if (!p) {
2080 		ret = 0;
2081 		goto out_free;
2082 	}
2083 
2084 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2085 	if (ret)
2086 		goto out_free;
2087 
2088 	opts->auxtrace_mmap_pages = mmap_pages;
2089 
2090 out_free:
2091 	free(s);
2092 	return ret;
2093 }
2094 
2095 static void switch_output_size_warn(struct record *rec)
2096 {
2097 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2098 	struct switch_output *s = &rec->switch_output;
2099 
2100 	wakeup_size /= 2;
2101 
2102 	if (s->size < wakeup_size) {
2103 		char buf[100];
2104 
2105 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2106 		pr_warning("WARNING: switch-output data size lower than "
2107 			   "wakeup kernel buffer size (%s) "
2108 			   "expect bigger perf.data sizes\n", buf);
2109 	}
2110 }
2111 
/*
 * Interpret the --switch-output argument: the literal "signal", a size
 * threshold (B/K/M/G suffix) or a time threshold (s/m/h/d suffix).
 * On success enables switching and timestamped filenames; returns -1
 * when the string matches none of the accepted forms.
 */
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	/* Option not given at all: nothing to configure. */
	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	/* Rotated files need unique names, hence the timestamp suffix. */
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}
2166 
/* Usage strings shown by parse-options; exported for builtin-script. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;
2173 
2174 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2175 				  struct perf_sample *sample, struct machine *machine)
2176 {
2177 	/*
2178 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2179 	 * no need to add them twice.
2180 	 */
2181 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2182 		return 0;
2183 	return perf_event__process_mmap(tool, event, sample, machine);
2184 }
2185 
2186 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2187 				   struct perf_sample *sample, struct machine *machine)
2188 {
2189 	/*
2190 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2191 	 * no need to add them twice.
2192 	 */
2193 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2194 		return 0;
2195 
2196 	return perf_event__process_mmap2(tool, event, sample, machine);
2197 }
2198 
2199 /*
2200  * XXX Ideally would be local to cmd_record() and passed to a record__new
2201  * because we need to have access to it in record__exit, that is called
2202  * after cmd_record() exits, but since record_options need to be accessible to
2203  * builtin-script, leave it here.
2204  *
 * At least we don't touch it in all the other functions here directly.
2206  *
2207  * Just say no to tons of global variables, sigh.
2208  */
/*
 * Global 'perf record' state: default option values plus the perf_tool
 * callbacks used when processing events.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		/*
		 * UINT_MAX/ULLONG_MAX presumably act as "not set by the
		 * user" sentinels — confirm in record_opts__config().
		 */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000, /* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		/* Wrappers that skip kernel maps: see build_id__process_mmap{,2}. */
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};
2233 
/* Help text for --call-graph, with the default record mode appended. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* Set by --dry-run: parse the options, then exit without recording. */
static bool dry_run;
2238 
2239 /*
2240  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2241  * with it and switch to use the library functions in perf_evlist that came
2242  * from builtin-record.c, i.e. use record_opts,
2243  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
2244  * using pipes, etc.
2245  */
/*
 * Option table for 'perf record'.  Each entry binds a command-line option
 * to a field of the global 'record' (or to a parse callback).
 */
static struct option __record_options[] = {
	/* Event selection and filtering. */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	/* Target selection: pid/tid/cpu/system-wide/uid. */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
	/* Sampling frequency / period and mmap buffer sizing. */
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
		     record__mmap_flush_parse),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* Callchain recording. */
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	/* What to put in each sample. */
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	/* Build-id handling. */
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* Branch stack sampling. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	/* Machine register sampling. */
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '-I?' to list register names", parse_intr_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use '--user-regs=?' to list register names", parse_user_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	/* AUX area tracing. */
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
			  "opts", "sample AUX area", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
		    "collect kernel callchains"),
	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
		    "collect user callchains"),
	/* BPF scriptlet compilation (clang) and symbol resolution. */
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	/* Output file switching; see switch_output_setup(). */
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
			  "signal"),
	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
		   "Limit number of switch output generated files"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
#ifdef HAVE_AIO_SUPPORT
	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
		     record__aio_parse),
#endif
	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
		     record__parse_affinity),
#ifdef HAVE_ZSTD_SUPPORT
	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
			    record__parse_comp_level),
#endif
	OPT_CALLBACK(0, "max-size", &record.output_max_size,
		     "size", "Limit the maximum size of the output file", parse_output_max_size),
	OPT_END()
};

/* Non-static alias so code outside this file can reuse the option table. */
struct option *record_options = __record_options;
2412 
2413 int cmd_record(int argc, const char **argv)
2414 {
2415 	int err;
2416 	struct record *rec = &record;
2417 	char errbuf[BUFSIZ];
2418 
2419 	setlocale(LC_ALL, "");
2420 
2421 #ifndef HAVE_LIBBPF_SUPPORT
2422 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2423 	set_nobuild('\0', "clang-path", true);
2424 	set_nobuild('\0', "clang-opt", true);
2425 # undef set_nobuild
2426 #endif
2427 
2428 #ifndef HAVE_BPF_PROLOGUE
2429 # if !defined (HAVE_DWARF_SUPPORT)
2430 #  define REASON  "NO_DWARF=1"
2431 # elif !defined (HAVE_LIBBPF_SUPPORT)
2432 #  define REASON  "NO_LIBBPF=1"
2433 # else
2434 #  define REASON  "this architecture doesn't support BPF prologue"
2435 # endif
2436 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2437 	set_nobuild('\0', "vmlinux", true);
2438 # undef set_nobuild
2439 # undef REASON
2440 #endif
2441 
2442 	rec->opts.affinity = PERF_AFFINITY_SYS;
2443 
2444 	rec->evlist = evlist__new();
2445 	if (rec->evlist == NULL)
2446 		return -ENOMEM;
2447 
2448 	err = perf_config(perf_record_config, rec);
2449 	if (err)
2450 		return err;
2451 
2452 	argc = parse_options(argc, argv, record_options, record_usage,
2453 			    PARSE_OPT_STOP_AT_NON_OPTION);
2454 	if (quiet)
2455 		perf_quiet_option();
2456 
2457 	/* Make system wide (-a) the default target. */
2458 	if (!argc && target__none(&rec->opts.target))
2459 		rec->opts.target.system_wide = true;
2460 
2461 	if (nr_cgroups && !rec->opts.target.system_wide) {
2462 		usage_with_options_msg(record_usage, record_options,
2463 			"cgroup monitoring only available in system-wide mode");
2464 
2465 	}
2466 
2467 	if (rec->opts.kcore)
2468 		rec->data.is_dir = true;
2469 
2470 	if (rec->opts.comp_level != 0) {
2471 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2472 		rec->no_buildid = true;
2473 	}
2474 
2475 	if (rec->opts.record_switch_events &&
2476 	    !perf_can_record_switch_events()) {
2477 		ui__error("kernel does not support recording context switch events\n");
2478 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2479 		return -EINVAL;
2480 	}
2481 
2482 	if (switch_output_setup(rec)) {
2483 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2484 		return -EINVAL;
2485 	}
2486 
2487 	if (rec->switch_output.time) {
2488 		signal(SIGALRM, alarm_sig_handler);
2489 		alarm(rec->switch_output.time);
2490 	}
2491 
2492 	if (rec->switch_output.num_files) {
2493 		rec->switch_output.filenames = calloc(sizeof(char *),
2494 						      rec->switch_output.num_files);
2495 		if (!rec->switch_output.filenames)
2496 			return -EINVAL;
2497 	}
2498 
2499 	/*
2500 	 * Allow aliases to facilitate the lookup of symbols for address
2501 	 * filters. Refer to auxtrace_parse_filters().
2502 	 */
2503 	symbol_conf.allow_aliases = true;
2504 
2505 	symbol__init(NULL);
2506 
2507 	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2508 		rec->affinity_mask.nbits = cpu__max_cpu();
2509 		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2510 		if (!rec->affinity_mask.bits) {
2511 			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2512 			return -ENOMEM;
2513 		}
2514 		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2515 	}
2516 
2517 	err = record__auxtrace_init(rec);
2518 	if (err)
2519 		goto out;
2520 
2521 	if (dry_run)
2522 		goto out;
2523 
2524 	err = bpf__setup_stdout(rec->evlist);
2525 	if (err) {
2526 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2527 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2528 			 errbuf);
2529 		goto out;
2530 	}
2531 
2532 	err = -ENOMEM;
2533 
2534 	if (rec->no_buildid_cache || rec->no_buildid) {
2535 		disable_buildid_cache();
2536 	} else if (rec->switch_output.enabled) {
2537 		/*
2538 		 * In 'perf record --switch-output', disable buildid
2539 		 * generation by default to reduce data file switching
2540 		 * overhead. Still generate buildid if they are required
2541 		 * explicitly using
2542 		 *
2543 		 *  perf record --switch-output --no-no-buildid \
2544 		 *              --no-no-buildid-cache
2545 		 *
2546 		 * Following code equals to:
2547 		 *
2548 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2549 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2550 		 *         disable_buildid_cache();
2551 		 */
2552 		bool disable = true;
2553 
2554 		if (rec->no_buildid_set && !rec->no_buildid)
2555 			disable = false;
2556 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2557 			disable = false;
2558 		if (disable) {
2559 			rec->no_buildid = true;
2560 			rec->no_buildid_cache = true;
2561 			disable_buildid_cache();
2562 		}
2563 	}
2564 
2565 	if (record.opts.overwrite)
2566 		record.opts.tail_synthesize = true;
2567 
2568 	if (rec->evlist->core.nr_entries == 0 &&
2569 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2570 		pr_err("Not enough memory for event selector list\n");
2571 		goto out;
2572 	}
2573 
2574 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2575 		rec->opts.no_inherit = true;
2576 
2577 	err = target__validate(&rec->opts.target);
2578 	if (err) {
2579 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2580 		ui__warning("%s\n", errbuf);
2581 	}
2582 
2583 	err = target__parse_uid(&rec->opts.target);
2584 	if (err) {
2585 		int saved_errno = errno;
2586 
2587 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2588 		ui__error("%s", errbuf);
2589 
2590 		err = -saved_errno;
2591 		goto out;
2592 	}
2593 
2594 	/* Enable ignoring missing threads when -u/-p option is defined. */
2595 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2596 
2597 	err = -ENOMEM;
2598 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2599 		usage_with_options(record_usage, record_options);
2600 
2601 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2602 	if (err)
2603 		goto out;
2604 
2605 	/*
2606 	 * We take all buildids when the file contains
2607 	 * AUX area tracing data because we do not decode the
2608 	 * trace because it would take too long.
2609 	 */
2610 	if (rec->opts.full_auxtrace)
2611 		rec->buildid_all = true;
2612 
2613 	if (record_opts__config(&rec->opts)) {
2614 		err = -EINVAL;
2615 		goto out;
2616 	}
2617 
2618 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2619 		rec->opts.nr_cblocks = nr_cblocks_max;
2620 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2621 
2622 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2623 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2624 
2625 	if (rec->opts.comp_level > comp_level_max)
2626 		rec->opts.comp_level = comp_level_max;
2627 	pr_debug("comp level: %d\n", rec->opts.comp_level);
2628 
2629 	err = __cmd_record(&record, argc, argv);
2630 out:
2631 	bitmap_free(rec->affinity_mask.bits);
2632 	evlist__delete(rec->evlist);
2633 	symbol__exit();
2634 	auxtrace_record__free(rec->itr);
2635 	return err;
2636 }
2637 
/*
 * Signal handler driving AUX area tracing snapshots and signal-based
 * output switching (--switch-output=signal; the option help documents
 * SIGUSR2 — registration happens outside this chunk, confirm there).
 */
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		/*
		 * Mark the trigger hit and the snapshot as started before
		 * kicking it off; on failure, flip the trigger to error so
		 * the main loop can react.
		 */
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	/* Only fires when switch-output is in signal mode. */
	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}
2652 
2653 static void alarm_sig_handler(int sig __maybe_unused)
2654 {
2655 	struct record *rec = &record;
2656 
2657 	if (switch_output_time(rec))
2658 		trigger_hit(&switch_output_trigger);
2659 }
2660