xref: /openbmc/linux/tools/perf/builtin-record.c (revision 2634682fdffd9ba6e74b76be8aa91cf8b2e05c41)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "util/pfm.h"
49 #include "util/clockid.h"
50 #include "asm/bug.h"
51 #include "perf.h"
52 
53 #include <errno.h>
54 #include <inttypes.h>
55 #include <locale.h>
56 #include <poll.h>
57 #include <pthread.h>
58 #include <unistd.h>
59 #include <sched.h>
60 #include <signal.h>
61 #ifdef HAVE_EVENTFD_SUPPORT
62 #include <sys/eventfd.h>
63 #endif
64 #include <sys/mman.h>
65 #include <sys/wait.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <fcntl.h>
69 #include <linux/err.h>
70 #include <linux/string.h>
71 #include <linux/time64.h>
72 #include <linux/zalloc.h>
73 #include <linux/bitmap.h>
74 #include <sys/time.h>
75 
/*
 * Settings/state for the "switch output" feature.
 *
 * In the part of the file visible here only @signal, @size and @time are
 * read (by switch_output_signal(), switch_output_size() and
 * switch_output_time() respectively); the other members are not referenced
 * in this section.
 */
76 struct switch_output {
77 	bool		 enabled;
78 	bool		 signal;	/* gate for switch_output_signal() */
79 	unsigned long	 size;		/* threshold compared with rec->bytes_written; 0 disables the check */
80 	unsigned long	 time;		/* non-zero enables switch_output_time() */
81 	const char	*str;
82 	bool		 set;
83 	char		 **filenames;
84 	int		 num_files;
85 	int		 cur_file;
86 };
87 
/*
 * State of one 'perf record' run.
 *
 * @tool is embedded so that tool callbacks can get back to the enclosing
 * struct record with container_of() (see process_synthesized_event()).
 */
88 struct record {
89 	struct perf_tool	tool;
90 	struct record_opts	opts;
91 	u64			bytes_written;	/* running total, bumped by record__write() and record__aio_push() */
92 	struct perf_data	data;
93 	struct auxtrace_record	*itr;
94 	struct evlist	*evlist;
95 	struct perf_session	*session;
96 	struct evlist		*sb_evlist;
97 	pthread_t		thread_id;
98 	int			realtime_prio;
99 	bool			switch_output_event_set;
100 	bool			no_buildid;
101 	bool			no_buildid_set;
102 	bool			no_buildid_cache;
103 	bool			no_buildid_cache_set;
104 	bool			buildid_all;
105 	bool			timestamp_filename;
106 	bool			timestamp_boundary;
107 	struct switch_output	switch_output;
108 	unsigned long long	samples;	/* incremented by the push/read helpers and process_sample_event() */
109 	struct mmap_cpu_mask	affinity_mask;	/* mask last applied by record__adjust_affinity() */
110 	unsigned long		output_max_size;	/* = 0: unlimited */
111 };
112 
/*
 * Set to 1 by sig_handler() and by record__write() once the output size
 * limit has been reached.
 */
113 static volatile int done;
114 
115 static volatile int auxtrace_record__snapshot_started;
/* Triggers consulted by the switch_output_*() and auxtrace snapshot helpers. */
116 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
117 static DEFINE_TRIGGER(switch_output_trigger);
118 
/* Short names, one per affinity mode (array is sized by PERF_AFFINITY_MAX). */
119 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
120 	"SYS", "NODE", "CPU"
121 };
122 
123 static bool switch_output_signal(struct record *rec)
124 {
125 	return rec->switch_output.signal &&
126 	       trigger_is_ready(&switch_output_trigger);
127 }
128 
129 static bool switch_output_size(struct record *rec)
130 {
131 	return rec->switch_output.size &&
132 	       trigger_is_ready(&switch_output_trigger) &&
133 	       (rec->bytes_written >= rec->switch_output.size);
134 }
135 
136 static bool switch_output_time(struct record *rec)
137 {
138 	return rec->switch_output.time &&
139 	       trigger_is_ready(&switch_output_trigger);
140 }
141 
142 static bool record__output_max_size_exceeded(struct record *rec)
143 {
144 	return rec->output_max_size &&
145 	       (rec->bytes_written >= rec->output_max_size);
146 }
147 
148 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
149 			 void *bf, size_t size)
150 {
151 	struct perf_data_file *file = &rec->session->data->file;
152 
153 	if (perf_data_file__write(file, bf, size) < 0) {
154 		pr_err("failed to write perf data, error: %m\n");
155 		return -1;
156 	}
157 
158 	rec->bytes_written += size;
159 
160 	if (record__output_max_size_exceeded(rec) && !done) {
161 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
162 				" stopping session ]\n",
163 				rec->bytes_written >> 10);
164 		done = 1;
165 	}
166 
167 	if (switch_output_size(rec))
168 		trigger_hit(&switch_output_trigger);
169 
170 	return 0;
171 }
172 
/*
 * Forward declarations: these helpers are defined further down in the file
 * but are already called by the AIO code that follows.
 */
173 static int record__aio_enabled(struct record *rec);
174 static int record__comp_enabled(struct record *rec);
175 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
176 			    void *src, size_t src_size);
177 
178 #ifdef HAVE_AIO_SUPPORT
/*
 * Fill in @cblock for writing @size bytes from @buf to @trace_fd at file
 * offset @off and queue it with aio_write(), retrying for as long as the
 * call fails with EAGAIN.
 *
 * On any other failure the control block is marked free (aio_fildes = -1)
 * and an error is logged.
 *
 * Returns the result of the last aio_write() call (0 on success).
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			break;

		if (errno == EAGAIN)
			continue;

		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		break;
	}

	return rc;
}
203 
204 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
205 {
206 	void *rem_buf;
207 	off_t rem_off;
208 	size_t rem_size;
209 	int rc, aio_errno;
210 	ssize_t aio_ret, written;
211 
212 	aio_errno = aio_error(cblock);
213 	if (aio_errno == EINPROGRESS)
214 		return 0;
215 
216 	written = aio_ret = aio_return(cblock);
217 	if (aio_ret < 0) {
218 		if (aio_errno != EINTR)
219 			pr_err("failed to write perf data, error: %m\n");
220 		written = 0;
221 	}
222 
223 	rem_size = cblock->aio_nbytes - written;
224 
225 	if (rem_size == 0) {
226 		cblock->aio_fildes = -1;
227 		/*
228 		 * md->refcount is incremented in record__aio_pushfn() for
229 		 * every aio write request started in record__aio_push() so
230 		 * decrement it because the request is now complete.
231 		 */
232 		perf_mmap__put(&md->core);
233 		rc = 1;
234 	} else {
235 		/*
236 		 * aio write request may require restart with the
237 		 * reminder if the kernel didn't write whole
238 		 * chunk at once.
239 		 */
240 		rem_off = cblock->aio_offset + written;
241 		rem_buf = (void *)(cblock->aio_buf + written);
242 		record__aio_write(cblock, cblock->aio_fildes,
243 				rem_buf, rem_size, rem_off);
244 		rc = 0;
245 	}
246 
247 	return rc;
248 }
249 
/*
 * Scan the aio control blocks of @md and wait for in-flight requests.
 *
 * With @sync_all false: returns the index of the first control block that
 * is free (aio_fildes == -1) or whose request record__aio_complete()
 * reports as finished.
 *
 * With @sync_all true: keeps looping until no control block is in flight
 * any more and then returns -1.
 *
 * -1 is also returned when a scan finds nothing to wait for, which for
 * @sync_all false only happens when md->aio.nr_cblocks is 0.
 */
250 static int record__aio_sync(struct mmap *md, bool sync_all)
251 {
252 	struct aiocb **aiocb = md->aio.aiocb;
253 	struct aiocb *cblocks = md->aio.cblocks;
254 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
255 	int i, do_suspend;
256 
257 	do {
258 		do_suspend = 0;
259 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
260 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
				/* Free or just completed: exclude it from the wait list. */
261 				if (sync_all)
262 					aiocb[i] = NULL;
263 				else
264 					return i;
265 			} else {
266 				/*
267 				 * Started aio write is not complete yet
268 				 * so it has to be waited before the
269 				 * next allocation.
270 				 */
271 				aiocb[i] = &cblocks[i];
272 				do_suspend = 1;
273 			}
274 		}
275 		if (!do_suspend)
276 			return -1;
277 
		/* Wait (1ms at a time) on the pending requests, then rescan. */
278 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
279 			if (!(errno == EAGAIN || errno == EINTR))
280 				pr_err("failed to sync perf data, error: %m\n");
281 		}
282 	} while (1);
283 }
284 
/*
 * Context handed to record__aio_pushfn() through perf_mmap__push() by
 * record__aio_push().
 */
285 struct record_aio {
286 	struct record	*rec;
287 	void		*data;	/* destination buffer (one of map->aio.data[]) */
288 	size_t		size;	/* bytes stored in @data so far */
289 };
290 
291 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
292 {
293 	struct record_aio *aio = to;
294 
295 	/*
296 	 * map->core.base data pointed by buf is copied into free map->aio.data[] buffer
297 	 * to release space in the kernel buffer as fast as possible, calling
298 	 * perf_mmap__consume() from perf_mmap__push() function.
299 	 *
300 	 * That lets the kernel to proceed with storing more profiling data into
301 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
302 	 *
303 	 * Coping can be done in two steps in case the chunk of profiling data
304 	 * crosses the upper bound of the kernel buffer. In this case we first move
305 	 * part of data from map->start till the upper bound and then the reminder
306 	 * from the beginning of the kernel buffer till the end of the data chunk.
307 	 */
308 
309 	if (record__comp_enabled(aio->rec)) {
310 		size = zstd_compress(aio->rec->session, aio->data + aio->size,
311 				     mmap__mmap_len(map) - aio->size,
312 				     buf, size);
313 	} else {
314 		memcpy(aio->data + aio->size, buf, size);
315 	}
316 
317 	if (!aio->size) {
318 		/*
319 		 * Increment map->refcount to guard map->aio.data[] buffer
320 		 * from premature deallocation because map object can be
321 		 * released earlier than aio write request started on
322 		 * map->aio.data[] buffer is complete.
323 		 *
324 		 * perf_mmap__put() is done at record__aio_complete()
325 		 * after started aio request completion or at record__aio_push()
326 		 * if the request failed to start.
327 		 */
328 		perf_mmap__get(&map->core);
329 	}
330 
331 	aio->size += size;
332 
333 	return size;
334 }
335 
336 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
337 {
338 	int ret, idx;
339 	int trace_fd = rec->session->data->file.fd;
340 	struct record_aio aio = { .rec = rec, .size = 0 };
341 
342 	/*
343 	 * Call record__aio_sync() to wait till map->aio.data[] buffer
344 	 * becomes available after previous aio write operation.
345 	 */
346 
347 	idx = record__aio_sync(map, false);
348 	aio.data = map->aio.data[idx];
349 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
350 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
351 		return ret;
352 
353 	rec->samples++;
354 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
355 	if (!ret) {
356 		*off += aio.size;
357 		rec->bytes_written += aio.size;
358 		if (switch_output_size(rec))
359 			trigger_hit(&switch_output_trigger);
360 	} else {
361 		/*
362 		 * Decrement map->refcount incremented in record__aio_pushfn()
363 		 * back if record__aio_write() operation failed to start, otherwise
364 		 * map->refcount is decremented in record__aio_complete() after
365 		 * aio write operation finishes successfully.
366 		 */
367 		perf_mmap__put(&map->core);
368 	}
369 
370 	return ret;
371 }
372 
/* Return the current file offset of @trace_fd (lseek() result, -1 on failure). */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t pos = lseek(trace_fd, 0, SEEK_CUR);

	return pos;
}
377 
/* Move the file offset of @trace_fd to @pos; the lseek() result is ignored. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	(void)lseek(trace_fd, pos, SEEK_SET);
}
382 
383 static void record__aio_mmap_read_sync(struct record *rec)
384 {
385 	int i;
386 	struct evlist *evlist = rec->evlist;
387 	struct mmap *maps = evlist->mmap;
388 
389 	if (!record__aio_enabled(rec))
390 		return;
391 
392 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
393 		struct mmap *map = &maps[i];
394 
395 		if (map->core.base)
396 			record__aio_sync(map, true);
397 	}
398 }
399 
400 static int nr_cblocks_default = 1;
401 static int nr_cblocks_max = 4;
402 
403 static int record__aio_parse(const struct option *opt,
404 			     const char *str,
405 			     int unset)
406 {
407 	struct record_opts *opts = (struct record_opts *)opt->value;
408 
409 	if (unset) {
410 		opts->nr_cblocks = 0;
411 	} else {
412 		if (str)
413 			opts->nr_cblocks = strtol(str, NULL, 0);
414 		if (!opts->nr_cblocks)
415 			opts->nr_cblocks = nr_cblocks_default;
416 	}
417 
418 	return 0;
419 }
420 #else /* HAVE_AIO_SUPPORT */
/*
 * Fallbacks used when HAVE_AIO_SUPPORT is not defined: pushing always
 * fails, the position helpers are no-ops and no control blocks are
 * available.
 */
421 static int nr_cblocks_max = 0;
422 
/* Always fails: there is no aio write path in this configuration. */
423 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
424 			    off_t *off __maybe_unused)
425 {
426 	return -1;
427 }
428 
/* No file position to report; always returns -1. */
429 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
430 {
431 	return -1;
432 }
433 
/* Nothing to do. */
434 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
435 {
436 }
437 
/* Nothing to wait for. */
438 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
439 {
440 }
441 #endif
442 
443 static int record__aio_enabled(struct record *rec)
444 {
445 	return rec->opts.nr_cblocks > 0;
446 }
447 
448 #define MMAP_FLUSH_DEFAULT 1
449 static int record__mmap_flush_parse(const struct option *opt,
450 				    const char *str,
451 				    int unset)
452 {
453 	int flush_max;
454 	struct record_opts *opts = (struct record_opts *)opt->value;
455 	static struct parse_tag tags[] = {
456 			{ .tag  = 'B', .mult = 1       },
457 			{ .tag  = 'K', .mult = 1 << 10 },
458 			{ .tag  = 'M', .mult = 1 << 20 },
459 			{ .tag  = 'G', .mult = 1 << 30 },
460 			{ .tag  = 0 },
461 	};
462 
463 	if (unset)
464 		return 0;
465 
466 	if (str) {
467 		opts->mmap_flush = parse_tag_value(str, tags);
468 		if (opts->mmap_flush == (int)-1)
469 			opts->mmap_flush = strtol(str, NULL, 0);
470 	}
471 
472 	if (!opts->mmap_flush)
473 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
474 
475 	flush_max = evlist__mmap_size(opts->mmap_pages);
476 	flush_max /= 4;
477 	if (opts->mmap_flush > flush_max)
478 		opts->mmap_flush = flush_max;
479 
480 	return 0;
481 }
482 
483 #ifdef HAVE_ZSTD_SUPPORT
484 static unsigned int comp_level_default = 1;
485 
486 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
487 {
488 	struct record_opts *opts = opt->value;
489 
490 	if (unset) {
491 		opts->comp_level = 0;
492 	} else {
493 		if (str)
494 			opts->comp_level = strtol(str, NULL, 0);
495 		if (!opts->comp_level)
496 			opts->comp_level = comp_level_default;
497 	}
498 
499 	return 0;
500 }
501 #endif
502 static unsigned int comp_level_max = 22;
503 
504 static int record__comp_enabled(struct record *rec)
505 {
506 	return rec->opts.comp_level > 0;
507 }
508 
509 static int process_synthesized_event(struct perf_tool *tool,
510 				     union perf_event *event,
511 				     struct perf_sample *sample __maybe_unused,
512 				     struct machine *machine __maybe_unused)
513 {
514 	struct record *rec = container_of(tool, struct record, tool);
515 	return record__write(rec, NULL, event, event->header.size);
516 }
517 
518 static int process_locked_synthesized_event(struct perf_tool *tool,
519 				     union perf_event *event,
520 				     struct perf_sample *sample __maybe_unused,
521 				     struct machine *machine __maybe_unused)
522 {
523 	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
524 	int ret;
525 
526 	pthread_mutex_lock(&synth_lock);
527 	ret = process_synthesized_event(tool, event, sample, machine);
528 	pthread_mutex_unlock(&synth_lock);
529 	return ret;
530 }
531 
532 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
533 {
534 	struct record *rec = to;
535 
536 	if (record__comp_enabled(rec)) {
537 		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
538 		bf   = map->data;
539 	}
540 
541 	rec->samples++;
542 	return record__write(rec, map, bf, size);
543 }
544 
/*
 * Number of the last non-SIGCHLD signal seen by sig_handler(); -1 means
 * none. record__sig_exit() re-raises it with the default disposition.
 */
545 static volatile int signr = -1;
/* Set by sig_handler() when SIGCHLD is received. */
546 static volatile int child_finished;
547 #ifdef HAVE_EVENTFD_SUPPORT
/* eventfd written by sig_handler() to wake up the poll(); -1 until it is set up. */
548 static int done_fd = -1;
549 #endif
550 
551 static void sig_handler(int sig)
552 {
553 	if (sig == SIGCHLD)
554 		child_finished = 1;
555 	else
556 		signr = sig;
557 
558 	done = 1;
559 #ifdef HAVE_EVENTFD_SUPPORT
560 {
561 	u64 tmp = 1;
562 	/*
563 	 * It is possible for this signal handler to run after done is checked
564 	 * in the main loop, but before the perf counter fds are polled. If this
565 	 * happens, the poll() will continue to wait even though done is set,
566 	 * and will only break out if either another signal is received, or the
567 	 * counters are ready for read. To ensure the poll() doesn't sleep when
568 	 * done is set, use an eventfd (done_fd) to wake up the poll().
569 	 */
570 	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
571 		pr_err("failed to signal wakeup fd, error: %m\n");
572 }
573 #endif // HAVE_EVENTFD_SUPPORT
574 }
575 
/*
 * Crash handler: first lets the perf hooks code recover, then hands @sig
 * to sighandler_dump_stack().
 */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
581 
582 static void record__sig_exit(void)
583 {
584 	if (signr == -1)
585 		return;
586 
587 	signal(signr, SIG_DFL);
588 	raise(signr);
589 }
590 
591 #ifdef HAVE_AUXTRACE_SUPPORT
592 
593 static int record__process_auxtrace(struct perf_tool *tool,
594 				    struct mmap *map,
595 				    union perf_event *event, void *data1,
596 				    size_t len1, void *data2, size_t len2)
597 {
598 	struct record *rec = container_of(tool, struct record, tool);
599 	struct perf_data *data = &rec->data;
600 	size_t padding;
601 	u8 pad[8] = {0};
602 
603 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
604 		off_t file_offset;
605 		int fd = perf_data__fd(data);
606 		int err;
607 
608 		file_offset = lseek(fd, 0, SEEK_CUR);
609 		if (file_offset == -1)
610 			return -1;
611 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
612 						     event, file_offset);
613 		if (err)
614 			return err;
615 	}
616 
617 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
618 	padding = (len1 + len2) & 7;
619 	if (padding)
620 		padding = 8 - padding;
621 
622 	record__write(rec, map, event, event->header.size);
623 	record__write(rec, map, data1, len1);
624 	if (len2)
625 		record__write(rec, map, data2, len2);
626 	record__write(rec, map, &pad, padding);
627 
628 	return 0;
629 }
630 
631 static int record__auxtrace_mmap_read(struct record *rec,
632 				      struct mmap *map)
633 {
634 	int ret;
635 
636 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
637 				  record__process_auxtrace);
638 	if (ret < 0)
639 		return ret;
640 
641 	if (ret)
642 		rec->samples++;
643 
644 	return 0;
645 }
646 
647 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
648 					       struct mmap *map)
649 {
650 	int ret;
651 
652 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
653 					   record__process_auxtrace,
654 					   rec->opts.auxtrace_snapshot_size);
655 	if (ret < 0)
656 		return ret;
657 
658 	if (ret)
659 		rec->samples++;
660 
661 	return 0;
662 }
663 
664 static int record__auxtrace_read_snapshot_all(struct record *rec)
665 {
666 	int i;
667 	int rc = 0;
668 
669 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
670 		struct mmap *map = &rec->evlist->mmap[i];
671 
672 		if (!map->auxtrace_mmap.base)
673 			continue;
674 
675 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
676 			rc = -1;
677 			goto out;
678 		}
679 	}
680 out:
681 	return rc;
682 }
683 
684 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
685 {
686 	pr_debug("Recording AUX area tracing snapshot\n");
687 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
688 		trigger_error(&auxtrace_snapshot_trigger);
689 	} else {
690 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
691 			trigger_error(&auxtrace_snapshot_trigger);
692 		else
693 			trigger_ready(&auxtrace_snapshot_trigger);
694 	}
695 }
696 
697 static int record__auxtrace_snapshot_exit(struct record *rec)
698 {
699 	if (trigger_is_error(&auxtrace_snapshot_trigger))
700 		return 0;
701 
702 	if (!auxtrace_record__snapshot_started &&
703 	    auxtrace_record__snapshot_start(rec->itr))
704 		return -1;
705 
706 	record__read_auxtrace_snapshot(rec, true);
707 	if (trigger_is_error(&auxtrace_snapshot_trigger))
708 		return -1;
709 
710 	return 0;
711 }
712 
713 static int record__auxtrace_init(struct record *rec)
714 {
715 	int err;
716 
717 	if (!rec->itr) {
718 		rec->itr = auxtrace_record__init(rec->evlist, &err);
719 		if (err)
720 			return err;
721 	}
722 
723 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
724 					      rec->opts.auxtrace_snapshot_opts);
725 	if (err)
726 		return err;
727 
728 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
729 					    rec->opts.auxtrace_sample_opts);
730 	if (err)
731 		return err;
732 
733 	return auxtrace_parse_filters(rec->evlist);
734 }
735 
736 #else
737 
/*
 * Fallbacks used when HAVE_AUXTRACE_SUPPORT is not defined: every helper
 * is a no-op that reports success.
 */
738 static inline
739 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
740 			       struct mmap *map __maybe_unused)
741 {
742 	return 0;
743 }
744 
/* Nothing to snapshot. */
745 static inline
746 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
747 				    bool on_exit __maybe_unused)
748 {
749 }
750 
/* Always reports success. */
751 static inline
752 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
753 {
754 	return 0;
755 }
756 
/* Always reports success. */
757 static inline
758 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
759 {
760 	return 0;
761 }
762 
/* Nothing to initialize; always reports success. */
763 static int record__auxtrace_init(struct record *rec __maybe_unused)
764 {
765 	return 0;
766 }
767 
768 #endif
769 
770 static int record__config_text_poke(struct evlist *evlist)
771 {
772 	struct evsel *evsel;
773 	int err;
774 
775 	/* Nothing to do if text poke is already configured */
776 	evlist__for_each_entry(evlist, evsel) {
777 		if (evsel->core.attr.text_poke)
778 			return 0;
779 	}
780 
781 	err = parse_events(evlist, "dummy:u", NULL);
782 	if (err)
783 		return err;
784 
785 	evsel = evlist__last(evlist);
786 
787 	evsel->core.attr.freq = 0;
788 	evsel->core.attr.sample_period = 1;
789 	evsel->core.attr.text_poke = 1;
790 	evsel->core.attr.ksymbol = 1;
791 
792 	evsel->core.system_wide = true;
793 	evsel->no_aux_samples = true;
794 	evsel->immediate = true;
795 
796 	/* Text poke must be collected on all CPUs */
797 	perf_cpu_map__put(evsel->core.own_cpus);
798 	evsel->core.own_cpus = perf_cpu_map__new(NULL);
799 	perf_cpu_map__put(evsel->core.cpus);
800 	evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
801 
802 	evsel__set_sample_bit(evsel, TIME);
803 
804 	return 0;
805 }
806 
807 static bool record__kcore_readable(struct machine *machine)
808 {
809 	char kcore[PATH_MAX];
810 	int fd;
811 
812 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
813 
814 	fd = open(kcore, O_RDONLY);
815 	if (fd < 0)
816 		return false;
817 
818 	close(fd);
819 
820 	return true;
821 }
822 
823 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
824 {
825 	char from_dir[PATH_MAX];
826 	char kcore_dir[PATH_MAX];
827 	int ret;
828 
829 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
830 
831 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
832 	if (ret)
833 		return ret;
834 
835 	return kcore_copy(from_dir, kcore_dir);
836 }
837 
838 static int record__mmap_evlist(struct record *rec,
839 			       struct evlist *evlist)
840 {
841 	struct record_opts *opts = &rec->opts;
842 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
843 				  opts->auxtrace_sample_mode;
844 	char msg[512];
845 
846 	if (opts->affinity != PERF_AFFINITY_SYS)
847 		cpu__setup_cpunode_map();
848 
849 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
850 				 opts->auxtrace_mmap_pages,
851 				 auxtrace_overwrite,
852 				 opts->nr_cblocks, opts->affinity,
853 				 opts->mmap_flush, opts->comp_level) < 0) {
854 		if (errno == EPERM) {
855 			pr_err("Permission error mapping pages.\n"
856 			       "Consider increasing "
857 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
858 			       "or try again with a smaller value of -m/--mmap_pages.\n"
859 			       "(current value: %u,%u)\n",
860 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
861 			return -errno;
862 		} else {
863 			pr_err("failed to mmap with %d (%s)\n", errno,
864 				str_error_r(errno, msg, sizeof(msg)));
865 			if (errno)
866 				return -errno;
867 			else
868 				return -EINVAL;
869 		}
870 	}
871 	return 0;
872 }
873 
874 static int record__mmap(struct record *rec)
875 {
876 	return record__mmap_evlist(rec, rec->evlist);
877 }
878 
/*
 * Open all events of rec->evlist and map their buffers.
 *
 * Steps, in order: optionally add/configure the dummy tracking event,
 * apply the record options to the evlist, open every event (with fallback
 * and weak-group retries), warn about restricted kernel address maps,
 * apply event filters, mmap the evlist and finally attach the evlist to
 * the session.
 *
 * Returns 0 on success, -ENOMEM if the dummy event cannot be added,
 * -errno if an event cannot be opened, -1 if a filter cannot be applied,
 * or the error from record__mmap().
 */
879 static int record__open(struct record *rec)
880 {
881 	char msg[BUFSIZ];
882 	struct evsel *pos;
883 	struct evlist *evlist = rec->evlist;
884 	struct perf_session *session = rec->session;
885 	struct record_opts *opts = &rec->opts;
886 	int rc = 0;
887 
888 	/*
889 	 * For initial_delay or system wide, we need to add a dummy event so
890 	 * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
891 	 * event synthesis.
892 	 */
893 	if (opts->initial_delay || target__has_cpu(&opts->target)) {
894 		pos = perf_evlist__get_tracking_event(evlist);
895 		if (!evsel__is_dummy_event(pos)) {
896 			/* Set up dummy event. */
897 			if (evlist__add_dummy(evlist))
898 				return -ENOMEM;
899 			pos = evlist__last(evlist);
900 			perf_evlist__set_tracking_event(evlist, pos);
901 		}
902 
903 		/*
904 		 * Enable the dummy event when the process is forked for
905 		 * initial_delay, immediately for system wide.
906 		 */
907 		if (opts->initial_delay && !pos->immediate)
908 			pos->core.attr.enable_on_exec = 1;
909 		else
910 			pos->immediate = 1;
911 	}
912 
913 	perf_evlist__config(evlist, opts, &callchain_param);
914 
915 	evlist__for_each_entry(evlist, pos) {
916 try_again:
917 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
			/* First retry: evsel__fallback() says the event can be retried. */
918 			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
919 				if (verbose > 0)
920 					ui__warning("%s\n", msg);
921 				goto try_again;
922 			}
			/* Second retry: reset a weak group and try again from the returned event. */
923 			if ((errno == EINVAL || errno == EBADF) &&
924 			    pos->leader != pos &&
925 			    pos->weak_group) {
926 			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
927 				goto try_again;
928 			}
929 			rc = -errno;
930 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
931 			ui__error("%s\n", msg);
932 			goto out;
933 		}
934 
935 		pos->supported = true;
936 	}
937 
938 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
939 		pr_warning(
940 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
941 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
942 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
943 "file is not found in the buildid cache or in the vmlinux path.\n\n"
944 "Samples in kernel modules won't be resolved at all.\n\n"
945 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
946 "even with a suitable vmlinux or kallsyms file.\n\n");
947 	}
948 
	/* On failure 'pos' is set to the event whose filter was rejected. */
949 	if (perf_evlist__apply_filters(evlist, &pos)) {
950 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
951 			pos->filter, evsel__name(pos), errno,
952 			str_error_r(errno, msg, sizeof(msg)));
953 		rc = -1;
954 		goto out;
955 	}
956 
957 	rc = record__mmap(rec);
958 	if (rc)
959 		goto out;
960 
961 	session->evlist = evlist;
962 	perf_session__set_id_hdr_size(session);
963 out:
964 	return rc;
965 }
966 
967 static int process_sample_event(struct perf_tool *tool,
968 				union perf_event *event,
969 				struct perf_sample *sample,
970 				struct evsel *evsel,
971 				struct machine *machine)
972 {
973 	struct record *rec = container_of(tool, struct record, tool);
974 
975 	if (rec->evlist->first_sample_time == 0)
976 		rec->evlist->first_sample_time = sample->time;
977 
978 	rec->evlist->last_sample_time = sample->time;
979 
980 	if (rec->buildid_all)
981 		return 0;
982 
983 	rec->samples++;
984 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
985 }
986 
987 static int process_buildids(struct record *rec)
988 {
989 	struct perf_session *session = rec->session;
990 
991 	if (perf_data__size(&rec->data) == 0)
992 		return 0;
993 
994 	/*
995 	 * During this process, it'll load kernel map and replace the
996 	 * dso->long_name to a real pathname it found.  In this case
997 	 * we prefer the vmlinux path like
998 	 *   /lib/modules/3.16.4/build/vmlinux
999 	 *
1000 	 * rather than build-id path (in debug directory).
1001 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1002 	 */
1003 	symbol_conf.ignore_vmlinux_buildid = true;
1004 
1005 	/*
1006 	 * If --buildid-all is given, it marks all DSO regardless of hits,
1007 	 * so no need to process samples. But if timestamp_boundary is enabled,
1008 	 * it still needs to walk on all samples to get the timestamps of
1009 	 * first/last samples.
1010 	 */
1011 	if (rec->buildid_all && !rec->timestamp_boundary)
1012 		rec->tool.sample = NULL;
1013 
1014 	return perf_session__process_events(session);
1015 }
1016 
1017 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1018 {
1019 	int err;
1020 	struct perf_tool *tool = data;
1021 	/*
1022 	 *As for guest kernel when processing subcommand record&report,
1023 	 *we arrange module mmap prior to guest kernel mmap and trigger
1024 	 *a preload dso because default guest module symbols are loaded
1025 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1026 	 *method is used to avoid symbol missing when the first addr is
1027 	 *in module instead of in guest kernel.
1028 	 */
1029 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1030 					     machine);
1031 	if (err < 0)
1032 		pr_err("Couldn't record guest kernel [%d]'s reference"
1033 		       " relocation symbol.\n", machine->pid);
1034 
1035 	/*
1036 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1037 	 * have no _text sometimes.
1038 	 */
1039 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1040 						 machine);
1041 	if (err < 0)
1042 		pr_err("Couldn't record guest kernel [%d]'s reference"
1043 		       " relocation symbol.\n", machine->pid);
1044 }
1045 
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event; record__mmap_read_evlist()
 * writes it after a pass over the mmaps that produced output.
 */
1046 static struct perf_event_header finished_round_event = {
1047 	.size = sizeof(struct perf_event_header),
1048 	.type = PERF_RECORD_FINISHED_ROUND,
1049 };
1050 
1051 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1052 {
1053 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1054 	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
1055 			  rec->affinity_mask.nbits)) {
1056 		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
1057 		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
1058 			  map->affinity_mask.bits, rec->affinity_mask.nbits);
1059 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
1060 				  (cpu_set_t *)rec->affinity_mask.bits);
1061 		if (verbose == 2)
1062 			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
1063 	}
1064 }
1065 
1066 static size_t process_comp_header(void *record, size_t increment)
1067 {
1068 	struct perf_record_compressed *event = record;
1069 	size_t size = sizeof(*event);
1070 
1071 	if (increment) {
1072 		event->header.size += increment;
1073 		return increment;
1074 	}
1075 
1076 	event->header.type = PERF_RECORD_COMPRESSED;
1077 	event->header.size = size;
1078 
1079 	return size;
1080 }
1081 
1082 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1083 			    void *src, size_t src_size)
1084 {
1085 	size_t compressed;
1086 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1087 
1088 	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1089 						     max_record_size, process_comp_header);
1090 
1091 	session->bytes_transferred += src_size;
1092 	session->bytes_compressed  += compressed;
1093 
1094 	return compressed;
1095 }
1096 
/*
 * Drain every mmap of @evlist into the output.
 *
 * @overwrite: read evlist->overwrite_mmap instead of evlist->mmap; in that
 *             case nothing is done unless the backward-mmap state is
 *             BKW_MMAP_DATA_PENDING, and the state is toggled to
 *             BKW_MMAP_EMPTY after a successful pass.
 * @synch:     temporarily set each map's flush threshold to 1 while it is
 *             pushed, restoring the previous value afterwards.
 *
 * Regular data goes through record__pushfn() or, with aio enabled,
 * record__aio_push(); AUX area data is read too unless snapshot or sample
 * mode is active. If the pass wrote anything, a FINISHED_ROUND event is
 * appended.
 *
 * Returns 0 on success (including "nothing to do"), -1 on failure.
 */
1097 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1098 				    bool overwrite, bool synch)
1099 {
	/* Remember the byte count at entry to detect whether this pass wrote data. */
1100 	u64 bytes_written = rec->bytes_written;
1101 	int i;
1102 	int rc = 0;
1103 	struct mmap *maps;
1104 	int trace_fd = rec->data.file.fd;
1105 	off_t off = 0;
1106 
1107 	if (!evlist)
1108 		return 0;
1109 
1110 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1111 	if (!maps)
1112 		return 0;
1113 
1114 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1115 		return 0;
1116 
	/* aio writes are issued at explicit offsets, starting at the current file position. */
1117 	if (record__aio_enabled(rec))
1118 		off = record__aio_get_pos(trace_fd);
1119 
1120 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
1121 		u64 flush = 0;
1122 		struct mmap *map = &maps[i];
1123 
1124 		if (map->core.base) {
1125 			record__adjust_affinity(rec, map);
1126 			if (synch) {
1127 				flush = map->core.flush;
1128 				map->core.flush = 1;
1129 			}
1130 			if (!record__aio_enabled(rec)) {
1131 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1132 					if (synch)
1133 						map->core.flush = flush;
1134 					rc = -1;
1135 					goto out;
1136 				}
1137 			} else {
1138 				if (record__aio_push(rec, map, &off) < 0) {
					/* Move the file position past what has been queued so far. */
1139 					record__aio_set_pos(trace_fd, off);
1140 					if (synch)
1141 						map->core.flush = flush;
1142 					rc = -1;
1143 					goto out;
1144 				}
1145 			}
1146 			if (synch)
1147 				map->core.flush = flush;
1148 		}
1149 
1150 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1151 		    !rec->opts.auxtrace_sample_mode &&
1152 		    record__auxtrace_mmap_read(rec, map) != 0) {
1153 			rc = -1;
1154 			goto out;
1155 		}
1156 	}
1157 
1158 	if (record__aio_enabled(rec))
1159 		record__aio_set_pos(trace_fd, off);
1160 
1161 	/*
1162 	 * Mark the round finished in case we wrote
1163 	 * at least one event.
1164 	 */
1165 	if (bytes_written != rec->bytes_written)
1166 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1167 
1168 	if (overwrite)
1169 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1170 out:
1171 	return rc;
1172 }
1173 
1174 static int record__mmap_read_all(struct record *rec, bool synch)
1175 {
1176 	int err;
1177 
1178 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1179 	if (err)
1180 		return err;
1181 
1182 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1183 }
1184 
1185 static void record__init_features(struct record *rec)
1186 {
1187 	struct perf_session *session = rec->session;
1188 	int feat;
1189 
1190 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1191 		perf_header__set_feat(&session->header, feat);
1192 
1193 	if (rec->no_buildid)
1194 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1195 
1196 	if (!have_tracepoints(&rec->evlist->core.entries))
1197 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1198 
1199 	if (!rec->opts.branch_stack)
1200 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1201 
1202 	if (!rec->opts.full_auxtrace)
1203 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1204 
1205 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1206 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1207 
1208 	if (!rec->opts.use_clockid)
1209 		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1210 
1211 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1212 	if (!record__comp_enabled(rec))
1213 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1214 
1215 	perf_header__clear_feat(&session->header, HEADER_STAT);
1216 }
1217 
1218 static void
1219 record__finish_output(struct record *rec)
1220 {
1221 	struct perf_data *data = &rec->data;
1222 	int fd = perf_data__fd(data);
1223 
1224 	if (data->is_pipe)
1225 		return;
1226 
1227 	rec->session->header.data_size += rec->bytes_written;
1228 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1229 
1230 	if (!rec->no_buildid) {
1231 		process_buildids(rec);
1232 
1233 		if (rec->buildid_all)
1234 			dsos__hit_all(rec->session);
1235 	}
1236 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1237 
1238 	return;
1239 }
1240 
1241 static int record__synthesize_workload(struct record *rec, bool tail)
1242 {
1243 	int err;
1244 	struct perf_thread_map *thread_map;
1245 
1246 	if (rec->opts.tail_synthesize != tail)
1247 		return 0;
1248 
1249 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1250 	if (thread_map == NULL)
1251 		return -1;
1252 
1253 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1254 						 process_synthesized_event,
1255 						 &rec->session->machines.host,
1256 						 rec->opts.sample_address);
1257 	perf_thread_map__put(thread_map);
1258 	return err;
1259 }
1260 
1261 static int record__synthesize(struct record *rec, bool tail);
1262 
1263 static int
1264 record__switch_output(struct record *rec, bool at_exit)
1265 {
1266 	struct perf_data *data = &rec->data;
1267 	int fd, err;
1268 	char *new_filename;
1269 
1270 	/* Same Size:      "2015122520103046"*/
1271 	char timestamp[] = "InvalidTimestamp";
1272 
1273 	record__aio_mmap_read_sync(rec);
1274 
1275 	record__synthesize(rec, true);
1276 	if (target__none(&rec->opts.target))
1277 		record__synthesize_workload(rec, true);
1278 
1279 	rec->samples = 0;
1280 	record__finish_output(rec);
1281 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1282 	if (err) {
1283 		pr_err("Failed to get current timestamp\n");
1284 		return -EINVAL;
1285 	}
1286 
1287 	fd = perf_data__switch(data, timestamp,
1288 				    rec->session->header.data_offset,
1289 				    at_exit, &new_filename);
1290 	if (fd >= 0 && !at_exit) {
1291 		rec->bytes_written = 0;
1292 		rec->session->header.data_size = 0;
1293 	}
1294 
1295 	if (!quiet)
1296 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1297 			data->path, timestamp);
1298 
1299 	if (rec->switch_output.num_files) {
1300 		int n = rec->switch_output.cur_file + 1;
1301 
1302 		if (n >= rec->switch_output.num_files)
1303 			n = 0;
1304 		rec->switch_output.cur_file = n;
1305 		if (rec->switch_output.filenames[n]) {
1306 			remove(rec->switch_output.filenames[n]);
1307 			zfree(&rec->switch_output.filenames[n]);
1308 		}
1309 		rec->switch_output.filenames[n] = new_filename;
1310 	} else {
1311 		free(new_filename);
1312 	}
1313 
1314 	/* Output tracking events */
1315 	if (!at_exit) {
1316 		record__synthesize(rec, false);
1317 
1318 		/*
1319 		 * In 'perf record --switch-output' without -a,
1320 		 * record__synthesize() in record__switch_output() won't
1321 		 * generate tracking events because there's no thread_map
1322 		 * in evlist. Which causes newly created perf.data doesn't
1323 		 * contain map and comm information.
1324 		 * Create a fake thread_map and directly call
1325 		 * perf_event__synthesize_thread_map() for those events.
1326 		 */
1327 		if (target__none(&rec->opts.target))
1328 			record__synthesize_workload(rec, false);
1329 	}
1330 	return fd;
1331 }
1332 
1333 static volatile int workload_exec_errno;
1334 
1335 /*
1336  * perf_evlist__prepare_workload will send a SIGUSR1
1337  * if the fork fails, since we asked by setting its
1338  * want_signal to true.
1339  */
1340 static void workload_exec_failed_signal(int signo __maybe_unused,
1341 					siginfo_t *info,
1342 					void *ucontext __maybe_unused)
1343 {
1344 	workload_exec_errno = info->si_value.sival_int;
1345 	done = 1;
1346 	child_finished = 1;
1347 }
1348 
1349 static void snapshot_sig_handler(int sig);
1350 static void alarm_sig_handler(int sig);
1351 
1352 static const struct perf_event_mmap_page *
1353 perf_evlist__pick_pc(struct evlist *evlist)
1354 {
1355 	if (evlist) {
1356 		if (evlist->mmap && evlist->mmap[0].core.base)
1357 			return evlist->mmap[0].core.base;
1358 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1359 			return evlist->overwrite_mmap[0].core.base;
1360 	}
1361 	return NULL;
1362 }
1363 
1364 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1365 {
1366 	const struct perf_event_mmap_page *pc;
1367 
1368 	pc = perf_evlist__pick_pc(rec->evlist);
1369 	if (pc)
1370 		return pc;
1371 	return NULL;
1372 }
1373 
1374 static int record__synthesize(struct record *rec, bool tail)
1375 {
1376 	struct perf_session *session = rec->session;
1377 	struct machine *machine = &session->machines.host;
1378 	struct perf_data *data = &rec->data;
1379 	struct record_opts *opts = &rec->opts;
1380 	struct perf_tool *tool = &rec->tool;
1381 	int fd = perf_data__fd(data);
1382 	int err = 0;
1383 	event_op f = process_synthesized_event;
1384 
1385 	if (rec->opts.tail_synthesize != tail)
1386 		return 0;
1387 
1388 	if (data->is_pipe) {
1389 		/*
1390 		 * We need to synthesize events first, because some
1391 		 * features works on top of them (on report side).
1392 		 */
1393 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1394 						   process_synthesized_event);
1395 		if (err < 0) {
1396 			pr_err("Couldn't synthesize attrs.\n");
1397 			goto out;
1398 		}
1399 
1400 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1401 						      process_synthesized_event);
1402 		if (err < 0) {
1403 			pr_err("Couldn't synthesize features.\n");
1404 			return err;
1405 		}
1406 
1407 		if (have_tracepoints(&rec->evlist->core.entries)) {
1408 			/*
1409 			 * FIXME err <= 0 here actually means that
1410 			 * there were no tracepoints so its not really
1411 			 * an error, just that we don't need to
1412 			 * synthesize anything.  We really have to
1413 			 * return this more properly and also
1414 			 * propagate errors that now are calling die()
1415 			 */
1416 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1417 								  process_synthesized_event);
1418 			if (err <= 0) {
1419 				pr_err("Couldn't record tracing data.\n");
1420 				goto out;
1421 			}
1422 			rec->bytes_written += err;
1423 		}
1424 	}
1425 
1426 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1427 					  process_synthesized_event, machine);
1428 	if (err)
1429 		goto out;
1430 
1431 	/* Synthesize id_index before auxtrace_info */
1432 	if (rec->opts.auxtrace_sample_mode) {
1433 		err = perf_event__synthesize_id_index(tool,
1434 						      process_synthesized_event,
1435 						      session->evlist, machine);
1436 		if (err)
1437 			goto out;
1438 	}
1439 
1440 	if (rec->opts.full_auxtrace) {
1441 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1442 					session, process_synthesized_event);
1443 		if (err)
1444 			goto out;
1445 	}
1446 
1447 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1448 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1449 							 machine);
1450 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1451 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1452 				   "Check /proc/kallsyms permission or run as root.\n");
1453 
1454 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1455 						     machine);
1456 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1457 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1458 				   "Check /proc/modules permission or run as root.\n");
1459 	}
1460 
1461 	if (perf_guest) {
1462 		machines__process_guests(&session->machines,
1463 					 perf_event__synthesize_guest_os, tool);
1464 	}
1465 
1466 	err = perf_event__synthesize_extra_attr(&rec->tool,
1467 						rec->evlist,
1468 						process_synthesized_event,
1469 						data->is_pipe);
1470 	if (err)
1471 		goto out;
1472 
1473 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1474 						 process_synthesized_event,
1475 						NULL);
1476 	if (err < 0) {
1477 		pr_err("Couldn't synthesize thread map.\n");
1478 		return err;
1479 	}
1480 
1481 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1482 					     process_synthesized_event, NULL);
1483 	if (err < 0) {
1484 		pr_err("Couldn't synthesize cpu map.\n");
1485 		return err;
1486 	}
1487 
1488 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1489 						machine, opts);
1490 	if (err < 0)
1491 		pr_warning("Couldn't synthesize bpf events.\n");
1492 
1493 	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1494 					     machine);
1495 	if (err < 0)
1496 		pr_warning("Couldn't synthesize cgroup events.\n");
1497 
1498 	if (rec->opts.nr_threads_synthesize > 1) {
1499 		perf_set_multithreaded();
1500 		f = process_locked_synthesized_event;
1501 	}
1502 
1503 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1504 					    f, opts->sample_address,
1505 					    rec->opts.nr_threads_synthesize);
1506 
1507 	if (rec->opts.nr_threads_synthesize > 1)
1508 		perf_set_singlethreaded();
1509 
1510 out:
1511 	return err;
1512 }
1513 
1514 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1515 {
1516 	struct record *rec = data;
1517 	pthread_kill(rec->thread_id, SIGUSR2);
1518 	return 0;
1519 }
1520 
1521 static int record__setup_sb_evlist(struct record *rec)
1522 {
1523 	struct record_opts *opts = &rec->opts;
1524 
1525 	if (rec->sb_evlist != NULL) {
1526 		/*
1527 		 * We get here if --switch-output-event populated the
1528 		 * sb_evlist, so associate a callback that will send a SIGUSR2
1529 		 * to the main thread.
1530 		 */
1531 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1532 		rec->thread_id = pthread_self();
1533 	}
1534 #ifdef HAVE_LIBBPF_SUPPORT
1535 	if (!opts->no_bpf_event) {
1536 		if (rec->sb_evlist == NULL) {
1537 			rec->sb_evlist = evlist__new();
1538 
1539 			if (rec->sb_evlist == NULL) {
1540 				pr_err("Couldn't create side band evlist.\n.");
1541 				return -1;
1542 			}
1543 		}
1544 
1545 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1546 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
1547 			return -1;
1548 		}
1549 	}
1550 #endif
1551 	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1552 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1553 		opts->no_bpf_event = true;
1554 	}
1555 
1556 	return 0;
1557 }
1558 
1559 static int record__init_clock(struct record *rec)
1560 {
1561 	struct perf_session *session = rec->session;
1562 	struct timespec ref_clockid;
1563 	struct timeval ref_tod;
1564 	u64 ref;
1565 
1566 	if (!rec->opts.use_clockid)
1567 		return 0;
1568 
1569 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1570 		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1571 
1572 	session->header.env.clock.clockid = rec->opts.clockid;
1573 
1574 	if (gettimeofday(&ref_tod, NULL) != 0) {
1575 		pr_err("gettimeofday failed, cannot set reference time.\n");
1576 		return -1;
1577 	}
1578 
1579 	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1580 		pr_err("clock_gettime failed, cannot set reference time.\n");
1581 		return -1;
1582 	}
1583 
1584 	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1585 	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1586 
1587 	session->header.env.clock.tod_ns = ref;
1588 
1589 	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
1590 	      (u64) ref_clockid.tv_nsec;
1591 
1592 	session->header.env.clock.clockid_ns = ref;
1593 	return 0;
1594 }
1595 
1596 static int __cmd_record(struct record *rec, int argc, const char **argv)
1597 {
1598 	int err;
1599 	int status = 0;
1600 	unsigned long waking = 0;
1601 	const bool forks = argc > 0;
1602 	struct perf_tool *tool = &rec->tool;
1603 	struct record_opts *opts = &rec->opts;
1604 	struct perf_data *data = &rec->data;
1605 	struct perf_session *session;
1606 	bool disabled = false, draining = false;
1607 	int fd;
1608 	float ratio = 0;
1609 	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
1610 
1611 	atexit(record__sig_exit);
1612 	signal(SIGCHLD, sig_handler);
1613 	signal(SIGINT, sig_handler);
1614 	signal(SIGTERM, sig_handler);
1615 	signal(SIGSEGV, sigsegv_handler);
1616 
1617 	if (rec->opts.record_namespaces)
1618 		tool->namespace_events = true;
1619 
1620 	if (rec->opts.record_cgroup) {
1621 #ifdef HAVE_FILE_HANDLE
1622 		tool->cgroup_events = true;
1623 #else
1624 		pr_err("cgroup tracking is not supported\n");
1625 		return -1;
1626 #endif
1627 	}
1628 
1629 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1630 		signal(SIGUSR2, snapshot_sig_handler);
1631 		if (rec->opts.auxtrace_snapshot_mode)
1632 			trigger_on(&auxtrace_snapshot_trigger);
1633 		if (rec->switch_output.enabled)
1634 			trigger_on(&switch_output_trigger);
1635 	} else {
1636 		signal(SIGUSR2, SIG_IGN);
1637 	}
1638 
1639 	session = perf_session__new(data, false, tool);
1640 	if (IS_ERR(session)) {
1641 		pr_err("Perf session creation failed.\n");
1642 		return PTR_ERR(session);
1643 	}
1644 
1645 	fd = perf_data__fd(data);
1646 	rec->session = session;
1647 
1648 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1649 		pr_err("Compression initialization failed.\n");
1650 		return -1;
1651 	}
1652 #ifdef HAVE_EVENTFD_SUPPORT
1653 	done_fd = eventfd(0, EFD_NONBLOCK);
1654 	if (done_fd < 0) {
1655 		pr_err("Failed to create wakeup eventfd, error: %m\n");
1656 		status = -1;
1657 		goto out_delete_session;
1658 	}
1659 	err = evlist__add_pollfd(rec->evlist, done_fd);
1660 	if (err < 0) {
1661 		pr_err("Failed to add wakeup eventfd to poll list\n");
1662 		status = err;
1663 		goto out_delete_session;
1664 	}
1665 #endif // HAVE_EVENTFD_SUPPORT
1666 
1667 	session->header.env.comp_type  = PERF_COMP_ZSTD;
1668 	session->header.env.comp_level = rec->opts.comp_level;
1669 
1670 	if (rec->opts.kcore &&
1671 	    !record__kcore_readable(&session->machines.host)) {
1672 		pr_err("ERROR: kcore is not readable.\n");
1673 		return -1;
1674 	}
1675 
1676 	if (record__init_clock(rec))
1677 		return -1;
1678 
1679 	record__init_features(rec);
1680 
1681 	if (forks) {
1682 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1683 						    argv, data->is_pipe,
1684 						    workload_exec_failed_signal);
1685 		if (err < 0) {
1686 			pr_err("Couldn't run the workload!\n");
1687 			status = err;
1688 			goto out_delete_session;
1689 		}
1690 	}
1691 
1692 	/*
1693 	 * If we have just single event and are sending data
1694 	 * through pipe, we need to force the ids allocation,
1695 	 * because we synthesize event name through the pipe
1696 	 * and need the id for that.
1697 	 */
1698 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1699 		rec->opts.sample_id = true;
1700 
1701 	if (record__open(rec) != 0) {
1702 		err = -1;
1703 		goto out_child;
1704 	}
1705 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1706 
1707 	if (rec->opts.kcore) {
1708 		err = record__kcore_copy(&session->machines.host, data);
1709 		if (err) {
1710 			pr_err("ERROR: Failed to copy kcore\n");
1711 			goto out_child;
1712 		}
1713 	}
1714 
1715 	err = bpf__apply_obj_config();
1716 	if (err) {
1717 		char errbuf[BUFSIZ];
1718 
1719 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1720 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1721 			 errbuf);
1722 		goto out_child;
1723 	}
1724 
1725 	/*
1726 	 * Normally perf_session__new would do this, but it doesn't have the
1727 	 * evlist.
1728 	 */
1729 	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
1730 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1731 		rec->tool.ordered_events = false;
1732 	}
1733 
1734 	if (!rec->evlist->nr_groups)
1735 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1736 
1737 	if (data->is_pipe) {
1738 		err = perf_header__write_pipe(fd);
1739 		if (err < 0)
1740 			goto out_child;
1741 	} else {
1742 		err = perf_session__write_header(session, rec->evlist, fd, false);
1743 		if (err < 0)
1744 			goto out_child;
1745 	}
1746 
1747 	err = -1;
1748 	if (!rec->no_buildid
1749 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1750 		pr_err("Couldn't generate buildids. "
1751 		       "Use --no-buildid to profile anyway.\n");
1752 		goto out_child;
1753 	}
1754 
1755 	err = record__setup_sb_evlist(rec);
1756 	if (err)
1757 		goto out_child;
1758 
1759 	err = record__synthesize(rec, false);
1760 	if (err < 0)
1761 		goto out_child;
1762 
1763 	if (rec->realtime_prio) {
1764 		struct sched_param param;
1765 
1766 		param.sched_priority = rec->realtime_prio;
1767 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1768 			pr_err("Could not set realtime priority.\n");
1769 			err = -1;
1770 			goto out_child;
1771 		}
1772 	}
1773 
1774 	/*
1775 	 * When perf is starting the traced process, all the events
1776 	 * (apart from group members) have enable_on_exec=1 set,
1777 	 * so don't spoil it by prematurely enabling them.
1778 	 */
1779 	if (!target__none(&opts->target) && !opts->initial_delay)
1780 		evlist__enable(rec->evlist);
1781 
1782 	/*
1783 	 * Let the child rip
1784 	 */
1785 	if (forks) {
1786 		struct machine *machine = &session->machines.host;
1787 		union perf_event *event;
1788 		pid_t tgid;
1789 
1790 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1791 		if (event == NULL) {
1792 			err = -ENOMEM;
1793 			goto out_child;
1794 		}
1795 
1796 		/*
1797 		 * Some H/W events are generated before COMM event
1798 		 * which is emitted during exec(), so perf script
1799 		 * cannot see a correct process name for those events.
1800 		 * Synthesize COMM event to prevent it.
1801 		 */
1802 		tgid = perf_event__synthesize_comm(tool, event,
1803 						   rec->evlist->workload.pid,
1804 						   process_synthesized_event,
1805 						   machine);
1806 		free(event);
1807 
1808 		if (tgid == -1)
1809 			goto out_child;
1810 
1811 		event = malloc(sizeof(event->namespaces) +
1812 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1813 			       machine->id_hdr_size);
1814 		if (event == NULL) {
1815 			err = -ENOMEM;
1816 			goto out_child;
1817 		}
1818 
1819 		/*
1820 		 * Synthesize NAMESPACES event for the command specified.
1821 		 */
1822 		perf_event__synthesize_namespaces(tool, event,
1823 						  rec->evlist->workload.pid,
1824 						  tgid, process_synthesized_event,
1825 						  machine);
1826 		free(event);
1827 
1828 		perf_evlist__start_workload(rec->evlist);
1829 	}
1830 
1831 	if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
1832 		goto out_child;
1833 
1834 	if (opts->initial_delay) {
1835 		pr_info(EVLIST_DISABLED_MSG);
1836 		if (opts->initial_delay > 0) {
1837 			usleep(opts->initial_delay * USEC_PER_MSEC);
1838 			evlist__enable(rec->evlist);
1839 			pr_info(EVLIST_ENABLED_MSG);
1840 		}
1841 	}
1842 
1843 	trigger_ready(&auxtrace_snapshot_trigger);
1844 	trigger_ready(&switch_output_trigger);
1845 	perf_hooks__invoke_record_start();
1846 	for (;;) {
1847 		unsigned long long hits = rec->samples;
1848 
1849 		/*
1850 		 * rec->evlist->bkw_mmap_state is possible to be
1851 		 * BKW_MMAP_EMPTY here: when done == true and
1852 		 * hits != rec->samples in previous round.
1853 		 *
1854 		 * perf_evlist__toggle_bkw_mmap ensure we never
1855 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1856 		 */
1857 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1858 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1859 
1860 		if (record__mmap_read_all(rec, false) < 0) {
1861 			trigger_error(&auxtrace_snapshot_trigger);
1862 			trigger_error(&switch_output_trigger);
1863 			err = -1;
1864 			goto out_child;
1865 		}
1866 
1867 		if (auxtrace_record__snapshot_started) {
1868 			auxtrace_record__snapshot_started = 0;
1869 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1870 				record__read_auxtrace_snapshot(rec, false);
1871 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1872 				pr_err("AUX area tracing snapshot failed\n");
1873 				err = -1;
1874 				goto out_child;
1875 			}
1876 		}
1877 
1878 		if (trigger_is_hit(&switch_output_trigger)) {
1879 			/*
1880 			 * If switch_output_trigger is hit, the data in
1881 			 * overwritable ring buffer should have been collected,
1882 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1883 			 *
1884 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
1885 			 * record__mmap_read_all() didn't collect data from
1886 			 * overwritable ring buffer. Read again.
1887 			 */
1888 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1889 				continue;
1890 			trigger_ready(&switch_output_trigger);
1891 
1892 			/*
1893 			 * Reenable events in overwrite ring buffer after
1894 			 * record__mmap_read_all(): we should have collected
1895 			 * data from it.
1896 			 */
1897 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1898 
1899 			if (!quiet)
1900 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1901 					waking);
1902 			waking = 0;
1903 			fd = record__switch_output(rec, false);
1904 			if (fd < 0) {
1905 				pr_err("Failed to switch to new file\n");
1906 				trigger_error(&switch_output_trigger);
1907 				err = fd;
1908 				goto out_child;
1909 			}
1910 
1911 			/* re-arm the alarm */
1912 			if (rec->switch_output.time)
1913 				alarm(rec->switch_output.time);
1914 		}
1915 
1916 		if (hits == rec->samples) {
1917 			if (done || draining)
1918 				break;
1919 			err = evlist__poll(rec->evlist, -1);
1920 			/*
1921 			 * Propagate error, only if there's any. Ignore positive
1922 			 * number of returned events and interrupt error.
1923 			 */
1924 			if (err > 0 || (err < 0 && errno == EINTR))
1925 				err = 0;
1926 			waking++;
1927 
1928 			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1929 				draining = true;
1930 		}
1931 
1932 		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
1933 			switch (cmd) {
1934 			case EVLIST_CTL_CMD_ENABLE:
1935 				pr_info(EVLIST_ENABLED_MSG);
1936 				break;
1937 			case EVLIST_CTL_CMD_DISABLE:
1938 				pr_info(EVLIST_DISABLED_MSG);
1939 				break;
1940 			case EVLIST_CTL_CMD_ACK:
1941 			case EVLIST_CTL_CMD_UNSUPPORTED:
1942 			default:
1943 				break;
1944 			}
1945 		}
1946 
1947 		/*
1948 		 * When perf is starting the traced process, at the end events
1949 		 * die with the process and we wait for that. Thus no need to
1950 		 * disable events in this case.
1951 		 */
1952 		if (done && !disabled && !target__none(&opts->target)) {
1953 			trigger_off(&auxtrace_snapshot_trigger);
1954 			evlist__disable(rec->evlist);
1955 			disabled = true;
1956 		}
1957 	}
1958 
1959 	trigger_off(&auxtrace_snapshot_trigger);
1960 	trigger_off(&switch_output_trigger);
1961 
1962 	if (opts->auxtrace_snapshot_on_exit)
1963 		record__auxtrace_snapshot_exit(rec);
1964 
1965 	if (forks && workload_exec_errno) {
1966 		char msg[STRERR_BUFSIZE];
1967 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1968 		pr_err("Workload failed: %s\n", emsg);
1969 		err = -1;
1970 		goto out_child;
1971 	}
1972 
1973 	if (!quiet)
1974 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1975 
1976 	if (target__none(&rec->opts.target))
1977 		record__synthesize_workload(rec, true);
1978 
1979 out_child:
1980 	evlist__finalize_ctlfd(rec->evlist);
1981 	record__mmap_read_all(rec, true);
1982 	record__aio_mmap_read_sync(rec);
1983 
1984 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1985 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1986 		session->header.env.comp_ratio = ratio + 0.5;
1987 	}
1988 
1989 	if (forks) {
1990 		int exit_status;
1991 
1992 		if (!child_finished)
1993 			kill(rec->evlist->workload.pid, SIGTERM);
1994 
1995 		wait(&exit_status);
1996 
1997 		if (err < 0)
1998 			status = err;
1999 		else if (WIFEXITED(exit_status))
2000 			status = WEXITSTATUS(exit_status);
2001 		else if (WIFSIGNALED(exit_status))
2002 			signr = WTERMSIG(exit_status);
2003 	} else
2004 		status = err;
2005 
2006 	record__synthesize(rec, true);
2007 	/* this will be recalculated during process_buildids() */
2008 	rec->samples = 0;
2009 
2010 	if (!err) {
2011 		if (!rec->timestamp_filename) {
2012 			record__finish_output(rec);
2013 		} else {
2014 			fd = record__switch_output(rec, true);
2015 			if (fd < 0) {
2016 				status = fd;
2017 				goto out_delete_session;
2018 			}
2019 		}
2020 	}
2021 
2022 	perf_hooks__invoke_record_end();
2023 
2024 	if (!err && !quiet) {
2025 		char samples[128];
2026 		const char *postfix = rec->timestamp_filename ?
2027 					".<timestamp>" : "";
2028 
2029 		if (rec->samples && !rec->opts.full_auxtrace)
2030 			scnprintf(samples, sizeof(samples),
2031 				  " (%" PRIu64 " samples)", rec->samples);
2032 		else
2033 			samples[0] = '\0';
2034 
2035 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2036 			perf_data__size(data) / 1024.0 / 1024.0,
2037 			data->path, postfix, samples);
2038 		if (ratio) {
2039 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2040 					rec->session->bytes_transferred / 1024.0 / 1024.0,
2041 					ratio);
2042 		}
2043 		fprintf(stderr, " ]\n");
2044 	}
2045 
2046 out_delete_session:
2047 #ifdef HAVE_EVENTFD_SUPPORT
2048 	if (done_fd >= 0)
2049 		close(done_fd);
2050 #endif
2051 	zstd_fini(&session->zstd_data);
2052 	perf_session__delete(session);
2053 
2054 	if (!opts->no_bpf_event)
2055 		perf_evlist__stop_sb_thread(rec->sb_evlist);
2056 	return status;
2057 }
2058 
2059 static void callchain_debug(struct callchain_param *callchain)
2060 {
2061 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2062 
2063 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2064 
2065 	if (callchain->record_mode == CALLCHAIN_DWARF)
2066 		pr_debug("callchain: stack dump size %d\n",
2067 			 callchain->dump_size);
2068 }
2069 
2070 int record_opts__parse_callchain(struct record_opts *record,
2071 				 struct callchain_param *callchain,
2072 				 const char *arg, bool unset)
2073 {
2074 	int ret;
2075 	callchain->enabled = !unset;
2076 
2077 	/* --no-call-graph */
2078 	if (unset) {
2079 		callchain->record_mode = CALLCHAIN_NONE;
2080 		pr_debug("callchain: disabled\n");
2081 		return 0;
2082 	}
2083 
2084 	ret = parse_callchain_record_opt(arg, callchain);
2085 	if (!ret) {
2086 		/* Enable data address sampling for DWARF unwind. */
2087 		if (callchain->record_mode == CALLCHAIN_DWARF)
2088 			record->sample_address = true;
2089 		callchain_debug(callchain);
2090 	}
2091 
2092 	return ret;
2093 }
2094 
2095 int record_parse_callchain_opt(const struct option *opt,
2096 			       const char *arg,
2097 			       int unset)
2098 {
2099 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2100 }
2101 
2102 int record_callchain_opt(const struct option *opt,
2103 			 const char *arg __maybe_unused,
2104 			 int unset __maybe_unused)
2105 {
2106 	struct callchain_param *callchain = opt->value;
2107 
2108 	callchain->enabled = true;
2109 
2110 	if (callchain->record_mode == CALLCHAIN_NONE)
2111 		callchain->record_mode = CALLCHAIN_FP;
2112 
2113 	callchain_debug(callchain);
2114 	return 0;
2115 }
2116 
2117 static int perf_record_config(const char *var, const char *value, void *cb)
2118 {
2119 	struct record *rec = cb;
2120 
2121 	if (!strcmp(var, "record.build-id")) {
2122 		if (!strcmp(value, "cache"))
2123 			rec->no_buildid_cache = false;
2124 		else if (!strcmp(value, "no-cache"))
2125 			rec->no_buildid_cache = true;
2126 		else if (!strcmp(value, "skip"))
2127 			rec->no_buildid = true;
2128 		else
2129 			return -1;
2130 		return 0;
2131 	}
2132 	if (!strcmp(var, "record.call-graph")) {
2133 		var = "call-graph.record-mode";
2134 		return perf_default_config(var, value, cb);
2135 	}
2136 #ifdef HAVE_AIO_SUPPORT
2137 	if (!strcmp(var, "record.aio")) {
2138 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2139 		if (!rec->opts.nr_cblocks)
2140 			rec->opts.nr_cblocks = nr_cblocks_default;
2141 	}
2142 #endif
2143 
2144 	return 0;
2145 }
2146 
2147 
2148 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2149 {
2150 	struct record_opts *opts = (struct record_opts *)opt->value;
2151 
2152 	if (unset || !str)
2153 		return 0;
2154 
2155 	if (!strcasecmp(str, "node"))
2156 		opts->affinity = PERF_AFFINITY_NODE;
2157 	else if (!strcasecmp(str, "cpu"))
2158 		opts->affinity = PERF_AFFINITY_CPU;
2159 
2160 	return 0;
2161 }
2162 
2163 static int parse_output_max_size(const struct option *opt,
2164 				 const char *str, int unset)
2165 {
2166 	unsigned long *s = (unsigned long *)opt->value;
2167 	static struct parse_tag tags_size[] = {
2168 		{ .tag  = 'B', .mult = 1       },
2169 		{ .tag  = 'K', .mult = 1 << 10 },
2170 		{ .tag  = 'M', .mult = 1 << 20 },
2171 		{ .tag  = 'G', .mult = 1 << 30 },
2172 		{ .tag  = 0 },
2173 	};
2174 	unsigned long val;
2175 
2176 	if (unset) {
2177 		*s = 0;
2178 		return 0;
2179 	}
2180 
2181 	val = parse_tag_value(str, tags_size);
2182 	if (val != (unsigned long) -1) {
2183 		*s = val;
2184 		return 0;
2185 	}
2186 
2187 	return -1;
2188 }
2189 
2190 static int record__parse_mmap_pages(const struct option *opt,
2191 				    const char *str,
2192 				    int unset __maybe_unused)
2193 {
2194 	struct record_opts *opts = opt->value;
2195 	char *s, *p;
2196 	unsigned int mmap_pages;
2197 	int ret;
2198 
2199 	if (!str)
2200 		return -EINVAL;
2201 
2202 	s = strdup(str);
2203 	if (!s)
2204 		return -ENOMEM;
2205 
2206 	p = strchr(s, ',');
2207 	if (p)
2208 		*p = '\0';
2209 
2210 	if (*s) {
2211 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2212 		if (ret)
2213 			goto out_free;
2214 		opts->mmap_pages = mmap_pages;
2215 	}
2216 
2217 	if (!p) {
2218 		ret = 0;
2219 		goto out_free;
2220 	}
2221 
2222 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2223 	if (ret)
2224 		goto out_free;
2225 
2226 	opts->auxtrace_mmap_pages = mmap_pages;
2227 
2228 out_free:
2229 	free(s);
2230 	return ret;
2231 }
2232 
2233 static int parse_control_option(const struct option *opt,
2234 				const char *str,
2235 				int unset __maybe_unused)
2236 {
2237 	char *comma = NULL, *endptr = NULL;
2238 	struct record_opts *config = (struct record_opts *)opt->value;
2239 
2240 	if (strncmp(str, "fd:", 3))
2241 		return -EINVAL;
2242 
2243 	config->ctl_fd = strtoul(&str[3], &endptr, 0);
2244 	if (endptr == &str[3])
2245 		return -EINVAL;
2246 
2247 	comma = strchr(str, ',');
2248 	if (comma) {
2249 		if (endptr != comma)
2250 			return -EINVAL;
2251 
2252 		config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
2253 		if (endptr == comma + 1 || *endptr != '\0')
2254 			return -EINVAL;
2255 	}
2256 
2257 	return 0;
2258 }
2259 
2260 static void switch_output_size_warn(struct record *rec)
2261 {
2262 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2263 	struct switch_output *s = &rec->switch_output;
2264 
2265 	wakeup_size /= 2;
2266 
2267 	if (s->size < wakeup_size) {
2268 		char buf[100];
2269 
2270 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2271 		pr_warning("WARNING: switch-output data size lower than "
2272 			   "wakeup kernel buffer size (%s) "
2273 			   "expect bigger perf.data sizes\n", buf);
2274 	}
2275 }
2276 
2277 static int switch_output_setup(struct record *rec)
2278 {
2279 	struct switch_output *s = &rec->switch_output;
2280 	static struct parse_tag tags_size[] = {
2281 		{ .tag  = 'B', .mult = 1       },
2282 		{ .tag  = 'K', .mult = 1 << 10 },
2283 		{ .tag  = 'M', .mult = 1 << 20 },
2284 		{ .tag  = 'G', .mult = 1 << 30 },
2285 		{ .tag  = 0 },
2286 	};
2287 	static struct parse_tag tags_time[] = {
2288 		{ .tag  = 's', .mult = 1        },
2289 		{ .tag  = 'm', .mult = 60       },
2290 		{ .tag  = 'h', .mult = 60*60    },
2291 		{ .tag  = 'd', .mult = 60*60*24 },
2292 		{ .tag  = 0 },
2293 	};
2294 	unsigned long val;
2295 
2296 	/*
2297 	 * If we're using --switch-output-events, then we imply its
2298 	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2299 	 *  thread to its parent.
2300 	 */
2301 	if (rec->switch_output_event_set)
2302 		goto do_signal;
2303 
2304 	if (!s->set)
2305 		return 0;
2306 
2307 	if (!strcmp(s->str, "signal")) {
2308 do_signal:
2309 		s->signal = true;
2310 		pr_debug("switch-output with SIGUSR2 signal\n");
2311 		goto enabled;
2312 	}
2313 
2314 	val = parse_tag_value(s->str, tags_size);
2315 	if (val != (unsigned long) -1) {
2316 		s->size = val;
2317 		pr_debug("switch-output with %s size threshold\n", s->str);
2318 		goto enabled;
2319 	}
2320 
2321 	val = parse_tag_value(s->str, tags_time);
2322 	if (val != (unsigned long) -1) {
2323 		s->time = val;
2324 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2325 			 s->str, s->time);
2326 		goto enabled;
2327 	}
2328 
2329 	return -1;
2330 
2331 enabled:
2332 	rec->timestamp_filename = true;
2333 	s->enabled              = true;
2334 
2335 	if (s->size && !rec->opts.no_buffering)
2336 		switch_output_size_warn(rec);
2337 
2338 	return 0;
2339 }
2340 
/* Usage synopsis lines for 'perf record'; the list is NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static handle to the usage lines above. */
const char * const *record_usage = __record_usage;
2347 
2348 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2349 				  struct perf_sample *sample, struct machine *machine)
2350 {
2351 	/*
2352 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2353 	 * no need to add them twice.
2354 	 */
2355 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2356 		return 0;
2357 	return perf_event__process_mmap(tool, event, sample, machine);
2358 }
2359 
2360 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2361 				   struct perf_sample *sample, struct machine *machine)
2362 {
2363 	/*
2364 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps()
2365 	 * no need to add them twice.
2366 	 */
2367 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2368 		return 0;
2369 
2370 	return perf_event__process_mmap2(tool, event, sample, machine);
2371 }
2372 
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 *
 * The initializer below provides the defaults that the option table and
 * cmd_record() start from.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.mmap_flush          = MMAP_FLUSH_DEFAULT,
		.nr_threads_synthesize = 1,
		/* -1 until --control supplies descriptors (parse_control_option) */
		.ctl_fd              = -1,
		.ctl_fd_ack          = -1,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		/* local wrappers that drop mmap events not flagged as user */
		.mmap		= build_id__process_mmap,
		.mmap2		= build_id__process_mmap2,
		.ordered_events	= true,
	},
};
2410 
/* Help text of --call-graph: CALLCHAIN_RECORD_HELP plus a note on the default mode. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* Set by --dry-run; cmd_record() then jumps to cleanup without recording. */
static bool dry_run;
2415 
2416 /*
2417  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2418  * with it and switch to use the library functions in perf_evlist that came
2419  * from builtin-record.c, i.e. use record_opts,
2420  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
2421  * using pipes, etc.
2422  */
2423 static struct option __record_options[] = {
2424 	OPT_CALLBACK('e', "event", &record.evlist, "event",
2425 		     "event selector. use 'perf list' to list available events",
2426 		     parse_events_option),
2427 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2428 		     "event filter", parse_filter),
2429 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2430 			   NULL, "don't record events from perf itself",
2431 			   exclude_perf),
2432 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2433 		    "record events on existing process id"),
2434 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2435 		    "record events on existing thread id"),
2436 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
2437 		    "collect data with this RT SCHED_FIFO priority"),
2438 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2439 		    "collect data without buffering"),
2440 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2441 		    "collect raw sample records from all opened counters"),
2442 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2443 			    "system-wide collection from all CPUs"),
2444 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2445 		    "list of cpus to monitor"),
2446 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2447 	OPT_STRING('o', "output", &record.data.path, "file",
2448 		    "output file name"),
2449 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2450 			&record.opts.no_inherit_set,
2451 			"child tasks do not inherit counters"),
2452 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2453 		    "synthesize non-sample events at the end of output"),
2454 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2455 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2456 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2457 		    "Fail if the specified frequency can't be used"),
2458 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2459 		     "profile at this frequency",
2460 		      record__parse_freq),
2461 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2462 		     "number of mmap data pages and AUX area tracing mmap pages",
2463 		     record__parse_mmap_pages),
2464 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2465 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
2466 		     record__mmap_flush_parse),
2467 	OPT_BOOLEAN(0, "group", &record.opts.group,
2468 		    "put the counters into a counter group"),
2469 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2470 			   NULL, "enables call-graph recording" ,
2471 			   &record_callchain_opt),
2472 	OPT_CALLBACK(0, "call-graph", &record.opts,
2473 		     "record_mode[,record_size]", record_callchain_help,
2474 		     &record_parse_callchain_opt),
2475 	OPT_INCR('v', "verbose", &verbose,
2476 		    "be more verbose (show counter open errors, etc)"),
2477 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2478 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2479 		    "per thread counts"),
2480 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2481 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2482 		    "Record the sample physical addresses"),
2483 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2484 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2485 			&record.opts.sample_time_set,
2486 			"Record the sample timestamps"),
2487 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2488 			"Record the sample period"),
2489 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2490 		    "don't sample"),
2491 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2492 			&record.no_buildid_cache_set,
2493 			"do not update the buildid cache"),
2494 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2495 			&record.no_buildid_set,
2496 			"do not collect buildids in perf.data"),
2497 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2498 		     "monitor event in cgroup name only",
2499 		     parse_cgroups),
2500 	OPT_INTEGER('D', "delay", &record.opts.initial_delay,
2501 		  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
2502 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2503 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2504 		   "user to profile"),
2505 
2506 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2507 		     "branch any", "sample any taken branches",
2508 		     parse_branch_stack),
2509 
2510 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2511 		     "branch filter mask", "branch stack filter modes",
2512 		     parse_branch_stack),
2513 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2514 		    "sample by weight (on special events only)"),
2515 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2516 		    "sample transaction flags (special events only)"),
2517 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2518 		    "use per-thread mmaps"),
2519 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2520 		    "sample selected machine registers on interrupt,"
2521 		    " use '-I?' to list register names", parse_intr_regs),
2522 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2523 		    "sample selected machine registers on interrupt,"
2524 		    " use '--user-regs=?' to list register names", parse_user_regs),
2525 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2526 		    "Record running/enabled time of read (:S) events"),
2527 	OPT_CALLBACK('k', "clockid", &record.opts,
2528 	"clockid", "clockid to use for events, see clock_gettime()",
2529 	parse_clockid),
2530 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2531 			  "opts", "AUX area tracing Snapshot Mode", ""),
2532 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2533 			  "opts", "sample AUX area", ""),
2534 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2535 			"per thread proc mmap processing timeout in ms"),
2536 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2537 		    "Record namespaces events"),
2538 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2539 		    "Record cgroup events"),
2540 	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
2541 			&record.opts.record_switch_events_set,
2542 			"Record context switch events"),
2543 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2544 			 "Configure all used events to run in kernel space.",
2545 			 PARSE_OPT_EXCLUSIVE),
2546 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2547 			 "Configure all used events to run in user space.",
2548 			 PARSE_OPT_EXCLUSIVE),
2549 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2550 		    "collect kernel callchains"),
2551 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2552 		    "collect user callchains"),
2553 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2554 		   "clang binary to use for compiling BPF scriptlets"),
2555 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2556 		   "options passed to clang when compiling BPF scriptlets"),
2557 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2558 		   "file", "vmlinux pathname"),
2559 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2560 		    "Record build-id of all DSOs regardless of hits"),
2561 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2562 		    "append timestamp to output filename"),
2563 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2564 		    "Record timestamp boundary (time of first/last samples)"),
2565 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2566 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2567 			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2568 			  "signal"),
2569 	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2570 			 "switch output event selector. use 'perf list' to list available events",
2571 			 parse_events_option_new_evlist),
2572 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2573 		   "Limit number of switch output generated files"),
2574 	OPT_BOOLEAN(0, "dry-run", &dry_run,
2575 		    "Parse options then exit"),
2576 #ifdef HAVE_AIO_SUPPORT
2577 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2578 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2579 		     record__aio_parse),
2580 #endif
2581 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2582 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2583 		     record__parse_affinity),
2584 #ifdef HAVE_ZSTD_SUPPORT
2585 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2586 			    "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2587 			    record__parse_comp_level),
2588 #endif
2589 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
2590 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
2591 	OPT_UINTEGER(0, "num-thread-synthesize",
2592 		     &record.opts.nr_threads_synthesize,
2593 		     "number of threads to run for event synthesis"),
2594 #ifdef HAVE_LIBPFM
2595 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
2596 		"libpfm4 event selector. use 'perf list' to list available events",
2597 		parse_libpfm_events_option),
2598 #endif
2599 	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
2600 		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
2601 		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
2602 		      parse_control_option),
2603 	OPT_END()
2604 };
2605 
/* Non-static handle to the option table; cmd_record() parses through it. */
struct option *record_options = __record_options;
2607 
2608 int cmd_record(int argc, const char **argv)
2609 {
2610 	int err;
2611 	struct record *rec = &record;
2612 	char errbuf[BUFSIZ];
2613 
2614 	setlocale(LC_ALL, "");
2615 
2616 #ifndef HAVE_LIBBPF_SUPPORT
2617 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2618 	set_nobuild('\0', "clang-path", true);
2619 	set_nobuild('\0', "clang-opt", true);
2620 # undef set_nobuild
2621 #endif
2622 
2623 #ifndef HAVE_BPF_PROLOGUE
2624 # if !defined (HAVE_DWARF_SUPPORT)
2625 #  define REASON  "NO_DWARF=1"
2626 # elif !defined (HAVE_LIBBPF_SUPPORT)
2627 #  define REASON  "NO_LIBBPF=1"
2628 # else
2629 #  define REASON  "this architecture doesn't support BPF prologue"
2630 # endif
2631 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2632 	set_nobuild('\0', "vmlinux", true);
2633 # undef set_nobuild
2634 # undef REASON
2635 #endif
2636 
2637 	rec->opts.affinity = PERF_AFFINITY_SYS;
2638 
2639 	rec->evlist = evlist__new();
2640 	if (rec->evlist == NULL)
2641 		return -ENOMEM;
2642 
2643 	err = perf_config(perf_record_config, rec);
2644 	if (err)
2645 		return err;
2646 
2647 	argc = parse_options(argc, argv, record_options, record_usage,
2648 			    PARSE_OPT_STOP_AT_NON_OPTION);
2649 	if (quiet)
2650 		perf_quiet_option();
2651 
2652 	/* Make system wide (-a) the default target. */
2653 	if (!argc && target__none(&rec->opts.target))
2654 		rec->opts.target.system_wide = true;
2655 
2656 	if (nr_cgroups && !rec->opts.target.system_wide) {
2657 		usage_with_options_msg(record_usage, record_options,
2658 			"cgroup monitoring only available in system-wide mode");
2659 
2660 	}
2661 
2662 	if (rec->opts.kcore)
2663 		rec->data.is_dir = true;
2664 
2665 	if (rec->opts.comp_level != 0) {
2666 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2667 		rec->no_buildid = true;
2668 	}
2669 
2670 	if (rec->opts.record_switch_events &&
2671 	    !perf_can_record_switch_events()) {
2672 		ui__error("kernel does not support recording context switch events\n");
2673 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2674 		return -EINVAL;
2675 	}
2676 
2677 	if (switch_output_setup(rec)) {
2678 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2679 		return -EINVAL;
2680 	}
2681 
2682 	if (rec->switch_output.time) {
2683 		signal(SIGALRM, alarm_sig_handler);
2684 		alarm(rec->switch_output.time);
2685 	}
2686 
2687 	if (rec->switch_output.num_files) {
2688 		rec->switch_output.filenames = calloc(sizeof(char *),
2689 						      rec->switch_output.num_files);
2690 		if (!rec->switch_output.filenames)
2691 			return -EINVAL;
2692 	}
2693 
2694 	/*
2695 	 * Allow aliases to facilitate the lookup of symbols for address
2696 	 * filters. Refer to auxtrace_parse_filters().
2697 	 */
2698 	symbol_conf.allow_aliases = true;
2699 
2700 	symbol__init(NULL);
2701 
2702 	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2703 		rec->affinity_mask.nbits = cpu__max_cpu();
2704 		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2705 		if (!rec->affinity_mask.bits) {
2706 			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2707 			return -ENOMEM;
2708 		}
2709 		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2710 	}
2711 
2712 	err = record__auxtrace_init(rec);
2713 	if (err)
2714 		goto out;
2715 
2716 	if (dry_run)
2717 		goto out;
2718 
2719 	err = bpf__setup_stdout(rec->evlist);
2720 	if (err) {
2721 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2722 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2723 			 errbuf);
2724 		goto out;
2725 	}
2726 
2727 	err = -ENOMEM;
2728 
2729 	if (rec->no_buildid_cache || rec->no_buildid) {
2730 		disable_buildid_cache();
2731 	} else if (rec->switch_output.enabled) {
2732 		/*
2733 		 * In 'perf record --switch-output', disable buildid
2734 		 * generation by default to reduce data file switching
2735 		 * overhead. Still generate buildid if they are required
2736 		 * explicitly using
2737 		 *
2738 		 *  perf record --switch-output --no-no-buildid \
2739 		 *              --no-no-buildid-cache
2740 		 *
2741 		 * Following code equals to:
2742 		 *
2743 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2744 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2745 		 *         disable_buildid_cache();
2746 		 */
2747 		bool disable = true;
2748 
2749 		if (rec->no_buildid_set && !rec->no_buildid)
2750 			disable = false;
2751 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2752 			disable = false;
2753 		if (disable) {
2754 			rec->no_buildid = true;
2755 			rec->no_buildid_cache = true;
2756 			disable_buildid_cache();
2757 		}
2758 	}
2759 
2760 	if (record.opts.overwrite)
2761 		record.opts.tail_synthesize = true;
2762 
2763 	if (rec->evlist->core.nr_entries == 0 &&
2764 	    __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2765 		pr_err("Not enough memory for event selector list\n");
2766 		goto out;
2767 	}
2768 
2769 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2770 		rec->opts.no_inherit = true;
2771 
2772 	err = target__validate(&rec->opts.target);
2773 	if (err) {
2774 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2775 		ui__warning("%s\n", errbuf);
2776 	}
2777 
2778 	err = target__parse_uid(&rec->opts.target);
2779 	if (err) {
2780 		int saved_errno = errno;
2781 
2782 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2783 		ui__error("%s", errbuf);
2784 
2785 		err = -saved_errno;
2786 		goto out;
2787 	}
2788 
2789 	/* Enable ignoring missing threads when -u/-p option is defined. */
2790 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2791 
2792 	err = -ENOMEM;
2793 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2794 		usage_with_options(record_usage, record_options);
2795 
2796 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2797 	if (err)
2798 		goto out;
2799 
2800 	/*
2801 	 * We take all buildids when the file contains
2802 	 * AUX area tracing data because we do not decode the
2803 	 * trace because it would take too long.
2804 	 */
2805 	if (rec->opts.full_auxtrace)
2806 		rec->buildid_all = true;
2807 
2808 	if (rec->opts.text_poke) {
2809 		err = record__config_text_poke(rec->evlist);
2810 		if (err) {
2811 			pr_err("record__config_text_poke failed, error %d\n", err);
2812 			goto out;
2813 		}
2814 	}
2815 
2816 	if (record_opts__config(&rec->opts)) {
2817 		err = -EINVAL;
2818 		goto out;
2819 	}
2820 
2821 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2822 		rec->opts.nr_cblocks = nr_cblocks_max;
2823 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2824 
2825 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2826 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2827 
2828 	if (rec->opts.comp_level > comp_level_max)
2829 		rec->opts.comp_level = comp_level_max;
2830 	pr_debug("comp level: %d\n", rec->opts.comp_level);
2831 
2832 	err = __cmd_record(&record, argc, argv);
2833 out:
2834 	bitmap_free(rec->affinity_mask.bits);
2835 	evlist__delete(rec->evlist);
2836 	symbol__exit();
2837 	auxtrace_record__free(rec->itr);
2838 	return err;
2839 }
2840 
2841 static void snapshot_sig_handler(int sig __maybe_unused)
2842 {
2843 	struct record *rec = &record;
2844 
2845 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2846 		trigger_hit(&auxtrace_snapshot_trigger);
2847 		auxtrace_record__snapshot_started = 1;
2848 		if (auxtrace_record__snapshot_start(record.itr))
2849 			trigger_error(&auxtrace_snapshot_trigger);
2850 	}
2851 
2852 	if (switch_output_signal(rec))
2853 		trigger_hit(&switch_output_trigger);
2854 }
2855 
2856 static void alarm_sig_handler(int sig __maybe_unused)
2857 {
2858 	struct record *rec = &record;
2859 
2860 	if (switch_output_time(rec))
2861 		trigger_hit(&switch_output_trigger);
2862 }
2863