xref: /openbmc/linux/tools/perf/builtin-record.c (revision 6e6c61d3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45 
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <locale.h>
49 #include <poll.h>
50 #include <unistd.h>
51 #include <sched.h>
52 #include <signal.h>
53 #include <sys/mman.h>
54 #include <sys/wait.h>
55 #include <linux/time64.h>
56 
57 struct switch_output {
58 	bool		 enabled;
59 	bool		 signal;
60 	unsigned long	 size;
61 	unsigned long	 time;
62 	const char	*str;
63 	bool		 set;
64 };
65 
66 struct record {
67 	struct perf_tool	tool;
68 	struct record_opts	opts;
69 	u64			bytes_written;
70 	struct perf_data	data;
71 	struct auxtrace_record	*itr;
72 	struct perf_evlist	*evlist;
73 	struct perf_session	*session;
74 	int			realtime_prio;
75 	bool			no_buildid;
76 	bool			no_buildid_set;
77 	bool			no_buildid_cache;
78 	bool			no_buildid_cache_set;
79 	bool			buildid_all;
80 	bool			timestamp_filename;
81 	bool			timestamp_boundary;
82 	struct switch_output	switch_output;
83 	unsigned long long	samples;
84 };
85 
86 static volatile int auxtrace_record__snapshot_started;
87 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
88 static DEFINE_TRIGGER(switch_output_trigger);
89 
90 static bool switch_output_signal(struct record *rec)
91 {
92 	return rec->switch_output.signal &&
93 	       trigger_is_ready(&switch_output_trigger);
94 }
95 
96 static bool switch_output_size(struct record *rec)
97 {
98 	return rec->switch_output.size &&
99 	       trigger_is_ready(&switch_output_trigger) &&
100 	       (rec->bytes_written >= rec->switch_output.size);
101 }
102 
103 static bool switch_output_time(struct record *rec)
104 {
105 	return rec->switch_output.time &&
106 	       trigger_is_ready(&switch_output_trigger);
107 }
108 
109 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
110 			 void *bf, size_t size)
111 {
112 	struct perf_data_file *file = &rec->session->data->file;
113 
114 	if (perf_data_file__write(file, bf, size) < 0) {
115 		pr_err("failed to write perf data, error: %m\n");
116 		return -1;
117 	}
118 
119 	rec->bytes_written += size;
120 
121 	if (switch_output_size(rec))
122 		trigger_hit(&switch_output_trigger);
123 
124 	return 0;
125 }
126 
127 #ifdef HAVE_AIO_SUPPORT
128 static int record__aio_write(struct aiocb *cblock, int trace_fd,
129 		void *buf, size_t size, off_t off)
130 {
131 	int rc;
132 
133 	cblock->aio_fildes = trace_fd;
134 	cblock->aio_buf    = buf;
135 	cblock->aio_nbytes = size;
136 	cblock->aio_offset = off;
137 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
138 
139 	do {
140 		rc = aio_write(cblock);
141 		if (rc == 0) {
142 			break;
143 		} else if (errno != EAGAIN) {
144 			cblock->aio_fildes = -1;
145 			pr_err("failed to queue perf data, error: %m\n");
146 			break;
147 		}
148 	} while (1);
149 
150 	return rc;
151 }
152 
153 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
154 {
155 	void *rem_buf;
156 	off_t rem_off;
157 	size_t rem_size;
158 	int rc, aio_errno;
159 	ssize_t aio_ret, written;
160 
161 	aio_errno = aio_error(cblock);
162 	if (aio_errno == EINPROGRESS)
163 		return 0;
164 
165 	written = aio_ret = aio_return(cblock);
166 	if (aio_ret < 0) {
167 		if (aio_errno != EINTR)
168 			pr_err("failed to write perf data, error: %m\n");
169 		written = 0;
170 	}
171 
172 	rem_size = cblock->aio_nbytes - written;
173 
174 	if (rem_size == 0) {
175 		cblock->aio_fildes = -1;
176 		/*
177 		 * md->refcount is incremented in perf_mmap__push() for
178 		 * every enqueued aio write request so decrement it because
179 		 * the request is now complete.
180 		 */
181 		perf_mmap__put(md);
182 		rc = 1;
183 	} else {
184 		/*
185 		 * aio write request may require restart with the
186 		 * reminder if the kernel didn't write whole
187 		 * chunk at once.
188 		 */
189 		rem_off = cblock->aio_offset + written;
190 		rem_buf = (void *)(cblock->aio_buf + written);
191 		record__aio_write(cblock, cblock->aio_fildes,
192 				rem_buf, rem_size, rem_off);
193 		rc = 0;
194 	}
195 
196 	return rc;
197 }
198 
199 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
200 {
201 	struct aiocb **aiocb = md->aio.aiocb;
202 	struct aiocb *cblocks = md->aio.cblocks;
203 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
204 	int i, do_suspend;
205 
206 	do {
207 		do_suspend = 0;
208 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
209 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
210 				if (sync_all)
211 					aiocb[i] = NULL;
212 				else
213 					return i;
214 			} else {
215 				/*
216 				 * Started aio write is not complete yet
217 				 * so it has to be waited before the
218 				 * next allocation.
219 				 */
220 				aiocb[i] = &cblocks[i];
221 				do_suspend = 1;
222 			}
223 		}
224 		if (!do_suspend)
225 			return -1;
226 
227 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
228 			if (!(errno == EAGAIN || errno == EINTR))
229 				pr_err("failed to sync perf data, error: %m\n");
230 		}
231 	} while (1);
232 }
233 
234 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
235 {
236 	struct record *rec = to;
237 	int ret, trace_fd = rec->session->data->file.fd;
238 
239 	rec->samples++;
240 
241 	ret = record__aio_write(cblock, trace_fd, bf, size, off);
242 	if (!ret) {
243 		rec->bytes_written += size;
244 		if (switch_output_size(rec))
245 			trigger_hit(&switch_output_trigger);
246 	}
247 
248 	return ret;
249 }
250 
251 static off_t record__aio_get_pos(int trace_fd)
252 {
253 	return lseek(trace_fd, 0, SEEK_CUR);
254 }
255 
256 static void record__aio_set_pos(int trace_fd, off_t pos)
257 {
258 	lseek(trace_fd, pos, SEEK_SET);
259 }
260 
261 static void record__aio_mmap_read_sync(struct record *rec)
262 {
263 	int i;
264 	struct perf_evlist *evlist = rec->evlist;
265 	struct perf_mmap *maps = evlist->mmap;
266 
267 	if (!rec->opts.nr_cblocks)
268 		return;
269 
270 	for (i = 0; i < evlist->nr_mmaps; i++) {
271 		struct perf_mmap *map = &maps[i];
272 
273 		if (map->base)
274 			record__aio_sync(map, true);
275 	}
276 }
277 
278 static int nr_cblocks_default = 1;
279 static int nr_cblocks_max = 4;
280 
281 static int record__aio_parse(const struct option *opt,
282 			     const char *str,
283 			     int unset)
284 {
285 	struct record_opts *opts = (struct record_opts *)opt->value;
286 
287 	if (unset) {
288 		opts->nr_cblocks = 0;
289 	} else {
290 		if (str)
291 			opts->nr_cblocks = strtol(str, NULL, 0);
292 		if (!opts->nr_cblocks)
293 			opts->nr_cblocks = nr_cblocks_default;
294 	}
295 
296 	return 0;
297 }
298 #else /* HAVE_AIO_SUPPORT */
299 static int nr_cblocks_max = 0;
300 
301 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
302 {
303 	return -1;
304 }
305 
306 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
307 		void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
308 {
309 	return -1;
310 }
311 
312 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
313 {
314 	return -1;
315 }
316 
317 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
318 {
319 }
320 
321 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
322 {
323 }
324 #endif
325 
326 static int record__aio_enabled(struct record *rec)
327 {
328 	return rec->opts.nr_cblocks > 0;
329 }
330 
331 static int process_synthesized_event(struct perf_tool *tool,
332 				     union perf_event *event,
333 				     struct perf_sample *sample __maybe_unused,
334 				     struct machine *machine __maybe_unused)
335 {
336 	struct record *rec = container_of(tool, struct record, tool);
337 	return record__write(rec, NULL, event, event->header.size);
338 }
339 
340 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
341 {
342 	struct record *rec = to;
343 
344 	rec->samples++;
345 	return record__write(rec, map, bf, size);
346 }
347 
348 static volatile int done;
349 static volatile int signr = -1;
350 static volatile int child_finished;
351 
352 static void sig_handler(int sig)
353 {
354 	if (sig == SIGCHLD)
355 		child_finished = 1;
356 	else
357 		signr = sig;
358 
359 	done = 1;
360 }
361 
362 static void sigsegv_handler(int sig)
363 {
364 	perf_hooks__recover();
365 	sighandler_dump_stack(sig);
366 }
367 
368 static void record__sig_exit(void)
369 {
370 	if (signr == -1)
371 		return;
372 
373 	signal(signr, SIG_DFL);
374 	raise(signr);
375 }
376 
377 #ifdef HAVE_AUXTRACE_SUPPORT
378 
379 static int record__process_auxtrace(struct perf_tool *tool,
380 				    struct perf_mmap *map,
381 				    union perf_event *event, void *data1,
382 				    size_t len1, void *data2, size_t len2)
383 {
384 	struct record *rec = container_of(tool, struct record, tool);
385 	struct perf_data *data = &rec->data;
386 	size_t padding;
387 	u8 pad[8] = {0};
388 
389 	if (!perf_data__is_pipe(data)) {
390 		off_t file_offset;
391 		int fd = perf_data__fd(data);
392 		int err;
393 
394 		file_offset = lseek(fd, 0, SEEK_CUR);
395 		if (file_offset == -1)
396 			return -1;
397 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
398 						     event, file_offset);
399 		if (err)
400 			return err;
401 	}
402 
403 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
404 	padding = (len1 + len2) & 7;
405 	if (padding)
406 		padding = 8 - padding;
407 
408 	record__write(rec, map, event, event->header.size);
409 	record__write(rec, map, data1, len1);
410 	if (len2)
411 		record__write(rec, map, data2, len2);
412 	record__write(rec, map, &pad, padding);
413 
414 	return 0;
415 }
416 
417 static int record__auxtrace_mmap_read(struct record *rec,
418 				      struct perf_mmap *map)
419 {
420 	int ret;
421 
422 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
423 				  record__process_auxtrace);
424 	if (ret < 0)
425 		return ret;
426 
427 	if (ret)
428 		rec->samples++;
429 
430 	return 0;
431 }
432 
433 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
434 					       struct perf_mmap *map)
435 {
436 	int ret;
437 
438 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
439 					   record__process_auxtrace,
440 					   rec->opts.auxtrace_snapshot_size);
441 	if (ret < 0)
442 		return ret;
443 
444 	if (ret)
445 		rec->samples++;
446 
447 	return 0;
448 }
449 
450 static int record__auxtrace_read_snapshot_all(struct record *rec)
451 {
452 	int i;
453 	int rc = 0;
454 
455 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
456 		struct perf_mmap *map = &rec->evlist->mmap[i];
457 
458 		if (!map->auxtrace_mmap.base)
459 			continue;
460 
461 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
462 			rc = -1;
463 			goto out;
464 		}
465 	}
466 out:
467 	return rc;
468 }
469 
470 static void record__read_auxtrace_snapshot(struct record *rec)
471 {
472 	pr_debug("Recording AUX area tracing snapshot\n");
473 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
474 		trigger_error(&auxtrace_snapshot_trigger);
475 	} else {
476 		if (auxtrace_record__snapshot_finish(rec->itr))
477 			trigger_error(&auxtrace_snapshot_trigger);
478 		else
479 			trigger_ready(&auxtrace_snapshot_trigger);
480 	}
481 }
482 
483 static int record__auxtrace_init(struct record *rec)
484 {
485 	int err;
486 
487 	if (!rec->itr) {
488 		rec->itr = auxtrace_record__init(rec->evlist, &err);
489 		if (err)
490 			return err;
491 	}
492 
493 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
494 					      rec->opts.auxtrace_snapshot_opts);
495 	if (err)
496 		return err;
497 
498 	return auxtrace_parse_filters(rec->evlist);
499 }
500 
501 #else
502 
503 static inline
504 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
505 			       struct perf_mmap *map __maybe_unused)
506 {
507 	return 0;
508 }
509 
510 static inline
511 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
512 {
513 }
514 
515 static inline
516 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
517 {
518 	return 0;
519 }
520 
521 static int record__auxtrace_init(struct record *rec __maybe_unused)
522 {
523 	return 0;
524 }
525 
526 #endif
527 
528 static int record__mmap_evlist(struct record *rec,
529 			       struct perf_evlist *evlist)
530 {
531 	struct record_opts *opts = &rec->opts;
532 	char msg[512];
533 
534 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
535 				 opts->auxtrace_mmap_pages,
536 				 opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) {
537 		if (errno == EPERM) {
538 			pr_err("Permission error mapping pages.\n"
539 			       "Consider increasing "
540 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
541 			       "or try again with a smaller value of -m/--mmap_pages.\n"
542 			       "(current value: %u,%u)\n",
543 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
544 			return -errno;
545 		} else {
546 			pr_err("failed to mmap with %d (%s)\n", errno,
547 				str_error_r(errno, msg, sizeof(msg)));
548 			if (errno)
549 				return -errno;
550 			else
551 				return -EINVAL;
552 		}
553 	}
554 	return 0;
555 }
556 
557 static int record__mmap(struct record *rec)
558 {
559 	return record__mmap_evlist(rec, rec->evlist);
560 }
561 
562 static int record__open(struct record *rec)
563 {
564 	char msg[BUFSIZ];
565 	struct perf_evsel *pos;
566 	struct perf_evlist *evlist = rec->evlist;
567 	struct perf_session *session = rec->session;
568 	struct record_opts *opts = &rec->opts;
569 	struct perf_evsel_config_term *err_term;
570 	int rc = 0;
571 
572 	/*
573 	 * For initial_delay we need to add a dummy event so that we can track
574 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
575 	 * real events, the ones asked by the user.
576 	 */
577 	if (opts->initial_delay) {
578 		if (perf_evlist__add_dummy(evlist))
579 			return -ENOMEM;
580 
581 		pos = perf_evlist__first(evlist);
582 		pos->tracking = 0;
583 		pos = perf_evlist__last(evlist);
584 		pos->tracking = 1;
585 		pos->attr.enable_on_exec = 1;
586 	}
587 
588 	perf_evlist__config(evlist, opts, &callchain_param);
589 
590 	evlist__for_each_entry(evlist, pos) {
591 try_again:
592 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
593 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
594 				if (verbose > 0)
595 					ui__warning("%s\n", msg);
596 				goto try_again;
597 			}
598 			if ((errno == EINVAL || errno == EBADF) &&
599 			    pos->leader != pos &&
600 			    pos->weak_group) {
601 			        pos = perf_evlist__reset_weak_group(evlist, pos);
602 				goto try_again;
603 			}
604 			rc = -errno;
605 			perf_evsel__open_strerror(pos, &opts->target,
606 						  errno, msg, sizeof(msg));
607 			ui__error("%s\n", msg);
608 			goto out;
609 		}
610 
611 		pos->supported = true;
612 	}
613 
614 	if (perf_evlist__apply_filters(evlist, &pos)) {
615 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
616 			pos->filter, perf_evsel__name(pos), errno,
617 			str_error_r(errno, msg, sizeof(msg)));
618 		rc = -1;
619 		goto out;
620 	}
621 
622 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
623 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
624 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
625 		      str_error_r(errno, msg, sizeof(msg)));
626 		rc = -1;
627 		goto out;
628 	}
629 
630 	rc = record__mmap(rec);
631 	if (rc)
632 		goto out;
633 
634 	session->evlist = evlist;
635 	perf_session__set_id_hdr_size(session);
636 out:
637 	return rc;
638 }
639 
640 static int process_sample_event(struct perf_tool *tool,
641 				union perf_event *event,
642 				struct perf_sample *sample,
643 				struct perf_evsel *evsel,
644 				struct machine *machine)
645 {
646 	struct record *rec = container_of(tool, struct record, tool);
647 
648 	if (rec->evlist->first_sample_time == 0)
649 		rec->evlist->first_sample_time = sample->time;
650 
651 	rec->evlist->last_sample_time = sample->time;
652 
653 	if (rec->buildid_all)
654 		return 0;
655 
656 	rec->samples++;
657 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
658 }
659 
660 static int process_buildids(struct record *rec)
661 {
662 	struct perf_data *data = &rec->data;
663 	struct perf_session *session = rec->session;
664 
665 	if (data->size == 0)
666 		return 0;
667 
668 	/*
669 	 * During this process, it'll load kernel map and replace the
670 	 * dso->long_name to a real pathname it found.  In this case
671 	 * we prefer the vmlinux path like
672 	 *   /lib/modules/3.16.4/build/vmlinux
673 	 *
674 	 * rather than build-id path (in debug directory).
675 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
676 	 */
677 	symbol_conf.ignore_vmlinux_buildid = true;
678 
679 	/*
680 	 * If --buildid-all is given, it marks all DSO regardless of hits,
681 	 * so no need to process samples. But if timestamp_boundary is enabled,
682 	 * it still needs to walk on all samples to get the timestamps of
683 	 * first/last samples.
684 	 */
685 	if (rec->buildid_all && !rec->timestamp_boundary)
686 		rec->tool.sample = NULL;
687 
688 	return perf_session__process_events(session);
689 }
690 
691 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
692 {
693 	int err;
694 	struct perf_tool *tool = data;
695 	/*
696 	 *As for guest kernel when processing subcommand record&report,
697 	 *we arrange module mmap prior to guest kernel mmap and trigger
698 	 *a preload dso because default guest module symbols are loaded
699 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
700 	 *method is used to avoid symbol missing when the first addr is
701 	 *in module instead of in guest kernel.
702 	 */
703 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
704 					     machine);
705 	if (err < 0)
706 		pr_err("Couldn't record guest kernel [%d]'s reference"
707 		       " relocation symbol.\n", machine->pid);
708 
709 	/*
710 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
711 	 * have no _text sometimes.
712 	 */
713 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
714 						 machine);
715 	if (err < 0)
716 		pr_err("Couldn't record guest kernel [%d]'s reference"
717 		       " relocation symbol.\n", machine->pid);
718 }
719 
720 static struct perf_event_header finished_round_event = {
721 	.size = sizeof(struct perf_event_header),
722 	.type = PERF_RECORD_FINISHED_ROUND,
723 };
724 
725 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
726 				    bool overwrite)
727 {
728 	u64 bytes_written = rec->bytes_written;
729 	int i;
730 	int rc = 0;
731 	struct perf_mmap *maps;
732 	int trace_fd = rec->data.file.fd;
733 	off_t off;
734 
735 	if (!evlist)
736 		return 0;
737 
738 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
739 	if (!maps)
740 		return 0;
741 
742 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
743 		return 0;
744 
745 	if (record__aio_enabled(rec))
746 		off = record__aio_get_pos(trace_fd);
747 
748 	for (i = 0; i < evlist->nr_mmaps; i++) {
749 		struct perf_mmap *map = &maps[i];
750 
751 		if (map->base) {
752 			if (!record__aio_enabled(rec)) {
753 				if (perf_mmap__push(map, rec, record__pushfn) != 0) {
754 					rc = -1;
755 					goto out;
756 				}
757 			} else {
758 				int idx;
759 				/*
760 				 * Call record__aio_sync() to wait till map->data buffer
761 				 * becomes available after previous aio write request.
762 				 */
763 				idx = record__aio_sync(map, false);
764 				if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
765 					record__aio_set_pos(trace_fd, off);
766 					rc = -1;
767 					goto out;
768 				}
769 			}
770 		}
771 
772 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
773 		    record__auxtrace_mmap_read(rec, map) != 0) {
774 			rc = -1;
775 			goto out;
776 		}
777 	}
778 
779 	if (record__aio_enabled(rec))
780 		record__aio_set_pos(trace_fd, off);
781 
782 	/*
783 	 * Mark the round finished in case we wrote
784 	 * at least one event.
785 	 */
786 	if (bytes_written != rec->bytes_written)
787 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
788 
789 	if (overwrite)
790 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
791 out:
792 	return rc;
793 }
794 
795 static int record__mmap_read_all(struct record *rec)
796 {
797 	int err;
798 
799 	err = record__mmap_read_evlist(rec, rec->evlist, false);
800 	if (err)
801 		return err;
802 
803 	return record__mmap_read_evlist(rec, rec->evlist, true);
804 }
805 
806 static void record__init_features(struct record *rec)
807 {
808 	struct perf_session *session = rec->session;
809 	int feat;
810 
811 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
812 		perf_header__set_feat(&session->header, feat);
813 
814 	if (rec->no_buildid)
815 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
816 
817 	if (!have_tracepoints(&rec->evlist->entries))
818 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
819 
820 	if (!rec->opts.branch_stack)
821 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
822 
823 	if (!rec->opts.full_auxtrace)
824 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
825 
826 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
827 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
828 
829 	perf_header__clear_feat(&session->header, HEADER_STAT);
830 }
831 
832 static void
833 record__finish_output(struct record *rec)
834 {
835 	struct perf_data *data = &rec->data;
836 	int fd = perf_data__fd(data);
837 
838 	if (data->is_pipe)
839 		return;
840 
841 	rec->session->header.data_size += rec->bytes_written;
842 	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
843 
844 	if (!rec->no_buildid) {
845 		process_buildids(rec);
846 
847 		if (rec->buildid_all)
848 			dsos__hit_all(rec->session);
849 	}
850 	perf_session__write_header(rec->session, rec->evlist, fd, true);
851 
852 	return;
853 }
854 
855 static int record__synthesize_workload(struct record *rec, bool tail)
856 {
857 	int err;
858 	struct thread_map *thread_map;
859 
860 	if (rec->opts.tail_synthesize != tail)
861 		return 0;
862 
863 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
864 	if (thread_map == NULL)
865 		return -1;
866 
867 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
868 						 process_synthesized_event,
869 						 &rec->session->machines.host,
870 						 rec->opts.sample_address);
871 	thread_map__put(thread_map);
872 	return err;
873 }
874 
875 static int record__synthesize(struct record *rec, bool tail);
876 
877 static int
878 record__switch_output(struct record *rec, bool at_exit)
879 {
880 	struct perf_data *data = &rec->data;
881 	int fd, err;
882 
883 	/* Same Size:      "2015122520103046"*/
884 	char timestamp[] = "InvalidTimestamp";
885 
886 	record__aio_mmap_read_sync(rec);
887 
888 	record__synthesize(rec, true);
889 	if (target__none(&rec->opts.target))
890 		record__synthesize_workload(rec, true);
891 
892 	rec->samples = 0;
893 	record__finish_output(rec);
894 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
895 	if (err) {
896 		pr_err("Failed to get current timestamp\n");
897 		return -EINVAL;
898 	}
899 
900 	fd = perf_data__switch(data, timestamp,
901 				    rec->session->header.data_offset,
902 				    at_exit);
903 	if (fd >= 0 && !at_exit) {
904 		rec->bytes_written = 0;
905 		rec->session->header.data_size = 0;
906 	}
907 
908 	if (!quiet)
909 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
910 			data->file.path, timestamp);
911 
912 	/* Output tracking events */
913 	if (!at_exit) {
914 		record__synthesize(rec, false);
915 
916 		/*
917 		 * In 'perf record --switch-output' without -a,
918 		 * record__synthesize() in record__switch_output() won't
919 		 * generate tracking events because there's no thread_map
920 		 * in evlist. Which causes newly created perf.data doesn't
921 		 * contain map and comm information.
922 		 * Create a fake thread_map and directly call
923 		 * perf_event__synthesize_thread_map() for those events.
924 		 */
925 		if (target__none(&rec->opts.target))
926 			record__synthesize_workload(rec, false);
927 	}
928 	return fd;
929 }
930 
931 static volatile int workload_exec_errno;
932 
933 /*
934  * perf_evlist__prepare_workload will send a SIGUSR1
935  * if the fork fails, since we asked by setting its
936  * want_signal to true.
937  */
938 static void workload_exec_failed_signal(int signo __maybe_unused,
939 					siginfo_t *info,
940 					void *ucontext __maybe_unused)
941 {
942 	workload_exec_errno = info->si_value.sival_int;
943 	done = 1;
944 	child_finished = 1;
945 }
946 
947 static void snapshot_sig_handler(int sig);
948 static void alarm_sig_handler(int sig);
949 
950 int __weak
951 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
952 			    struct perf_tool *tool __maybe_unused,
953 			    perf_event__handler_t process __maybe_unused,
954 			    struct machine *machine __maybe_unused)
955 {
956 	return 0;
957 }
958 
959 static const struct perf_event_mmap_page *
960 perf_evlist__pick_pc(struct perf_evlist *evlist)
961 {
962 	if (evlist) {
963 		if (evlist->mmap && evlist->mmap[0].base)
964 			return evlist->mmap[0].base;
965 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
966 			return evlist->overwrite_mmap[0].base;
967 	}
968 	return NULL;
969 }
970 
971 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
972 {
973 	const struct perf_event_mmap_page *pc;
974 
975 	pc = perf_evlist__pick_pc(rec->evlist);
976 	if (pc)
977 		return pc;
978 	return NULL;
979 }
980 
981 static int record__synthesize(struct record *rec, bool tail)
982 {
983 	struct perf_session *session = rec->session;
984 	struct machine *machine = &session->machines.host;
985 	struct perf_data *data = &rec->data;
986 	struct record_opts *opts = &rec->opts;
987 	struct perf_tool *tool = &rec->tool;
988 	int fd = perf_data__fd(data);
989 	int err = 0;
990 
991 	if (rec->opts.tail_synthesize != tail)
992 		return 0;
993 
994 	if (data->is_pipe) {
995 		/*
996 		 * We need to synthesize events first, because some
997 		 * features works on top of them (on report side).
998 		 */
999 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1000 						   process_synthesized_event);
1001 		if (err < 0) {
1002 			pr_err("Couldn't synthesize attrs.\n");
1003 			goto out;
1004 		}
1005 
1006 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1007 						      process_synthesized_event);
1008 		if (err < 0) {
1009 			pr_err("Couldn't synthesize features.\n");
1010 			return err;
1011 		}
1012 
1013 		if (have_tracepoints(&rec->evlist->entries)) {
1014 			/*
1015 			 * FIXME err <= 0 here actually means that
1016 			 * there were no tracepoints so its not really
1017 			 * an error, just that we don't need to
1018 			 * synthesize anything.  We really have to
1019 			 * return this more properly and also
1020 			 * propagate errors that now are calling die()
1021 			 */
1022 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1023 								  process_synthesized_event);
1024 			if (err <= 0) {
1025 				pr_err("Couldn't record tracing data.\n");
1026 				goto out;
1027 			}
1028 			rec->bytes_written += err;
1029 		}
1030 	}
1031 
1032 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1033 					  process_synthesized_event, machine);
1034 	if (err)
1035 		goto out;
1036 
1037 	if (rec->opts.full_auxtrace) {
1038 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1039 					session, process_synthesized_event);
1040 		if (err)
1041 			goto out;
1042 	}
1043 
1044 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1045 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1046 							 machine);
1047 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1048 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1049 				   "Check /proc/kallsyms permission or run as root.\n");
1050 
1051 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1052 						     machine);
1053 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1054 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1055 				   "Check /proc/modules permission or run as root.\n");
1056 	}
1057 
1058 	if (perf_guest) {
1059 		machines__process_guests(&session->machines,
1060 					 perf_event__synthesize_guest_os, tool);
1061 	}
1062 
1063 	err = perf_event__synthesize_extra_attr(&rec->tool,
1064 						rec->evlist,
1065 						process_synthesized_event,
1066 						data->is_pipe);
1067 	if (err)
1068 		goto out;
1069 
1070 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1071 						 process_synthesized_event,
1072 						NULL);
1073 	if (err < 0) {
1074 		pr_err("Couldn't synthesize thread map.\n");
1075 		return err;
1076 	}
1077 
1078 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1079 					     process_synthesized_event, NULL);
1080 	if (err < 0) {
1081 		pr_err("Couldn't synthesize cpu map.\n");
1082 		return err;
1083 	}
1084 
1085 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1086 					    process_synthesized_event, opts->sample_address,
1087 					    1);
1088 out:
1089 	return err;
1090 }
1091 
1092 static int __cmd_record(struct record *rec, int argc, const char **argv)
1093 {
1094 	int err;
1095 	int status = 0;
1096 	unsigned long waking = 0;
1097 	const bool forks = argc > 0;
1098 	struct perf_tool *tool = &rec->tool;
1099 	struct record_opts *opts = &rec->opts;
1100 	struct perf_data *data = &rec->data;
1101 	struct perf_session *session;
1102 	bool disabled = false, draining = false;
1103 	int fd;
1104 
1105 	atexit(record__sig_exit);
1106 	signal(SIGCHLD, sig_handler);
1107 	signal(SIGINT, sig_handler);
1108 	signal(SIGTERM, sig_handler);
1109 	signal(SIGSEGV, sigsegv_handler);
1110 
1111 	if (rec->opts.record_namespaces)
1112 		tool->namespace_events = true;
1113 
1114 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1115 		signal(SIGUSR2, snapshot_sig_handler);
1116 		if (rec->opts.auxtrace_snapshot_mode)
1117 			trigger_on(&auxtrace_snapshot_trigger);
1118 		if (rec->switch_output.enabled)
1119 			trigger_on(&switch_output_trigger);
1120 	} else {
1121 		signal(SIGUSR2, SIG_IGN);
1122 	}
1123 
1124 	session = perf_session__new(data, false, tool);
1125 	if (session == NULL) {
1126 		pr_err("Perf session creation failed.\n");
1127 		return -1;
1128 	}
1129 
1130 	fd = perf_data__fd(data);
1131 	rec->session = session;
1132 
1133 	record__init_features(rec);
1134 
1135 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1136 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1137 
1138 	if (forks) {
1139 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1140 						    argv, data->is_pipe,
1141 						    workload_exec_failed_signal);
1142 		if (err < 0) {
1143 			pr_err("Couldn't run the workload!\n");
1144 			status = err;
1145 			goto out_delete_session;
1146 		}
1147 	}
1148 
1149 	/*
1150 	 * If we have just single event and are sending data
1151 	 * through pipe, we need to force the ids allocation,
1152 	 * because we synthesize event name through the pipe
1153 	 * and need the id for that.
1154 	 */
1155 	if (data->is_pipe && rec->evlist->nr_entries == 1)
1156 		rec->opts.sample_id = true;
1157 
1158 	if (record__open(rec) != 0) {
1159 		err = -1;
1160 		goto out_child;
1161 	}
1162 
1163 	err = bpf__apply_obj_config();
1164 	if (err) {
1165 		char errbuf[BUFSIZ];
1166 
1167 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1168 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1169 			 errbuf);
1170 		goto out_child;
1171 	}
1172 
1173 	/*
1174 	 * Normally perf_session__new would do this, but it doesn't have the
1175 	 * evlist.
1176 	 */
1177 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1178 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1179 		rec->tool.ordered_events = false;
1180 	}
1181 
1182 	if (!rec->evlist->nr_groups)
1183 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1184 
1185 	if (data->is_pipe) {
1186 		err = perf_header__write_pipe(fd);
1187 		if (err < 0)
1188 			goto out_child;
1189 	} else {
1190 		err = perf_session__write_header(session, rec->evlist, fd, false);
1191 		if (err < 0)
1192 			goto out_child;
1193 	}
1194 
1195 	if (!rec->no_buildid
1196 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1197 		pr_err("Couldn't generate buildids. "
1198 		       "Use --no-buildid to profile anyway.\n");
1199 		err = -1;
1200 		goto out_child;
1201 	}
1202 
1203 	err = record__synthesize(rec, false);
1204 	if (err < 0)
1205 		goto out_child;
1206 
1207 	if (rec->realtime_prio) {
1208 		struct sched_param param;
1209 
1210 		param.sched_priority = rec->realtime_prio;
1211 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1212 			pr_err("Could not set realtime priority.\n");
1213 			err = -1;
1214 			goto out_child;
1215 		}
1216 	}
1217 
1218 	/*
1219 	 * When perf is starting the traced process, all the events
1220 	 * (apart from group members) have enable_on_exec=1 set,
1221 	 * so don't spoil it by prematurely enabling them.
1222 	 */
1223 	if (!target__none(&opts->target) && !opts->initial_delay)
1224 		perf_evlist__enable(rec->evlist);
1225 
1226 	/*
1227 	 * Let the child rip
1228 	 */
1229 	if (forks) {
1230 		struct machine *machine = &session->machines.host;
1231 		union perf_event *event;
1232 		pid_t tgid;
1233 
1234 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1235 		if (event == NULL) {
1236 			err = -ENOMEM;
1237 			goto out_child;
1238 		}
1239 
1240 		/*
1241 		 * Some H/W events are generated before COMM event
1242 		 * which is emitted during exec(), so perf script
1243 		 * cannot see a correct process name for those events.
1244 		 * Synthesize COMM event to prevent it.
1245 		 */
1246 		tgid = perf_event__synthesize_comm(tool, event,
1247 						   rec->evlist->workload.pid,
1248 						   process_synthesized_event,
1249 						   machine);
1250 		free(event);
1251 
1252 		if (tgid == -1)
1253 			goto out_child;
1254 
1255 		event = malloc(sizeof(event->namespaces) +
1256 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1257 			       machine->id_hdr_size);
1258 		if (event == NULL) {
1259 			err = -ENOMEM;
1260 			goto out_child;
1261 		}
1262 
1263 		/*
1264 		 * Synthesize NAMESPACES event for the command specified.
1265 		 */
1266 		perf_event__synthesize_namespaces(tool, event,
1267 						  rec->evlist->workload.pid,
1268 						  tgid, process_synthesized_event,
1269 						  machine);
1270 		free(event);
1271 
1272 		perf_evlist__start_workload(rec->evlist);
1273 	}
1274 
1275 	if (opts->initial_delay) {
1276 		usleep(opts->initial_delay * USEC_PER_MSEC);
1277 		perf_evlist__enable(rec->evlist);
1278 	}
1279 
1280 	trigger_ready(&auxtrace_snapshot_trigger);
1281 	trigger_ready(&switch_output_trigger);
1282 	perf_hooks__invoke_record_start();
1283 	for (;;) {
1284 		unsigned long long hits = rec->samples;
1285 
1286 		/*
1287 		 * rec->evlist->bkw_mmap_state is possible to be
1288 		 * BKW_MMAP_EMPTY here: when done == true and
1289 		 * hits != rec->samples in previous round.
1290 		 *
1291 		 * perf_evlist__toggle_bkw_mmap ensure we never
1292 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1293 		 */
1294 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1295 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1296 
1297 		if (record__mmap_read_all(rec) < 0) {
1298 			trigger_error(&auxtrace_snapshot_trigger);
1299 			trigger_error(&switch_output_trigger);
1300 			err = -1;
1301 			goto out_child;
1302 		}
1303 
1304 		if (auxtrace_record__snapshot_started) {
1305 			auxtrace_record__snapshot_started = 0;
1306 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1307 				record__read_auxtrace_snapshot(rec);
1308 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1309 				pr_err("AUX area tracing snapshot failed\n");
1310 				err = -1;
1311 				goto out_child;
1312 			}
1313 		}
1314 
1315 		if (trigger_is_hit(&switch_output_trigger)) {
1316 			/*
1317 			 * If switch_output_trigger is hit, the data in
1318 			 * overwritable ring buffer should have been collected,
1319 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1320 			 *
1321 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
1322 			 * record__mmap_read_all() didn't collect data from
1323 			 * overwritable ring buffer. Read again.
1324 			 */
1325 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1326 				continue;
1327 			trigger_ready(&switch_output_trigger);
1328 
1329 			/*
1330 			 * Reenable events in overwrite ring buffer after
1331 			 * record__mmap_read_all(): we should have collected
1332 			 * data from it.
1333 			 */
1334 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1335 
1336 			if (!quiet)
1337 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1338 					waking);
1339 			waking = 0;
1340 			fd = record__switch_output(rec, false);
1341 			if (fd < 0) {
1342 				pr_err("Failed to switch to new file\n");
1343 				trigger_error(&switch_output_trigger);
1344 				err = fd;
1345 				goto out_child;
1346 			}
1347 
1348 			/* re-arm the alarm */
1349 			if (rec->switch_output.time)
1350 				alarm(rec->switch_output.time);
1351 		}
1352 
1353 		if (hits == rec->samples) {
1354 			if (done || draining)
1355 				break;
1356 			err = perf_evlist__poll(rec->evlist, -1);
1357 			/*
1358 			 * Propagate error, only if there's any. Ignore positive
1359 			 * number of returned events and interrupt error.
1360 			 */
1361 			if (err > 0 || (err < 0 && errno == EINTR))
1362 				err = 0;
1363 			waking++;
1364 
1365 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1366 				draining = true;
1367 		}
1368 
1369 		/*
1370 		 * When perf is starting the traced process, at the end events
1371 		 * die with the process and we wait for that. Thus no need to
1372 		 * disable events in this case.
1373 		 */
1374 		if (done && !disabled && !target__none(&opts->target)) {
1375 			trigger_off(&auxtrace_snapshot_trigger);
1376 			perf_evlist__disable(rec->evlist);
1377 			disabled = true;
1378 		}
1379 	}
1380 	trigger_off(&auxtrace_snapshot_trigger);
1381 	trigger_off(&switch_output_trigger);
1382 
1383 	if (forks && workload_exec_errno) {
1384 		char msg[STRERR_BUFSIZE];
1385 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1386 		pr_err("Workload failed: %s\n", emsg);
1387 		err = -1;
1388 		goto out_child;
1389 	}
1390 
1391 	if (!quiet)
1392 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1393 
1394 	if (target__none(&rec->opts.target))
1395 		record__synthesize_workload(rec, true);
1396 
1397 out_child:
1398 	record__aio_mmap_read_sync(rec);
1399 
1400 	if (forks) {
1401 		int exit_status;
1402 
1403 		if (!child_finished)
1404 			kill(rec->evlist->workload.pid, SIGTERM);
1405 
1406 		wait(&exit_status);
1407 
1408 		if (err < 0)
1409 			status = err;
1410 		else if (WIFEXITED(exit_status))
1411 			status = WEXITSTATUS(exit_status);
1412 		else if (WIFSIGNALED(exit_status))
1413 			signr = WTERMSIG(exit_status);
1414 	} else
1415 		status = err;
1416 
1417 	record__synthesize(rec, true);
1418 	/* this will be recalculated during process_buildids() */
1419 	rec->samples = 0;
1420 
1421 	if (!err) {
1422 		if (!rec->timestamp_filename) {
1423 			record__finish_output(rec);
1424 		} else {
1425 			fd = record__switch_output(rec, true);
1426 			if (fd < 0) {
1427 				status = fd;
1428 				goto out_delete_session;
1429 			}
1430 		}
1431 	}
1432 
1433 	perf_hooks__invoke_record_end();
1434 
1435 	if (!err && !quiet) {
1436 		char samples[128];
1437 		const char *postfix = rec->timestamp_filename ?
1438 					".<timestamp>" : "";
1439 
1440 		if (rec->samples && !rec->opts.full_auxtrace)
1441 			scnprintf(samples, sizeof(samples),
1442 				  " (%" PRIu64 " samples)", rec->samples);
1443 		else
1444 			samples[0] = '\0';
1445 
1446 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1447 			perf_data__size(data) / 1024.0 / 1024.0,
1448 			data->file.path, postfix, samples);
1449 	}
1450 
1451 out_delete_session:
1452 	perf_session__delete(session);
1453 	return status;
1454 }
1455 
1456 static void callchain_debug(struct callchain_param *callchain)
1457 {
1458 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1459 
1460 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1461 
1462 	if (callchain->record_mode == CALLCHAIN_DWARF)
1463 		pr_debug("callchain: stack dump size %d\n",
1464 			 callchain->dump_size);
1465 }
1466 
1467 int record_opts__parse_callchain(struct record_opts *record,
1468 				 struct callchain_param *callchain,
1469 				 const char *arg, bool unset)
1470 {
1471 	int ret;
1472 	callchain->enabled = !unset;
1473 
1474 	/* --no-call-graph */
1475 	if (unset) {
1476 		callchain->record_mode = CALLCHAIN_NONE;
1477 		pr_debug("callchain: disabled\n");
1478 		return 0;
1479 	}
1480 
1481 	ret = parse_callchain_record_opt(arg, callchain);
1482 	if (!ret) {
1483 		/* Enable data address sampling for DWARF unwind. */
1484 		if (callchain->record_mode == CALLCHAIN_DWARF)
1485 			record->sample_address = true;
1486 		callchain_debug(callchain);
1487 	}
1488 
1489 	return ret;
1490 }
1491 
1492 int record_parse_callchain_opt(const struct option *opt,
1493 			       const char *arg,
1494 			       int unset)
1495 {
1496 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1497 }
1498 
1499 int record_callchain_opt(const struct option *opt,
1500 			 const char *arg __maybe_unused,
1501 			 int unset __maybe_unused)
1502 {
1503 	struct callchain_param *callchain = opt->value;
1504 
1505 	callchain->enabled = true;
1506 
1507 	if (callchain->record_mode == CALLCHAIN_NONE)
1508 		callchain->record_mode = CALLCHAIN_FP;
1509 
1510 	callchain_debug(callchain);
1511 	return 0;
1512 }
1513 
1514 static int perf_record_config(const char *var, const char *value, void *cb)
1515 {
1516 	struct record *rec = cb;
1517 
1518 	if (!strcmp(var, "record.build-id")) {
1519 		if (!strcmp(value, "cache"))
1520 			rec->no_buildid_cache = false;
1521 		else if (!strcmp(value, "no-cache"))
1522 			rec->no_buildid_cache = true;
1523 		else if (!strcmp(value, "skip"))
1524 			rec->no_buildid = true;
1525 		else
1526 			return -1;
1527 		return 0;
1528 	}
1529 	if (!strcmp(var, "record.call-graph")) {
1530 		var = "call-graph.record-mode";
1531 		return perf_default_config(var, value, cb);
1532 	}
1533 #ifdef HAVE_AIO_SUPPORT
1534 	if (!strcmp(var, "record.aio")) {
1535 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1536 		if (!rec->opts.nr_cblocks)
1537 			rec->opts.nr_cblocks = nr_cblocks_default;
1538 	}
1539 #endif
1540 
1541 	return 0;
1542 }
1543 
1544 struct clockid_map {
1545 	const char *name;
1546 	int clockid;
1547 };
1548 
1549 #define CLOCKID_MAP(n, c)	\
1550 	{ .name = n, .clockid = (c), }
1551 
1552 #define CLOCKID_END	{ .name = NULL, }
1553 
1554 
1555 /*
1556  * Add the missing ones, we need to build on many distros...
1557  */
1558 #ifndef CLOCK_MONOTONIC_RAW
1559 #define CLOCK_MONOTONIC_RAW 4
1560 #endif
1561 #ifndef CLOCK_BOOTTIME
1562 #define CLOCK_BOOTTIME 7
1563 #endif
1564 #ifndef CLOCK_TAI
1565 #define CLOCK_TAI 11
1566 #endif
1567 
1568 static const struct clockid_map clockids[] = {
1569 	/* available for all events, NMI safe */
1570 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1571 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1572 
1573 	/* available for some events */
1574 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1575 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1576 	CLOCKID_MAP("tai", CLOCK_TAI),
1577 
1578 	/* available for the lazy */
1579 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1580 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1581 	CLOCKID_MAP("real", CLOCK_REALTIME),
1582 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1583 
1584 	CLOCKID_END,
1585 };
1586 
1587 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1588 {
1589 	struct timespec res;
1590 
1591 	*res_ns = 0;
1592 	if (!clock_getres(clk_id, &res))
1593 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1594 	else
1595 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1596 
1597 	return 0;
1598 }
1599 
1600 static int parse_clockid(const struct option *opt, const char *str, int unset)
1601 {
1602 	struct record_opts *opts = (struct record_opts *)opt->value;
1603 	const struct clockid_map *cm;
1604 	const char *ostr = str;
1605 
1606 	if (unset) {
1607 		opts->use_clockid = 0;
1608 		return 0;
1609 	}
1610 
1611 	/* no arg passed */
1612 	if (!str)
1613 		return 0;
1614 
1615 	/* no setting it twice */
1616 	if (opts->use_clockid)
1617 		return -1;
1618 
1619 	opts->use_clockid = true;
1620 
1621 	/* if its a number, we're done */
1622 	if (sscanf(str, "%d", &opts->clockid) == 1)
1623 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1624 
1625 	/* allow a "CLOCK_" prefix to the name */
1626 	if (!strncasecmp(str, "CLOCK_", 6))
1627 		str += 6;
1628 
1629 	for (cm = clockids; cm->name; cm++) {
1630 		if (!strcasecmp(str, cm->name)) {
1631 			opts->clockid = cm->clockid;
1632 			return get_clockid_res(opts->clockid,
1633 					       &opts->clockid_res_ns);
1634 		}
1635 	}
1636 
1637 	opts->use_clockid = false;
1638 	ui__warning("unknown clockid %s, check man page\n", ostr);
1639 	return -1;
1640 }
1641 
1642 static int record__parse_mmap_pages(const struct option *opt,
1643 				    const char *str,
1644 				    int unset __maybe_unused)
1645 {
1646 	struct record_opts *opts = opt->value;
1647 	char *s, *p;
1648 	unsigned int mmap_pages;
1649 	int ret;
1650 
1651 	if (!str)
1652 		return -EINVAL;
1653 
1654 	s = strdup(str);
1655 	if (!s)
1656 		return -ENOMEM;
1657 
1658 	p = strchr(s, ',');
1659 	if (p)
1660 		*p = '\0';
1661 
1662 	if (*s) {
1663 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1664 		if (ret)
1665 			goto out_free;
1666 		opts->mmap_pages = mmap_pages;
1667 	}
1668 
1669 	if (!p) {
1670 		ret = 0;
1671 		goto out_free;
1672 	}
1673 
1674 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1675 	if (ret)
1676 		goto out_free;
1677 
1678 	opts->auxtrace_mmap_pages = mmap_pages;
1679 
1680 out_free:
1681 	free(s);
1682 	return ret;
1683 }
1684 
1685 static void switch_output_size_warn(struct record *rec)
1686 {
1687 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1688 	struct switch_output *s = &rec->switch_output;
1689 
1690 	wakeup_size /= 2;
1691 
1692 	if (s->size < wakeup_size) {
1693 		char buf[100];
1694 
1695 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1696 		pr_warning("WARNING: switch-output data size lower than "
1697 			   "wakeup kernel buffer size (%s) "
1698 			   "expect bigger perf.data sizes\n", buf);
1699 	}
1700 }
1701 
1702 static int switch_output_setup(struct record *rec)
1703 {
1704 	struct switch_output *s = &rec->switch_output;
1705 	static struct parse_tag tags_size[] = {
1706 		{ .tag  = 'B', .mult = 1       },
1707 		{ .tag  = 'K', .mult = 1 << 10 },
1708 		{ .tag  = 'M', .mult = 1 << 20 },
1709 		{ .tag  = 'G', .mult = 1 << 30 },
1710 		{ .tag  = 0 },
1711 	};
1712 	static struct parse_tag tags_time[] = {
1713 		{ .tag  = 's', .mult = 1        },
1714 		{ .tag  = 'm', .mult = 60       },
1715 		{ .tag  = 'h', .mult = 60*60    },
1716 		{ .tag  = 'd', .mult = 60*60*24 },
1717 		{ .tag  = 0 },
1718 	};
1719 	unsigned long val;
1720 
1721 	if (!s->set)
1722 		return 0;
1723 
1724 	if (!strcmp(s->str, "signal")) {
1725 		s->signal = true;
1726 		pr_debug("switch-output with SIGUSR2 signal\n");
1727 		goto enabled;
1728 	}
1729 
1730 	val = parse_tag_value(s->str, tags_size);
1731 	if (val != (unsigned long) -1) {
1732 		s->size = val;
1733 		pr_debug("switch-output with %s size threshold\n", s->str);
1734 		goto enabled;
1735 	}
1736 
1737 	val = parse_tag_value(s->str, tags_time);
1738 	if (val != (unsigned long) -1) {
1739 		s->time = val;
1740 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1741 			 s->str, s->time);
1742 		goto enabled;
1743 	}
1744 
1745 	return -1;
1746 
1747 enabled:
1748 	rec->timestamp_filename = true;
1749 	s->enabled              = true;
1750 
1751 	if (s->size && !rec->opts.no_buffering)
1752 		switch_output_size_warn(rec);
1753 
1754 	return 0;
1755 }
1756 
1757 static const char * const __record_usage[] = {
1758 	"perf record [<options>] [<command>]",
1759 	"perf record [<options>] -- <command> [<options>]",
1760 	NULL
1761 };
1762 const char * const *record_usage = __record_usage;
1763 
1764 /*
1765  * XXX Ideally would be local to cmd_record() and passed to a record__new
1766  * because we need to have access to it in record__exit, that is called
1767  * after cmd_record() exits, but since record_options need to be accessible to
1768  * builtin-script, leave it here.
1769  *
1770  * At least we don't ouch it in all the other functions here directly.
1771  *
1772  * Just say no to tons of global variables, sigh.
1773  */
1774 static struct record record = {
1775 	.opts = {
1776 		.sample_time	     = true,
1777 		.mmap_pages	     = UINT_MAX,
1778 		.user_freq	     = UINT_MAX,
1779 		.user_interval	     = ULLONG_MAX,
1780 		.freq		     = 4000,
1781 		.target		     = {
1782 			.uses_mmap   = true,
1783 			.default_per_cpu = true,
1784 		},
1785 	},
1786 	.tool = {
1787 		.sample		= process_sample_event,
1788 		.fork		= perf_event__process_fork,
1789 		.exit		= perf_event__process_exit,
1790 		.comm		= perf_event__process_comm,
1791 		.namespaces	= perf_event__process_namespaces,
1792 		.mmap		= perf_event__process_mmap,
1793 		.mmap2		= perf_event__process_mmap2,
1794 		.ordered_events	= true,
1795 	},
1796 };
1797 
1798 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1799 	"\n\t\t\t\tDefault: fp";
1800 
1801 static bool dry_run;
1802 
1803 /*
1804  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1805  * with it and switch to use the library functions in perf_evlist that came
1806  * from builtin-record.c, i.e. use record_opts,
1807  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1808  * using pipes, etc.
1809  */
1810 static struct option __record_options[] = {
1811 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1812 		     "event selector. use 'perf list' to list available events",
1813 		     parse_events_option),
1814 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1815 		     "event filter", parse_filter),
1816 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1817 			   NULL, "don't record events from perf itself",
1818 			   exclude_perf),
1819 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1820 		    "record events on existing process id"),
1821 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1822 		    "record events on existing thread id"),
1823 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1824 		    "collect data with this RT SCHED_FIFO priority"),
1825 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1826 		    "collect data without buffering"),
1827 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1828 		    "collect raw sample records from all opened counters"),
1829 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1830 			    "system-wide collection from all CPUs"),
1831 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1832 		    "list of cpus to monitor"),
1833 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1834 	OPT_STRING('o', "output", &record.data.file.path, "file",
1835 		    "output file name"),
1836 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1837 			&record.opts.no_inherit_set,
1838 			"child tasks do not inherit counters"),
1839 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1840 		    "synthesize non-sample events at the end of output"),
1841 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1842 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1843 		    "Fail if the specified frequency can't be used"),
1844 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1845 		     "profile at this frequency",
1846 		      record__parse_freq),
1847 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1848 		     "number of mmap data pages and AUX area tracing mmap pages",
1849 		     record__parse_mmap_pages),
1850 	OPT_BOOLEAN(0, "group", &record.opts.group,
1851 		    "put the counters into a counter group"),
1852 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1853 			   NULL, "enables call-graph recording" ,
1854 			   &record_callchain_opt),
1855 	OPT_CALLBACK(0, "call-graph", &record.opts,
1856 		     "record_mode[,record_size]", record_callchain_help,
1857 		     &record_parse_callchain_opt),
1858 	OPT_INCR('v', "verbose", &verbose,
1859 		    "be more verbose (show counter open errors, etc)"),
1860 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1861 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1862 		    "per thread counts"),
1863 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1864 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1865 		    "Record the sample physical addresses"),
1866 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1867 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1868 			&record.opts.sample_time_set,
1869 			"Record the sample timestamps"),
1870 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1871 			"Record the sample period"),
1872 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1873 		    "don't sample"),
1874 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1875 			&record.no_buildid_cache_set,
1876 			"do not update the buildid cache"),
1877 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1878 			&record.no_buildid_set,
1879 			"do not collect buildids in perf.data"),
1880 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1881 		     "monitor event in cgroup name only",
1882 		     parse_cgroups),
1883 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1884 		  "ms to wait before starting measurement after program start"),
1885 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1886 		   "user to profile"),
1887 
1888 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1889 		     "branch any", "sample any taken branches",
1890 		     parse_branch_stack),
1891 
1892 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1893 		     "branch filter mask", "branch stack filter modes",
1894 		     parse_branch_stack),
1895 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1896 		    "sample by weight (on special events only)"),
1897 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1898 		    "sample transaction flags (special events only)"),
1899 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1900 		    "use per-thread mmaps"),
1901 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1902 		    "sample selected machine registers on interrupt,"
1903 		    " use -I ? to list register names", parse_regs),
1904 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1905 		    "sample selected machine registers on interrupt,"
1906 		    " use -I ? to list register names", parse_regs),
1907 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1908 		    "Record running/enabled time of read (:S) events"),
1909 	OPT_CALLBACK('k', "clockid", &record.opts,
1910 	"clockid", "clockid to use for events, see clock_gettime()",
1911 	parse_clockid),
1912 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1913 			  "opts", "AUX area tracing Snapshot Mode", ""),
1914 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1915 			"per thread proc mmap processing timeout in ms"),
1916 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1917 		    "Record namespaces events"),
1918 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1919 		    "Record context switch events"),
1920 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1921 			 "Configure all used events to run in kernel space.",
1922 			 PARSE_OPT_EXCLUSIVE),
1923 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1924 			 "Configure all used events to run in user space.",
1925 			 PARSE_OPT_EXCLUSIVE),
1926 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1927 		   "clang binary to use for compiling BPF scriptlets"),
1928 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1929 		   "options passed to clang when compiling BPF scriptlets"),
1930 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1931 		   "file", "vmlinux pathname"),
1932 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1933 		    "Record build-id of all DSOs regardless of hits"),
1934 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1935 		    "append timestamp to output filename"),
1936 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1937 		    "Record timestamp boundary (time of first/last samples)"),
1938 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1939 			  &record.switch_output.set, "signal,size,time",
1940 			  "Switch output when receive SIGUSR2 or cross size,time threshold",
1941 			  "signal"),
1942 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1943 		    "Parse options then exit"),
1944 #ifdef HAVE_AIO_SUPPORT
1945 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
1946 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
1947 		     record__aio_parse),
1948 #endif
1949 	OPT_END()
1950 };
1951 
1952 struct option *record_options = __record_options;
1953 
1954 int cmd_record(int argc, const char **argv)
1955 {
1956 	int err;
1957 	struct record *rec = &record;
1958 	char errbuf[BUFSIZ];
1959 
1960 	setlocale(LC_ALL, "");
1961 
1962 #ifndef HAVE_LIBBPF_SUPPORT
1963 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1964 	set_nobuild('\0', "clang-path", true);
1965 	set_nobuild('\0', "clang-opt", true);
1966 # undef set_nobuild
1967 #endif
1968 
1969 #ifndef HAVE_BPF_PROLOGUE
1970 # if !defined (HAVE_DWARF_SUPPORT)
1971 #  define REASON  "NO_DWARF=1"
1972 # elif !defined (HAVE_LIBBPF_SUPPORT)
1973 #  define REASON  "NO_LIBBPF=1"
1974 # else
1975 #  define REASON  "this architecture doesn't support BPF prologue"
1976 # endif
1977 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1978 	set_nobuild('\0', "vmlinux", true);
1979 # undef set_nobuild
1980 # undef REASON
1981 #endif
1982 
1983 	rec->evlist = perf_evlist__new();
1984 	if (rec->evlist == NULL)
1985 		return -ENOMEM;
1986 
1987 	err = perf_config(perf_record_config, rec);
1988 	if (err)
1989 		return err;
1990 
1991 	argc = parse_options(argc, argv, record_options, record_usage,
1992 			    PARSE_OPT_STOP_AT_NON_OPTION);
1993 	if (quiet)
1994 		perf_quiet_option();
1995 
1996 	/* Make system wide (-a) the default target. */
1997 	if (!argc && target__none(&rec->opts.target))
1998 		rec->opts.target.system_wide = true;
1999 
2000 	if (nr_cgroups && !rec->opts.target.system_wide) {
2001 		usage_with_options_msg(record_usage, record_options,
2002 			"cgroup monitoring only available in system-wide mode");
2003 
2004 	}
2005 	if (rec->opts.record_switch_events &&
2006 	    !perf_can_record_switch_events()) {
2007 		ui__error("kernel does not support recording context switch events\n");
2008 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2009 		return -EINVAL;
2010 	}
2011 
2012 	if (switch_output_setup(rec)) {
2013 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2014 		return -EINVAL;
2015 	}
2016 
2017 	if (rec->switch_output.time) {
2018 		signal(SIGALRM, alarm_sig_handler);
2019 		alarm(rec->switch_output.time);
2020 	}
2021 
2022 	/*
2023 	 * Allow aliases to facilitate the lookup of symbols for address
2024 	 * filters. Refer to auxtrace_parse_filters().
2025 	 */
2026 	symbol_conf.allow_aliases = true;
2027 
2028 	symbol__init(NULL);
2029 
2030 	err = record__auxtrace_init(rec);
2031 	if (err)
2032 		goto out;
2033 
2034 	if (dry_run)
2035 		goto out;
2036 
2037 	err = bpf__setup_stdout(rec->evlist);
2038 	if (err) {
2039 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2040 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2041 			 errbuf);
2042 		goto out;
2043 	}
2044 
2045 	err = -ENOMEM;
2046 
2047 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2048 		pr_warning(
2049 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2050 "check /proc/sys/kernel/kptr_restrict.\n\n"
2051 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2052 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2053 "Samples in kernel modules won't be resolved at all.\n\n"
2054 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2055 "even with a suitable vmlinux or kallsyms file.\n\n");
2056 
2057 	if (rec->no_buildid_cache || rec->no_buildid) {
2058 		disable_buildid_cache();
2059 	} else if (rec->switch_output.enabled) {
2060 		/*
2061 		 * In 'perf record --switch-output', disable buildid
2062 		 * generation by default to reduce data file switching
2063 		 * overhead. Still generate buildid if they are required
2064 		 * explicitly using
2065 		 *
2066 		 *  perf record --switch-output --no-no-buildid \
2067 		 *              --no-no-buildid-cache
2068 		 *
2069 		 * Following code equals to:
2070 		 *
2071 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2072 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2073 		 *         disable_buildid_cache();
2074 		 */
2075 		bool disable = true;
2076 
2077 		if (rec->no_buildid_set && !rec->no_buildid)
2078 			disable = false;
2079 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2080 			disable = false;
2081 		if (disable) {
2082 			rec->no_buildid = true;
2083 			rec->no_buildid_cache = true;
2084 			disable_buildid_cache();
2085 		}
2086 	}
2087 
2088 	if (record.opts.overwrite)
2089 		record.opts.tail_synthesize = true;
2090 
2091 	if (rec->evlist->nr_entries == 0 &&
2092 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2093 		pr_err("Not enough memory for event selector list\n");
2094 		goto out;
2095 	}
2096 
2097 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2098 		rec->opts.no_inherit = true;
2099 
2100 	err = target__validate(&rec->opts.target);
2101 	if (err) {
2102 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2103 		ui__warning("%s\n", errbuf);
2104 	}
2105 
2106 	err = target__parse_uid(&rec->opts.target);
2107 	if (err) {
2108 		int saved_errno = errno;
2109 
2110 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2111 		ui__error("%s", errbuf);
2112 
2113 		err = -saved_errno;
2114 		goto out;
2115 	}
2116 
2117 	/* Enable ignoring missing threads when -u/-p option is defined. */
2118 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2119 
2120 	err = -ENOMEM;
2121 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2122 		usage_with_options(record_usage, record_options);
2123 
2124 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2125 	if (err)
2126 		goto out;
2127 
2128 	/*
2129 	 * We take all buildids when the file contains
2130 	 * AUX area tracing data because we do not decode the
2131 	 * trace because it would take too long.
2132 	 */
2133 	if (rec->opts.full_auxtrace)
2134 		rec->buildid_all = true;
2135 
2136 	if (record_opts__config(&rec->opts)) {
2137 		err = -EINVAL;
2138 		goto out;
2139 	}
2140 
2141 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2142 		rec->opts.nr_cblocks = nr_cblocks_max;
2143 	if (verbose > 0)
2144 		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2145 
2146 	err = __cmd_record(&record, argc, argv);
2147 out:
2148 	perf_evlist__delete(rec->evlist);
2149 	symbol__exit();
2150 	auxtrace_record__free(rec->itr);
2151 	return err;
2152 }
2153 
2154 static void snapshot_sig_handler(int sig __maybe_unused)
2155 {
2156 	struct record *rec = &record;
2157 
2158 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2159 		trigger_hit(&auxtrace_snapshot_trigger);
2160 		auxtrace_record__snapshot_started = 1;
2161 		if (auxtrace_record__snapshot_start(record.itr))
2162 			trigger_error(&auxtrace_snapshot_trigger);
2163 	}
2164 
2165 	if (switch_output_signal(rec))
2166 		trigger_hit(&switch_output_trigger);
2167 }
2168 
2169 static void alarm_sig_handler(int sig __maybe_unused)
2170 {
2171 	struct record *rec = &record;
2172 
2173 	if (switch_output_time(rec))
2174 		trigger_hit(&switch_output_trigger);
2175 }
2176