xref: /openbmc/linux/tools/perf/builtin-record.c (revision e42dd3ee)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/cpu-set-sched.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "util/bpf-event.h"
45 #include "asm/bug.h"
46 
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <locale.h>
50 #include <poll.h>
51 #include <unistd.h>
52 #include <sched.h>
53 #include <signal.h>
54 #include <sys/mman.h>
55 #include <sys/wait.h>
56 #include <linux/time64.h>
57 
58 struct switch_output {
59 	bool		 enabled;
60 	bool		 signal;
61 	unsigned long	 size;
62 	unsigned long	 time;
63 	const char	*str;
64 	bool		 set;
65 };
66 
67 struct record {
68 	struct perf_tool	tool;
69 	struct record_opts	opts;
70 	u64			bytes_written;
71 	struct perf_data	data;
72 	struct auxtrace_record	*itr;
73 	struct perf_evlist	*evlist;
74 	struct perf_session	*session;
75 	int			realtime_prio;
76 	bool			no_buildid;
77 	bool			no_buildid_set;
78 	bool			no_buildid_cache;
79 	bool			no_buildid_cache_set;
80 	bool			buildid_all;
81 	bool			timestamp_filename;
82 	bool			timestamp_boundary;
83 	struct switch_output	switch_output;
84 	unsigned long long	samples;
85 	cpu_set_t		affinity_mask;
86 };
87 
88 static volatile int auxtrace_record__snapshot_started;
89 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
90 static DEFINE_TRIGGER(switch_output_trigger);
91 
92 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
93 	"SYS", "NODE", "CPU"
94 };
95 
96 static bool switch_output_signal(struct record *rec)
97 {
98 	return rec->switch_output.signal &&
99 	       trigger_is_ready(&switch_output_trigger);
100 }
101 
102 static bool switch_output_size(struct record *rec)
103 {
104 	return rec->switch_output.size &&
105 	       trigger_is_ready(&switch_output_trigger) &&
106 	       (rec->bytes_written >= rec->switch_output.size);
107 }
108 
109 static bool switch_output_time(struct record *rec)
110 {
111 	return rec->switch_output.time &&
112 	       trigger_is_ready(&switch_output_trigger);
113 }
114 
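/*
 * Write 'size' bytes from 'bf' to the perf.data output file, account them in
 * rec->bytes_written and fire the switch-output trigger once the configured
 * --switch-output size threshold is crossed.
 */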
115 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
116 			 void *bf, size_t size)
117 {
118 	struct perf_data_file *file = &rec->session->data->file;
119 
120 	if (perf_data_file__write(file, bf, size) < 0) {
121 		pr_err("failed to write perf data, error: %m\n");
122 		return -1;
123 	}
124 
125 	rec->bytes_written += size;
126 
127 	if (switch_output_size(rec))
128 		trigger_hit(&switch_output_trigger);
129 
130 	return 0;
131 }
132 
133 #ifdef HAVE_AIO_SUPPORT
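/*
 * Queue an asynchronous write of 'size' bytes at offset 'off' through the
 * given aio control block. EAGAIN is retried in a loop; any other error marks
 * the control block free (aio_fildes = -1) and is reported.
 */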
134 static int record__aio_write(struct aiocb *cblock, int trace_fd,
135 		void *buf, size_t size, off_t off)
136 {
137 	int rc;
138 
139 	cblock->aio_fildes = trace_fd;
140 	cblock->aio_buf    = buf;
141 	cblock->aio_nbytes = size;
142 	cblock->aio_offset = off;
143 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
144 
145 	do {
146 		rc = aio_write(cblock);
147 		if (rc == 0) {
148 			break;
149 		} else if (errno != EAGAIN) {
150 			cblock->aio_fildes = -1;
151 			pr_err("failed to queue perf data, error: %m\n");
152 			break;
153 		}
154 	} while (1);
155 
156 	return rc;
157 }
158 
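/*
 * Check whether an earlier aio write has finished. Returns 0 while it is
 * still in flight or was restarted with the remaining bytes, 1 once the whole
 * chunk is written and the mmap reference taken in perf_mmap__push() dropped.
 */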
159 static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
160 {
161 	void *rem_buf;
162 	off_t rem_off;
163 	size_t rem_size;
164 	int rc, aio_errno;
165 	ssize_t aio_ret, written;
166 
167 	aio_errno = aio_error(cblock);
168 	if (aio_errno == EINPROGRESS)
169 		return 0;
170 
171 	written = aio_ret = aio_return(cblock);
172 	if (aio_ret < 0) {
173 		if (aio_errno != EINTR)
174 			pr_err("failed to write perf data, error: %m\n");
175 		written = 0;
176 	}
177 
178 	rem_size = cblock->aio_nbytes - written;
179 
180 	if (rem_size == 0) {
181 		cblock->aio_fildes = -1;
182 		/*
183 		 * md->refcount is incremented in perf_mmap__push() for
184 		 * every enqueued aio write request, so decrement it because
185 		 * the request is now complete.
186 		 */
187 		perf_mmap__put(md);
188 		rc = 1;
189 	} else {
190 		/*
191 		 * The aio write request may require a restart with the
192 		 * remainder if the kernel didn't write the whole
193 		 * chunk at once.
194 		 */
195 		rem_off = cblock->aio_offset + written;
196 		rem_buf = (void *)(cblock->aio_buf + written);
197 		record__aio_write(cblock, cblock->aio_fildes,
198 				rem_buf, rem_size, rem_off);
199 		rc = 0;
200 	}
201 
202 	return rc;
203 }
204 
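/*
 * Wait for in-flight aio writes on this mmap. With sync_all == false, return
 * the index of the first control block that is free for reuse; with
 * sync_all == true, block until every request has completed and return -1.
 */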
205 static int record__aio_sync(struct perf_mmap *md, bool sync_all)
206 {
207 	struct aiocb **aiocb = md->aio.aiocb;
208 	struct aiocb *cblocks = md->aio.cblocks;
209 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
210 	int i, do_suspend;
211 
212 	do {
213 		do_suspend = 0;
214 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
215 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
216 				if (sync_all)
217 					aiocb[i] = NULL;
218 				else
219 					return i;
220 			} else {
221 				/*
222 				 * The started aio write is not complete yet,
223 				 * so it has to be waited on before the
224 				 * next allocation.
225 				 */
226 				aiocb[i] = &cblocks[i];
227 				do_suspend = 1;
228 			}
229 		}
230 		if (!do_suspend)
231 			return -1;
232 
233 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
234 			if (!(errno == EAGAIN || errno == EINTR))
235 				pr_err("failed to sync perf data, error: %m\n");
236 		}
237 	} while (1);
238 }
239 
240 static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
241 {
242 	struct record *rec = to;
243 	int ret, trace_fd = rec->session->data->file.fd;
244 
245 	rec->samples++;
246 
247 	ret = record__aio_write(cblock, trace_fd, bf, size, off);
248 	if (!ret) {
249 		rec->bytes_written += size;
250 		if (switch_output_size(rec))
251 			trigger_hit(&switch_output_trigger);
252 	}
253 
254 	return ret;
255 }
256 
257 static off_t record__aio_get_pos(int trace_fd)
258 {
259 	return lseek(trace_fd, 0, SEEK_CUR);
260 }
261 
262 static void record__aio_set_pos(int trace_fd, off_t pos)
263 {
264 	lseek(trace_fd, pos, SEEK_SET);
265 }
266 
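/* Drain outstanding aio writes on all mmaps; a no-op unless --aio is used. */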
267 static void record__aio_mmap_read_sync(struct record *rec)
268 {
269 	int i;
270 	struct perf_evlist *evlist = rec->evlist;
271 	struct perf_mmap *maps = evlist->mmap;
272 
273 	if (!rec->opts.nr_cblocks)
274 		return;
275 
276 	for (i = 0; i < evlist->nr_mmaps; i++) {
277 		struct perf_mmap *map = &maps[i];
278 
279 		if (map->base)
280 			record__aio_sync(map, true);
281 	}
282 }
283 
284 static int nr_cblocks_default = 1;
285 static int nr_cblocks_max = 4;
286 
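/*
 * Parse --aio[=n]: no argument selects nr_cblocks_default (e.g. --aio=2
 * requests two control blocks), --no-aio disables asynchronous writing.
 */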
287 static int record__aio_parse(const struct option *opt,
288 			     const char *str,
289 			     int unset)
290 {
291 	struct record_opts *opts = (struct record_opts *)opt->value;
292 
293 	if (unset) {
294 		opts->nr_cblocks = 0;
295 	} else {
296 		if (str)
297 			opts->nr_cblocks = strtol(str, NULL, 0);
298 		if (!opts->nr_cblocks)
299 			opts->nr_cblocks = nr_cblocks_default;
300 	}
301 
302 	return 0;
303 }
304 #else /* HAVE_AIO_SUPPORT */
305 static int nr_cblocks_max = 0;
306 
307 static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
308 {
309 	return -1;
310 }
311 
312 static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
313 		void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
314 {
315 	return -1;
316 }
317 
318 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
319 {
320 	return -1;
321 }
322 
323 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
324 {
325 }
326 
327 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
328 {
329 }
330 #endif
331 
332 static int record__aio_enabled(struct record *rec)
333 {
334 	return rec->opts.nr_cblocks > 0;
335 }
336 
337 static int process_synthesized_event(struct perf_tool *tool,
338 				     union perf_event *event,
339 				     struct perf_sample *sample __maybe_unused,
340 				     struct machine *machine __maybe_unused)
341 {
342 	struct record *rec = container_of(tool, struct record, tool);
343 	return record__write(rec, NULL, event, event->header.size);
344 }
345 
346 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
347 {
348 	struct record *rec = to;
349 
350 	rec->samples++;
351 	return record__write(rec, map, bf, size);
352 }
353 
354 static volatile int done;
355 static volatile int signr = -1;
356 static volatile int child_finished;
357 
358 static void sig_handler(int sig)
359 {
360 	if (sig == SIGCHLD)
361 		child_finished = 1;
362 	else
363 		signr = sig;
364 
365 	done = 1;
366 }
367 
368 static void sigsegv_handler(int sig)
369 {
370 	perf_hooks__recover();
371 	sighandler_dump_stack(sig);
372 }
373 
374 static void record__sig_exit(void)
375 {
376 	if (signr == -1)
377 		return;
378 
379 	signal(signr, SIG_DFL);
380 	raise(signr);
381 }
382 
383 #ifdef HAVE_AUXTRACE_SUPPORT
384 
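/*
 * Write one PERF_RECORD_AUXTRACE event plus its payload (possibly split into
 * two chunks by a ring buffer wrap) and pad the payload to an 8 byte
 * boundary. For non-pipe output the event's file offset is also added to the
 * session's auxtrace index.
 */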
385 static int record__process_auxtrace(struct perf_tool *tool,
386 				    struct perf_mmap *map,
387 				    union perf_event *event, void *data1,
388 				    size_t len1, void *data2, size_t len2)
389 {
390 	struct record *rec = container_of(tool, struct record, tool);
391 	struct perf_data *data = &rec->data;
392 	size_t padding;
393 	u8 pad[8] = {0};
394 
395 	if (!perf_data__is_pipe(data)) {
396 		off_t file_offset;
397 		int fd = perf_data__fd(data);
398 		int err;
399 
400 		file_offset = lseek(fd, 0, SEEK_CUR);
401 		if (file_offset == -1)
402 			return -1;
403 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
404 						     event, file_offset);
405 		if (err)
406 			return err;
407 	}
408 
409 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
410 	padding = (len1 + len2) & 7;
411 	if (padding)
412 		padding = 8 - padding;
413 
414 	record__write(rec, map, event, event->header.size);
415 	record__write(rec, map, data1, len1);
416 	if (len2)
417 		record__write(rec, map, data2, len2);
418 	record__write(rec, map, &pad, padding);
419 
420 	return 0;
421 }
422 
423 static int record__auxtrace_mmap_read(struct record *rec,
424 				      struct perf_mmap *map)
425 {
426 	int ret;
427 
428 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
429 				  record__process_auxtrace);
430 	if (ret < 0)
431 		return ret;
432 
433 	if (ret)
434 		rec->samples++;
435 
436 	return 0;
437 }
438 
439 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
440 					       struct perf_mmap *map)
441 {
442 	int ret;
443 
444 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
445 					   record__process_auxtrace,
446 					   rec->opts.auxtrace_snapshot_size);
447 	if (ret < 0)
448 		return ret;
449 
450 	if (ret)
451 		rec->samples++;
452 
453 	return 0;
454 }
455 
456 static int record__auxtrace_read_snapshot_all(struct record *rec)
457 {
458 	int i;
459 	int rc = 0;
460 
461 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
462 		struct perf_mmap *map = &rec->evlist->mmap[i];
463 
464 		if (!map->auxtrace_mmap.base)
465 			continue;
466 
467 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
468 			rc = -1;
469 			goto out;
470 		}
471 	}
472 out:
473 	return rc;
474 }
475 
476 static void record__read_auxtrace_snapshot(struct record *rec)
477 {
478 	pr_debug("Recording AUX area tracing snapshot\n");
479 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
480 		trigger_error(&auxtrace_snapshot_trigger);
481 	} else {
482 		if (auxtrace_record__snapshot_finish(rec->itr))
483 			trigger_error(&auxtrace_snapshot_trigger);
484 		else
485 			trigger_ready(&auxtrace_snapshot_trigger);
486 	}
487 }
488 
489 static int record__auxtrace_init(struct record *rec)
490 {
491 	int err;
492 
493 	if (!rec->itr) {
494 		rec->itr = auxtrace_record__init(rec->evlist, &err);
495 		if (err)
496 			return err;
497 	}
498 
499 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
500 					      rec->opts.auxtrace_snapshot_opts);
501 	if (err)
502 		return err;
503 
504 	return auxtrace_parse_filters(rec->evlist);
505 }
506 
507 #else
508 
509 static inline
510 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
511 			       struct perf_mmap *map __maybe_unused)
512 {
513 	return 0;
514 }
515 
516 static inline
517 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
518 {
519 }
520 
521 static inline
522 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
523 {
524 	return 0;
525 }
526 
527 static int record__auxtrace_init(struct record *rec __maybe_unused)
528 {
529 	return 0;
530 }
531 
532 #endif
533 
534 static int record__mmap_evlist(struct record *rec,
535 			       struct perf_evlist *evlist)
536 {
537 	struct record_opts *opts = &rec->opts;
538 	char msg[512];
539 
540 	if (opts->affinity != PERF_AFFINITY_SYS)
541 		cpu__setup_cpunode_map();
542 
543 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
544 				 opts->auxtrace_mmap_pages,
545 				 opts->auxtrace_snapshot_mode,
546 				 opts->nr_cblocks, opts->affinity) < 0) {
547 		if (errno == EPERM) {
548 			pr_err("Permission error mapping pages.\n"
549 			       "Consider increasing "
550 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
551 			       "or try again with a smaller value of -m/--mmap_pages.\n"
552 			       "(current value: %u,%u)\n",
553 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
554 			return -errno;
555 		} else {
556 			pr_err("failed to mmap with %d (%s)\n", errno,
557 				str_error_r(errno, msg, sizeof(msg)));
558 			if (errno)
559 				return -errno;
560 			else
561 				return -EINVAL;
562 		}
563 	}
564 	return 0;
565 }
566 
567 static int record__mmap(struct record *rec)
568 {
569 	return record__mmap_evlist(rec, rec->evlist);
570 }
571 
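/*
 * Open all events on the configured CPUs/threads, falling back to weaker
 * event encodings or degrading weak groups on errors, then apply event
 * filters and mmap the ring buffers.
 */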
572 static int record__open(struct record *rec)
573 {
574 	char msg[BUFSIZ];
575 	struct perf_evsel *pos;
576 	struct perf_evlist *evlist = rec->evlist;
577 	struct perf_session *session = rec->session;
578 	struct record_opts *opts = &rec->opts;
579 	int rc = 0;
580 
581 	/*
582 	 * For initial_delay we need to add a dummy event so that we can track
583 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
584 	 * real events, the ones asked by the user.
585 	 */
586 	if (opts->initial_delay) {
587 		if (perf_evlist__add_dummy(evlist))
588 			return -ENOMEM;
589 
590 		pos = perf_evlist__first(evlist);
591 		pos->tracking = 0;
592 		pos = perf_evlist__last(evlist);
593 		pos->tracking = 1;
594 		pos->attr.enable_on_exec = 1;
595 	}
596 
597 	perf_evlist__config(evlist, opts, &callchain_param);
598 
599 	evlist__for_each_entry(evlist, pos) {
600 try_again:
601 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
602 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
603 				if (verbose > 0)
604 					ui__warning("%s\n", msg);
605 				goto try_again;
606 			}
607 			if ((errno == EINVAL || errno == EBADF) &&
608 			    pos->leader != pos &&
609 			    pos->weak_group) {
610 				pos = perf_evlist__reset_weak_group(evlist, pos);
611 				goto try_again;
612 			}
613 			rc = -errno;
614 			perf_evsel__open_strerror(pos, &opts->target,
615 						  errno, msg, sizeof(msg));
616 			ui__error("%s\n", msg);
617 			goto out;
618 		}
619 
620 		pos->supported = true;
621 	}
622 
623 	if (perf_evlist__apply_filters(evlist, &pos)) {
624 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
625 			pos->filter, perf_evsel__name(pos), errno,
626 			str_error_r(errno, msg, sizeof(msg)));
627 		rc = -1;
628 		goto out;
629 	}
630 
631 	rc = record__mmap(rec);
632 	if (rc)
633 		goto out;
634 
635 	session->evlist = evlist;
636 	perf_session__set_id_hdr_size(session);
637 out:
638 	return rc;
639 }
640 
641 static int process_sample_event(struct perf_tool *tool,
642 				union perf_event *event,
643 				struct perf_sample *sample,
644 				struct perf_evsel *evsel,
645 				struct machine *machine)
646 {
647 	struct record *rec = container_of(tool, struct record, tool);
648 
649 	if (rec->evlist->first_sample_time == 0)
650 		rec->evlist->first_sample_time = sample->time;
651 
652 	rec->evlist->last_sample_time = sample->time;
653 
654 	if (rec->buildid_all)
655 		return 0;
656 
657 	rec->samples++;
658 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
659 }
660 
661 static int process_buildids(struct record *rec)
662 {
663 	struct perf_session *session = rec->session;
664 
665 	if (perf_data__size(&rec->data) == 0)
666 		return 0;
667 
668 	/*
669 	 * During this process, it'll load kernel map and replace the
670 	 * dso->long_name to a real pathname it found.  In this case
671 	 * we prefer the vmlinux path like
672 	 *   /lib/modules/3.16.4/build/vmlinux
673 	 *
674 	 * rather than build-id path (in debug directory).
675 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
676 	 */
677 	symbol_conf.ignore_vmlinux_buildid = true;
678 
679 	/*
680 	 * If --buildid-all is given, it marks all DSO regardless of hits,
681 	 * so no need to process samples. But if timestamp_boundary is enabled,
682 	 * it still needs to walk on all samples to get the timestamps of
683 	 * first/last samples.
684 	 */
685 	if (rec->buildid_all && !rec->timestamp_boundary)
686 		rec->tool.sample = NULL;
687 
688 	return perf_session__process_events(session);
689 }
690 
691 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
692 {
693 	int err;
694 	struct perf_tool *tool = data;
695 	/*
696 	 * As for the guest kernel, when processing the record & report
697 	 * subcommands, we arrange the module mmaps prior to the guest
698 	 * kernel mmap and trigger a DSO preload, because the default
699 	 * guest module symbols are loaded from guest kallsyms instead of
700 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
701 	 * first address is in a module instead of in the guest kernel.
702 	 */
703 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
704 					     machine);
705 	if (err < 0)
706 		pr_err("Couldn't record guest kernel [%d]'s module"
707 		       " information.\n", machine->pid);
708 
709 	/*
710 	 * We use _stext for the guest kernel because the guest kernel's
711 	 * /proc/kallsyms sometimes has no _text.
712 	 */
713 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
714 						 machine);
715 	if (err < 0)
716 		pr_err("Couldn't record guest kernel [%d]'s reference"
717 		       " relocation symbol.\n", machine->pid);
718 }
719 
720 static struct perf_event_header finished_round_event = {
721 	.size = sizeof(struct perf_event_header),
722 	.type = PERF_RECORD_FINISHED_ROUND,
723 };
724 
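/*
 * With --affinity=node|cpu, move the recording thread onto the mask of the
 * ring buffer that is about to be flushed, so reading happens close to where
 * the buffer memory lives.
 */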
725 static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
726 {
727 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
728 	    !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
729 		CPU_ZERO(&rec->affinity_mask);
730 		CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
731 		sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
732 	}
733 }
734 
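/*
 * Flush every mmaped ring buffer of the evlist into the output, either
 * synchronously via record__pushfn() or through aio, read any AUX area
 * tracing data, and emit a PERF_RECORD_FINISHED_ROUND event if anything was
 * written in this pass.
 */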
735 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
736 				    bool overwrite)
737 {
738 	u64 bytes_written = rec->bytes_written;
739 	int i;
740 	int rc = 0;
741 	struct perf_mmap *maps;
742 	int trace_fd = rec->data.file.fd;
743 	off_t off;
744 
745 	if (!evlist)
746 		return 0;
747 
748 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
749 	if (!maps)
750 		return 0;
751 
752 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
753 		return 0;
754 
755 	if (record__aio_enabled(rec))
756 		off = record__aio_get_pos(trace_fd);
757 
758 	for (i = 0; i < evlist->nr_mmaps; i++) {
759 		struct perf_mmap *map = &maps[i];
760 
761 		if (map->base) {
762 			record__adjust_affinity(rec, map);
763 			if (!record__aio_enabled(rec)) {
764 				if (perf_mmap__push(map, rec, record__pushfn) != 0) {
765 					rc = -1;
766 					goto out;
767 				}
768 			} else {
769 				int idx;
770 				/*
771 				 * Call record__aio_sync() to wait until the map->data buffer
772 				 * becomes available after the previous aio write request.
773 				 */
774 				idx = record__aio_sync(map, false);
775 				if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
776 					record__aio_set_pos(trace_fd, off);
777 					rc = -1;
778 					goto out;
779 				}
780 			}
781 		}
782 
783 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
784 		    record__auxtrace_mmap_read(rec, map) != 0) {
785 			rc = -1;
786 			goto out;
787 		}
788 	}
789 
790 	if (record__aio_enabled(rec))
791 		record__aio_set_pos(trace_fd, off);
792 
793 	/*
794 	 * Mark the round finished in case we wrote
795 	 * at least one event.
796 	 */
797 	if (bytes_written != rec->bytes_written)
798 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
799 
800 	if (overwrite)
801 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
802 out:
803 	return rc;
804 }
805 
806 static int record__mmap_read_all(struct record *rec)
807 {
808 	int err;
809 
810 	err = record__mmap_read_evlist(rec, rec->evlist, false);
811 	if (err)
812 		return err;
813 
814 	return record__mmap_read_evlist(rec, rec->evlist, true);
815 }
816 
817 static void record__init_features(struct record *rec)
818 {
819 	struct perf_session *session = rec->session;
820 	int feat;
821 
822 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
823 		perf_header__set_feat(&session->header, feat);
824 
825 	if (rec->no_buildid)
826 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
827 
828 	if (!have_tracepoints(&rec->evlist->entries))
829 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
830 
831 	if (!rec->opts.branch_stack)
832 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
833 
834 	if (!rec->opts.full_auxtrace)
835 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
836 
837 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
838 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
839 
840 	perf_header__clear_feat(&session->header, HEADER_STAT);
841 }
842 
843 static void
844 record__finish_output(struct record *rec)
845 {
846 	struct perf_data *data = &rec->data;
847 	int fd = perf_data__fd(data);
848 
849 	if (data->is_pipe)
850 		return;
851 
852 	rec->session->header.data_size += rec->bytes_written;
853 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
854 
855 	if (!rec->no_buildid) {
856 		process_buildids(rec);
857 
858 		if (rec->buildid_all)
859 			dsos__hit_all(rec->session);
860 	}
861 	perf_session__write_header(rec->session, rec->evlist, fd, true);
862 
863 	return;
864 }
865 
866 static int record__synthesize_workload(struct record *rec, bool tail)
867 {
868 	int err;
869 	struct thread_map *thread_map;
870 
871 	if (rec->opts.tail_synthesize != tail)
872 		return 0;
873 
874 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
875 	if (thread_map == NULL)
876 		return -1;
877 
878 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
879 						 process_synthesized_event,
880 						 &rec->session->machines.host,
881 						 rec->opts.sample_address);
882 	thread_map__put(thread_map);
883 	return err;
884 }
885 
886 static int record__synthesize(struct record *rec, bool tail);
887 
888 static int
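/*
 * Finish the current output file and switch to a new, timestamped perf.data
 * (--switch-output / --timestamp-filename). Returns the new output file
 * descriptor or a negative error code.
 */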
889 record__switch_output(struct record *rec, bool at_exit)
890 {
891 	struct perf_data *data = &rec->data;
892 	int fd, err;
893 
894 	/* Same size as: "2015122520103046" */
895 	char timestamp[] = "InvalidTimestamp";
896 
897 	record__aio_mmap_read_sync(rec);
898 
899 	record__synthesize(rec, true);
900 	if (target__none(&rec->opts.target))
901 		record__synthesize_workload(rec, true);
902 
903 	rec->samples = 0;
904 	record__finish_output(rec);
905 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
906 	if (err) {
907 		pr_err("Failed to get current timestamp\n");
908 		return -EINVAL;
909 	}
910 
911 	fd = perf_data__switch(data, timestamp,
912 				    rec->session->header.data_offset,
913 				    at_exit);
914 	if (fd >= 0 && !at_exit) {
915 		rec->bytes_written = 0;
916 		rec->session->header.data_size = 0;
917 	}
918 
919 	if (!quiet)
920 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
921 			data->path, timestamp);
922 
923 	/* Output tracking events */
924 	if (!at_exit) {
925 		record__synthesize(rec, false);
926 
927 		/*
928 		 * In 'perf record --switch-output' without -a,
929 		 * record__synthesize() in record__switch_output() won't
930 		 * generate tracking events because there's no thread_map
931 		 * in evlist, so the newly created perf.data doesn't
932 		 * contain map and comm information.
933 		 * Create a fake thread_map and directly call
934 		 * perf_event__synthesize_thread_map() for those events.
935 		 */
936 		if (target__none(&rec->opts.target))
937 			record__synthesize_workload(rec, false);
938 	}
939 	return fd;
940 }
941 
942 static volatile int workload_exec_errno;
943 
944 /*
945  * perf_evlist__prepare_workload will send a SIGUSR1
946  * if the fork fails, since we asked for it by setting its
947  * want_signal to true.
948  */
949 static void workload_exec_failed_signal(int signo __maybe_unused,
950 					siginfo_t *info,
951 					void *ucontext __maybe_unused)
952 {
953 	workload_exec_errno = info->si_value.sival_int;
954 	done = 1;
955 	child_finished = 1;
956 }
957 
958 static void snapshot_sig_handler(int sig);
959 static void alarm_sig_handler(int sig);
960 
961 int __weak
962 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
963 			    struct perf_tool *tool __maybe_unused,
964 			    perf_event__handler_t process __maybe_unused,
965 			    struct machine *machine __maybe_unused)
966 {
967 	return 0;
968 }
969 
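/*
 * Pick any mmaped perf_event_mmap_page from the evlist so that
 * perf_event__synth_time_conv() can read time conversion parameters from it
 * (the __weak default above does nothing with it).
 */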
970 static const struct perf_event_mmap_page *
971 perf_evlist__pick_pc(struct perf_evlist *evlist)
972 {
973 	if (evlist) {
974 		if (evlist->mmap && evlist->mmap[0].base)
975 			return evlist->mmap[0].base;
976 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
977 			return evlist->overwrite_mmap[0].base;
978 	}
979 	return NULL;
980 }
981 
982 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
983 {
984 	const struct perf_event_mmap_page *pc;
985 
986 	pc = perf_evlist__pick_pc(rec->evlist);
987 	if (pc)
988 		return pc;
989 	return NULL;
990 }
991 
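/*
 * Emit the synthetic, non-sample events describing the recording environment:
 * attrs, features and tracing data for pipe output, time conversion and
 * auxtrace info, kernel and module mmaps, thread and cpu maps, bpf events and
 * the already running threads of the monitored target.
 */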
992 static int record__synthesize(struct record *rec, bool tail)
993 {
994 	struct perf_session *session = rec->session;
995 	struct machine *machine = &session->machines.host;
996 	struct perf_data *data = &rec->data;
997 	struct record_opts *opts = &rec->opts;
998 	struct perf_tool *tool = &rec->tool;
999 	int fd = perf_data__fd(data);
1000 	int err = 0;
1001 
1002 	if (rec->opts.tail_synthesize != tail)
1003 		return 0;
1004 
1005 	if (data->is_pipe) {
1006 		/*
1007 		 * We need to synthesize events first, because some
1008 		 * features work on top of them (on the report side).
1009 		 */
1010 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1011 						   process_synthesized_event);
1012 		if (err < 0) {
1013 			pr_err("Couldn't synthesize attrs.\n");
1014 			goto out;
1015 		}
1016 
1017 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1018 						      process_synthesized_event);
1019 		if (err < 0) {
1020 			pr_err("Couldn't synthesize features.\n");
1021 			return err;
1022 		}
1023 
1024 		if (have_tracepoints(&rec->evlist->entries)) {
1025 			/*
1026 			 * FIXME err <= 0 here actually means that
1027 			 * there were no tracepoints so it's not really
1028 			 * an error, just that we don't need to
1029 			 * synthesize anything.  We really have to
1030 			 * return this more properly and also
1031 			 * propagate errors that now are calling die()
1032 			 */
1033 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1034 								  process_synthesized_event);
1035 			if (err <= 0) {
1036 				pr_err("Couldn't record tracing data.\n");
1037 				goto out;
1038 			}
1039 			rec->bytes_written += err;
1040 		}
1041 	}
1042 
1043 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1044 					  process_synthesized_event, machine);
1045 	if (err)
1046 		goto out;
1047 
1048 	if (rec->opts.full_auxtrace) {
1049 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1050 					session, process_synthesized_event);
1051 		if (err)
1052 			goto out;
1053 	}
1054 
1055 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1056 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1057 							 machine);
1058 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1059 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1060 				   "Check /proc/kallsyms permission or run as root.\n");
1061 
1062 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1063 						     machine);
1064 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1065 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1066 				   "Check /proc/modules permission or run as root.\n");
1067 	}
1068 
1069 	if (perf_guest) {
1070 		machines__process_guests(&session->machines,
1071 					 perf_event__synthesize_guest_os, tool);
1072 	}
1073 
1074 	err = perf_event__synthesize_extra_attr(&rec->tool,
1075 						rec->evlist,
1076 						process_synthesized_event,
1077 						data->is_pipe);
1078 	if (err)
1079 		goto out;
1080 
1081 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1082 						 process_synthesized_event,
1083 						NULL);
1084 	if (err < 0) {
1085 		pr_err("Couldn't synthesize thread map.\n");
1086 		return err;
1087 	}
1088 
1089 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1090 					     process_synthesized_event, NULL);
1091 	if (err < 0) {
1092 		pr_err("Couldn't synthesize cpu map.\n");
1093 		return err;
1094 	}
1095 
1096 	err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
1097 						machine, opts);
1098 	if (err < 0)
1099 		pr_warning("Couldn't synthesize bpf events.\n");
1100 
1101 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1102 					    process_synthesized_event, opts->sample_address,
1103 					    1);
1104 out:
1105 	return err;
1106 }
1107 
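/*
 * The main record loop: install signal handlers, create the session, open and
 * mmap the events, optionally fork the workload, then poll and flush the ring
 * buffers until done, handling auxtrace snapshots and output switching along
 * the way.
 */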
1108 static int __cmd_record(struct record *rec, int argc, const char **argv)
1109 {
1110 	int err;
1111 	int status = 0;
1112 	unsigned long waking = 0;
1113 	const bool forks = argc > 0;
1114 	struct perf_tool *tool = &rec->tool;
1115 	struct record_opts *opts = &rec->opts;
1116 	struct perf_data *data = &rec->data;
1117 	struct perf_session *session;
1118 	bool disabled = false, draining = false;
1119 	int fd;
1120 
1121 	atexit(record__sig_exit);
1122 	signal(SIGCHLD, sig_handler);
1123 	signal(SIGINT, sig_handler);
1124 	signal(SIGTERM, sig_handler);
1125 	signal(SIGSEGV, sigsegv_handler);
1126 
1127 	if (rec->opts.record_namespaces)
1128 		tool->namespace_events = true;
1129 
1130 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1131 		signal(SIGUSR2, snapshot_sig_handler);
1132 		if (rec->opts.auxtrace_snapshot_mode)
1133 			trigger_on(&auxtrace_snapshot_trigger);
1134 		if (rec->switch_output.enabled)
1135 			trigger_on(&switch_output_trigger);
1136 	} else {
1137 		signal(SIGUSR2, SIG_IGN);
1138 	}
1139 
1140 	session = perf_session__new(data, false, tool);
1141 	if (session == NULL) {
1142 		pr_err("Perf session creation failed.\n");
1143 		return -1;
1144 	}
1145 
1146 	fd = perf_data__fd(data);
1147 	rec->session = session;
1148 
1149 	record__init_features(rec);
1150 
1151 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1152 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1153 
1154 	if (forks) {
1155 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1156 						    argv, data->is_pipe,
1157 						    workload_exec_failed_signal);
1158 		if (err < 0) {
1159 			pr_err("Couldn't run the workload!\n");
1160 			status = err;
1161 			goto out_delete_session;
1162 		}
1163 	}
1164 
1165 	/*
1166 	 * If we have just a single event and are sending data
1167 	 * through a pipe, we need to force the id allocation,
1168 	 * because we synthesize the event name through the pipe
1169 	 * and need the id for that.
1170 	 */
1171 	if (data->is_pipe && rec->evlist->nr_entries == 1)
1172 		rec->opts.sample_id = true;
1173 
1174 	if (record__open(rec) != 0) {
1175 		err = -1;
1176 		goto out_child;
1177 	}
1178 
1179 	err = bpf__apply_obj_config();
1180 	if (err) {
1181 		char errbuf[BUFSIZ];
1182 
1183 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1184 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1185 			 errbuf);
1186 		goto out_child;
1187 	}
1188 
1189 	/*
1190 	 * Normally perf_session__new would do this, but it doesn't have the
1191 	 * evlist.
1192 	 */
1193 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1194 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1195 		rec->tool.ordered_events = false;
1196 	}
1197 
1198 	if (!rec->evlist->nr_groups)
1199 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1200 
1201 	if (data->is_pipe) {
1202 		err = perf_header__write_pipe(fd);
1203 		if (err < 0)
1204 			goto out_child;
1205 	} else {
1206 		err = perf_session__write_header(session, rec->evlist, fd, false);
1207 		if (err < 0)
1208 			goto out_child;
1209 	}
1210 
1211 	if (!rec->no_buildid
1212 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1213 		pr_err("Couldn't generate buildids. "
1214 		       "Use --no-buildid to profile anyway.\n");
1215 		err = -1;
1216 		goto out_child;
1217 	}
1218 
1219 	err = record__synthesize(rec, false);
1220 	if (err < 0)
1221 		goto out_child;
1222 
1223 	if (rec->realtime_prio) {
1224 		struct sched_param param;
1225 
1226 		param.sched_priority = rec->realtime_prio;
1227 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1228 			pr_err("Could not set realtime priority.\n");
1229 			err = -1;
1230 			goto out_child;
1231 		}
1232 	}
1233 
1234 	/*
1235 	 * When perf is starting the traced process, all the events
1236 	 * (apart from group members) have enable_on_exec=1 set,
1237 	 * so don't spoil it by prematurely enabling them.
1238 	 */
1239 	if (!target__none(&opts->target) && !opts->initial_delay)
1240 		perf_evlist__enable(rec->evlist);
1241 
1242 	/*
1243 	 * Let the child rip
1244 	 */
1245 	if (forks) {
1246 		struct machine *machine = &session->machines.host;
1247 		union perf_event *event;
1248 		pid_t tgid;
1249 
1250 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1251 		if (event == NULL) {
1252 			err = -ENOMEM;
1253 			goto out_child;
1254 		}
1255 
1256 		/*
1257 		 * Some H/W events are generated before the COMM event,
1258 		 * which is emitted during exec(), so perf script
1259 		 * cannot see a correct process name for those events.
1260 		 * Synthesize a COMM event to prevent it.
1261 		 */
1262 		tgid = perf_event__synthesize_comm(tool, event,
1263 						   rec->evlist->workload.pid,
1264 						   process_synthesized_event,
1265 						   machine);
1266 		free(event);
1267 
1268 		if (tgid == -1)
1269 			goto out_child;
1270 
1271 		event = malloc(sizeof(event->namespaces) +
1272 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1273 			       machine->id_hdr_size);
1274 		if (event == NULL) {
1275 			err = -ENOMEM;
1276 			goto out_child;
1277 		}
1278 
1279 		/*
1280 		 * Synthesize NAMESPACES event for the command specified.
1281 		 */
1282 		perf_event__synthesize_namespaces(tool, event,
1283 						  rec->evlist->workload.pid,
1284 						  tgid, process_synthesized_event,
1285 						  machine);
1286 		free(event);
1287 
1288 		perf_evlist__start_workload(rec->evlist);
1289 	}
1290 
1291 	if (opts->initial_delay) {
1292 		usleep(opts->initial_delay * USEC_PER_MSEC);
1293 		perf_evlist__enable(rec->evlist);
1294 	}
1295 
1296 	trigger_ready(&auxtrace_snapshot_trigger);
1297 	trigger_ready(&switch_output_trigger);
1298 	perf_hooks__invoke_record_start();
1299 	for (;;) {
1300 		unsigned long long hits = rec->samples;
1301 
1302 		/*
1303 		 * rec->evlist->bkw_mmap_state may be
1304 		 * BKW_MMAP_EMPTY here: when done == true and
1305 		 * hits != rec->samples in the previous round.
1306 		 *
1307 		 * perf_evlist__toggle_bkw_mmap() ensures we never
1308 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1309 		 */
1310 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1311 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1312 
1313 		if (record__mmap_read_all(rec) < 0) {
1314 			trigger_error(&auxtrace_snapshot_trigger);
1315 			trigger_error(&switch_output_trigger);
1316 			err = -1;
1317 			goto out_child;
1318 		}
1319 
1320 		if (auxtrace_record__snapshot_started) {
1321 			auxtrace_record__snapshot_started = 0;
1322 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1323 				record__read_auxtrace_snapshot(rec);
1324 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1325 				pr_err("AUX area tracing snapshot failed\n");
1326 				err = -1;
1327 				goto out_child;
1328 			}
1329 		}
1330 
1331 		if (trigger_is_hit(&switch_output_trigger)) {
1332 			/*
1333 			 * If switch_output_trigger is hit, the data in the
1334 			 * overwritable ring buffer should have been collected,
1335 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1336 			 *
1337 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
1338 			 * record__mmap_read_all() didn't collect data from the
1339 			 * overwritable ring buffer. Read again.
1340 			 */
1341 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1342 				continue;
1343 			trigger_ready(&switch_output_trigger);
1344 
1345 			/*
1346 			 * Reenable events in overwrite ring buffer after
1347 			 * record__mmap_read_all(): we should have collected
1348 			 * data from it.
1349 			 */
1350 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1351 
1352 			if (!quiet)
1353 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1354 					waking);
1355 			waking = 0;
1356 			fd = record__switch_output(rec, false);
1357 			if (fd < 0) {
1358 				pr_err("Failed to switch to new file\n");
1359 				trigger_error(&switch_output_trigger);
1360 				err = fd;
1361 				goto out_child;
1362 			}
1363 
1364 			/* re-arm the alarm */
1365 			if (rec->switch_output.time)
1366 				alarm(rec->switch_output.time);
1367 		}
1368 
1369 		if (hits == rec->samples) {
1370 			if (done || draining)
1371 				break;
1372 			err = perf_evlist__poll(rec->evlist, -1);
1373 			/*
1374 			 * Propagate error, only if there's any. Ignore positive
1375 			 * number of returned events and interrupt error.
1376 			 */
1377 			if (err > 0 || (err < 0 && errno == EINTR))
1378 				err = 0;
1379 			waking++;
1380 
1381 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1382 				draining = true;
1383 		}
1384 
1385 		/*
1386 		 * When perf is starting the traced process, at the end events
1387 		 * die with the process and we wait for that. Thus no need to
1388 		 * disable events in this case.
1389 		 */
1390 		if (done && !disabled && !target__none(&opts->target)) {
1391 			trigger_off(&auxtrace_snapshot_trigger);
1392 			perf_evlist__disable(rec->evlist);
1393 			disabled = true;
1394 		}
1395 	}
1396 	trigger_off(&auxtrace_snapshot_trigger);
1397 	trigger_off(&switch_output_trigger);
1398 
1399 	if (forks && workload_exec_errno) {
1400 		char msg[STRERR_BUFSIZE];
1401 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1402 		pr_err("Workload failed: %s\n", emsg);
1403 		err = -1;
1404 		goto out_child;
1405 	}
1406 
1407 	if (!quiet)
1408 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1409 
1410 	if (target__none(&rec->opts.target))
1411 		record__synthesize_workload(rec, true);
1412 
1413 out_child:
1414 	record__aio_mmap_read_sync(rec);
1415 
1416 	if (forks) {
1417 		int exit_status;
1418 
1419 		if (!child_finished)
1420 			kill(rec->evlist->workload.pid, SIGTERM);
1421 
1422 		wait(&exit_status);
1423 
1424 		if (err < 0)
1425 			status = err;
1426 		else if (WIFEXITED(exit_status))
1427 			status = WEXITSTATUS(exit_status);
1428 		else if (WIFSIGNALED(exit_status))
1429 			signr = WTERMSIG(exit_status);
1430 	} else
1431 		status = err;
1432 
1433 	record__synthesize(rec, true);
1434 	/* this will be recalculated during process_buildids() */
1435 	rec->samples = 0;
1436 
1437 	if (!err) {
1438 		if (!rec->timestamp_filename) {
1439 			record__finish_output(rec);
1440 		} else {
1441 			fd = record__switch_output(rec, true);
1442 			if (fd < 0) {
1443 				status = fd;
1444 				goto out_delete_session;
1445 			}
1446 		}
1447 	}
1448 
1449 	perf_hooks__invoke_record_end();
1450 
1451 	if (!err && !quiet) {
1452 		char samples[128];
1453 		const char *postfix = rec->timestamp_filename ?
1454 					".<timestamp>" : "";
1455 
1456 		if (rec->samples && !rec->opts.full_auxtrace)
1457 			scnprintf(samples, sizeof(samples),
1458 				  " (%" PRIu64 " samples)", rec->samples);
1459 		else
1460 			samples[0] = '\0';
1461 
1462 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1463 			perf_data__size(data) / 1024.0 / 1024.0,
1464 			data->path, postfix, samples);
1465 	}
1466 
1467 out_delete_session:
1468 	perf_session__delete(session);
1469 	return status;
1470 }
1471 
1472 static void callchain_debug(struct callchain_param *callchain)
1473 {
1474 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1475 
1476 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1477 
1478 	if (callchain->record_mode == CALLCHAIN_DWARF)
1479 		pr_debug("callchain: stack dump size %d\n",
1480 			 callchain->dump_size);
1481 }
1482 
1483 int record_opts__parse_callchain(struct record_opts *record,
1484 				 struct callchain_param *callchain,
1485 				 const char *arg, bool unset)
1486 {
1487 	int ret;
1488 	callchain->enabled = !unset;
1489 
1490 	/* --no-call-graph */
1491 	if (unset) {
1492 		callchain->record_mode = CALLCHAIN_NONE;
1493 		pr_debug("callchain: disabled\n");
1494 		return 0;
1495 	}
1496 
1497 	ret = parse_callchain_record_opt(arg, callchain);
1498 	if (!ret) {
1499 		/* Enable data address sampling for DWARF unwind. */
1500 		if (callchain->record_mode == CALLCHAIN_DWARF)
1501 			record->sample_address = true;
1502 		callchain_debug(callchain);
1503 	}
1504 
1505 	return ret;
1506 }
1507 
1508 int record_parse_callchain_opt(const struct option *opt,
1509 			       const char *arg,
1510 			       int unset)
1511 {
1512 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1513 }
1514 
1515 int record_callchain_opt(const struct option *opt,
1516 			 const char *arg __maybe_unused,
1517 			 int unset __maybe_unused)
1518 {
1519 	struct callchain_param *callchain = opt->value;
1520 
1521 	callchain->enabled = true;
1522 
1523 	if (callchain->record_mode == CALLCHAIN_NONE)
1524 		callchain->record_mode = CALLCHAIN_FP;
1525 
1526 	callchain_debug(callchain);
1527 	return 0;
1528 }
1529 
1530 static int perf_record_config(const char *var, const char *value, void *cb)
1531 {
1532 	struct record *rec = cb;
1533 
1534 	if (!strcmp(var, "record.build-id")) {
1535 		if (!strcmp(value, "cache"))
1536 			rec->no_buildid_cache = false;
1537 		else if (!strcmp(value, "no-cache"))
1538 			rec->no_buildid_cache = true;
1539 		else if (!strcmp(value, "skip"))
1540 			rec->no_buildid = true;
1541 		else
1542 			return -1;
1543 		return 0;
1544 	}
1545 	if (!strcmp(var, "record.call-graph")) {
1546 		var = "call-graph.record-mode";
1547 		return perf_default_config(var, value, cb);
1548 	}
1549 #ifdef HAVE_AIO_SUPPORT
1550 	if (!strcmp(var, "record.aio")) {
1551 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1552 		if (!rec->opts.nr_cblocks)
1553 			rec->opts.nr_cblocks = nr_cblocks_default;
1554 	}
1555 #endif
1556 
1557 	return 0;
1558 }
1559 
1560 struct clockid_map {
1561 	const char *name;
1562 	int clockid;
1563 };
1564 
1565 #define CLOCKID_MAP(n, c)	\
1566 	{ .name = n, .clockid = (c), }
1567 
1568 #define CLOCKID_END	{ .name = NULL, }
1569 
1570 
1571 /*
1572  * Add the missing ones; we need to build on many distros...
1573  */
1574 #ifndef CLOCK_MONOTONIC_RAW
1575 #define CLOCK_MONOTONIC_RAW 4
1576 #endif
1577 #ifndef CLOCK_BOOTTIME
1578 #define CLOCK_BOOTTIME 7
1579 #endif
1580 #ifndef CLOCK_TAI
1581 #define CLOCK_TAI 11
1582 #endif
1583 
1584 static const struct clockid_map clockids[] = {
1585 	/* available for all events, NMI safe */
1586 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1587 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1588 
1589 	/* available for some events */
1590 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1591 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1592 	CLOCKID_MAP("tai", CLOCK_TAI),
1593 
1594 	/* available for the lazy */
1595 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1596 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1597 	CLOCKID_MAP("real", CLOCK_REALTIME),
1598 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1599 
1600 	CLOCKID_END,
1601 };
1602 
1603 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1604 {
1605 	struct timespec res;
1606 
1607 	*res_ns = 0;
1608 	if (!clock_getres(clk_id, &res))
1609 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1610 	else
1611 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1612 
1613 	return 0;
1614 }
1615 
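/*
 * Parse -k/--clockid: either a raw clockid number or one of the names in
 * clockids[], with an optional "CLOCK_" prefix (e.g. "mono", "monotonic",
 * "CLOCK_MONOTONIC_RAW"); the clock's resolution is saved for the header.
 */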
1616 static int parse_clockid(const struct option *opt, const char *str, int unset)
1617 {
1618 	struct record_opts *opts = (struct record_opts *)opt->value;
1619 	const struct clockid_map *cm;
1620 	const char *ostr = str;
1621 
1622 	if (unset) {
1623 		opts->use_clockid = 0;
1624 		return 0;
1625 	}
1626 
1627 	/* no arg passed */
1628 	if (!str)
1629 		return 0;
1630 
1631 	/* no setting it twice */
1632 	if (opts->use_clockid)
1633 		return -1;
1634 
1635 	opts->use_clockid = true;
1636 
1637 	/* if it's a number, we're done */
1638 	if (sscanf(str, "%d", &opts->clockid) == 1)
1639 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1640 
1641 	/* allow a "CLOCK_" prefix to the name */
1642 	if (!strncasecmp(str, "CLOCK_", 6))
1643 		str += 6;
1644 
1645 	for (cm = clockids; cm->name; cm++) {
1646 		if (!strcasecmp(str, cm->name)) {
1647 			opts->clockid = cm->clockid;
1648 			return get_clockid_res(opts->clockid,
1649 					       &opts->clockid_res_ns);
1650 		}
1651 	}
1652 
1653 	opts->use_clockid = false;
1654 	ui__warning("unknown clockid %s, check man page\n", ostr);
1655 	return -1;
1656 }
1657 
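/* Parse --affinity: "node" or "cpu"; anything else keeps the default "sys". */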
1658 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1659 {
1660 	struct record_opts *opts = (struct record_opts *)opt->value;
1661 
1662 	if (unset || !str)
1663 		return 0;
1664 
1665 	if (!strcasecmp(str, "node"))
1666 		opts->affinity = PERF_AFFINITY_NODE;
1667 	else if (!strcasecmp(str, "cpu"))
1668 		opts->affinity = PERF_AFFINITY_CPU;
1669 
1670 	return 0;
1671 }
1672 
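/*
 * Parse -m/--mmap-pages "pages[,pages]": the first value sizes the data
 * mmaps, the optional second value the AUX area tracing mmaps.
 */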
1673 static int record__parse_mmap_pages(const struct option *opt,
1674 				    const char *str,
1675 				    int unset __maybe_unused)
1676 {
1677 	struct record_opts *opts = opt->value;
1678 	char *s, *p;
1679 	unsigned int mmap_pages;
1680 	int ret;
1681 
1682 	if (!str)
1683 		return -EINVAL;
1684 
1685 	s = strdup(str);
1686 	if (!s)
1687 		return -ENOMEM;
1688 
1689 	p = strchr(s, ',');
1690 	if (p)
1691 		*p = '\0';
1692 
1693 	if (*s) {
1694 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1695 		if (ret)
1696 			goto out_free;
1697 		opts->mmap_pages = mmap_pages;
1698 	}
1699 
1700 	if (!p) {
1701 		ret = 0;
1702 		goto out_free;
1703 	}
1704 
1705 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1706 	if (ret)
1707 		goto out_free;
1708 
1709 	opts->auxtrace_mmap_pages = mmap_pages;
1710 
1711 out_free:
1712 	free(s);
1713 	return ret;
1714 }
1715 
1716 static void switch_output_size_warn(struct record *rec)
1717 {
1718 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1719 	struct switch_output *s = &rec->switch_output;
1720 
1721 	wakeup_size /= 2;
1722 
1723 	if (s->size < wakeup_size) {
1724 		char buf[100];
1725 
1726 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1727 		pr_warning("WARNING: switch-output data size lower than "
1728 			   "wakeup kernel buffer size (%s) "
1729 			   "expect bigger perf.data sizes\n", buf);
1730 	}
1731 }
1732 
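/*
 * Parse the --switch-output argument: "signal" rotates the output on SIGUSR2,
 * a size such as "100M" rotates once that much data has been written, a time
 * such as "30s" rotates on an alarm. Enabling it implies timestamped file
 * names.
 */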
1733 static int switch_output_setup(struct record *rec)
1734 {
1735 	struct switch_output *s = &rec->switch_output;
1736 	static struct parse_tag tags_size[] = {
1737 		{ .tag  = 'B', .mult = 1       },
1738 		{ .tag  = 'K', .mult = 1 << 10 },
1739 		{ .tag  = 'M', .mult = 1 << 20 },
1740 		{ .tag  = 'G', .mult = 1 << 30 },
1741 		{ .tag  = 0 },
1742 	};
1743 	static struct parse_tag tags_time[] = {
1744 		{ .tag  = 's', .mult = 1        },
1745 		{ .tag  = 'm', .mult = 60       },
1746 		{ .tag  = 'h', .mult = 60*60    },
1747 		{ .tag  = 'd', .mult = 60*60*24 },
1748 		{ .tag  = 0 },
1749 	};
1750 	unsigned long val;
1751 
1752 	if (!s->set)
1753 		return 0;
1754 
1755 	if (!strcmp(s->str, "signal")) {
1756 		s->signal = true;
1757 		pr_debug("switch-output with SIGUSR2 signal\n");
1758 		goto enabled;
1759 	}
1760 
1761 	val = parse_tag_value(s->str, tags_size);
1762 	if (val != (unsigned long) -1) {
1763 		s->size = val;
1764 		pr_debug("switch-output with %s size threshold\n", s->str);
1765 		goto enabled;
1766 	}
1767 
1768 	val = parse_tag_value(s->str, tags_time);
1769 	if (val != (unsigned long) -1) {
1770 		s->time = val;
1771 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1772 			 s->str, s->time);
1773 		goto enabled;
1774 	}
1775 
1776 	return -1;
1777 
1778 enabled:
1779 	rec->timestamp_filename = true;
1780 	s->enabled              = true;
1781 
1782 	if (s->size && !rec->opts.no_buffering)
1783 		switch_output_size_warn(rec);
1784 
1785 	return 0;
1786 }
1787 
1788 static const char * const __record_usage[] = {
1789 	"perf record [<options>] [<command>]",
1790 	"perf record [<options>] -- <command> [<options>]",
1791 	NULL
1792 };
1793 const char * const *record_usage = __record_usage;
1794 
1795 /*
1796  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1797  * because we need to have access to it in record__exit, which is called
1798  * after cmd_record() exits, but since record_options needs to be accessible to
1799  * builtin-script, leave it here.
1800  *
1801  * At least we don't ouch it in all the other functions here directly.
1802  *
1803  * Just say no to tons of global variables, sigh.
1804  */
1805 static struct record record = {
1806 	.opts = {
1807 		.sample_time	     = true,
1808 		.mmap_pages	     = UINT_MAX,
1809 		.user_freq	     = UINT_MAX,
1810 		.user_interval	     = ULLONG_MAX,
1811 		.freq		     = 4000,
1812 		.target		     = {
1813 			.uses_mmap   = true,
1814 			.default_per_cpu = true,
1815 		},
1816 	},
1817 	.tool = {
1818 		.sample		= process_sample_event,
1819 		.fork		= perf_event__process_fork,
1820 		.exit		= perf_event__process_exit,
1821 		.comm		= perf_event__process_comm,
1822 		.namespaces	= perf_event__process_namespaces,
1823 		.mmap		= perf_event__process_mmap,
1824 		.mmap2		= perf_event__process_mmap2,
1825 		.ordered_events	= true,
1826 	},
1827 };
1828 
1829 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1830 	"\n\t\t\t\tDefault: fp";
1831 
1832 static bool dry_run;
1833 
1834 /*
1835  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1836  * with it and switch to use the library functions in perf_evlist that came
1837  * from builtin-record.c, i.e. use record_opts,
1838  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1839  * using pipes, etc.
1840  */
1841 static struct option __record_options[] = {
1842 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1843 		     "event selector. use 'perf list' to list available events",
1844 		     parse_events_option),
1845 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1846 		     "event filter", parse_filter),
1847 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1848 			   NULL, "don't record events from perf itself",
1849 			   exclude_perf),
1850 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1851 		    "record events on existing process id"),
1852 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1853 		    "record events on existing thread id"),
1854 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1855 		    "collect data with this RT SCHED_FIFO priority"),
1856 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1857 		    "collect data without buffering"),
1858 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1859 		    "collect raw sample records from all opened counters"),
1860 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1861 			    "system-wide collection from all CPUs"),
1862 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1863 		    "list of cpus to monitor"),
1864 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1865 	OPT_STRING('o', "output", &record.data.path, "file",
1866 		    "output file name"),
1867 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1868 			&record.opts.no_inherit_set,
1869 			"child tasks do not inherit counters"),
1870 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1871 		    "synthesize non-sample events at the end of output"),
1872 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1873 	OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
1874 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1875 		    "Fail if the specified frequency can't be used"),
1876 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1877 		     "profile at this frequency",
1878 		      record__parse_freq),
1879 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1880 		     "number of mmap data pages and AUX area tracing mmap pages",
1881 		     record__parse_mmap_pages),
1882 	OPT_BOOLEAN(0, "group", &record.opts.group,
1883 		    "put the counters into a counter group"),
1884 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1885 			   NULL, "enables call-graph recording" ,
1886 			   &record_callchain_opt),
1887 	OPT_CALLBACK(0, "call-graph", &record.opts,
1888 		     "record_mode[,record_size]", record_callchain_help,
1889 		     &record_parse_callchain_opt),
1890 	OPT_INCR('v', "verbose", &verbose,
1891 		    "be more verbose (show counter open errors, etc)"),
1892 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1893 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1894 		    "per thread counts"),
1895 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1896 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1897 		    "Record the sample physical addresses"),
1898 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1899 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1900 			&record.opts.sample_time_set,
1901 			"Record the sample timestamps"),
1902 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1903 			"Record the sample period"),
1904 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1905 		    "don't sample"),
1906 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1907 			&record.no_buildid_cache_set,
1908 			"do not update the buildid cache"),
1909 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1910 			&record.no_buildid_set,
1911 			"do not collect buildids in perf.data"),
1912 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1913 		     "monitor event in cgroup name only",
1914 		     parse_cgroups),
1915 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1916 		  "ms to wait before starting measurement after program start"),
1917 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1918 		   "user to profile"),
1919 
1920 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1921 		     "branch any", "sample any taken branches",
1922 		     parse_branch_stack),
1923 
1924 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1925 		     "branch filter mask", "branch stack filter modes",
1926 		     parse_branch_stack),
1927 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1928 		    "sample by weight (on special events only)"),
1929 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1930 		    "sample transaction flags (special events only)"),
1931 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1932 		    "use per-thread mmaps"),
1933 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1934 		    "sample selected machine registers on interrupt,"
1935 		    " use '-I?' to list register names", parse_regs),
1936 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1937 		    "sample selected machine registers on interrupt,"
1938 		    " use '--user-regs=?' to list register names", parse_regs),
1939 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1940 		    "Record running/enabled time of read (:S) events"),
1941 	OPT_CALLBACK('k', "clockid", &record.opts,
1942 	"clockid", "clockid to use for events, see clock_gettime()",
1943 	parse_clockid),
1944 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1945 			  "opts", "AUX area tracing Snapshot Mode", ""),
1946 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1947 			"per thread proc mmap processing timeout in ms"),
1948 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1949 		    "Record namespaces events"),
1950 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1951 		    "Record context switch events"),
1952 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1953 			 "Configure all used events to run in kernel space.",
1954 			 PARSE_OPT_EXCLUSIVE),
1955 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1956 			 "Configure all used events to run in user space.",
1957 			 PARSE_OPT_EXCLUSIVE),
1958 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1959 		   "clang binary to use for compiling BPF scriptlets"),
1960 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1961 		   "options passed to clang when compiling BPF scriptlets"),
1962 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1963 		   "file", "vmlinux pathname"),
1964 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1965 		    "Record build-id of all DSOs regardless of hits"),
1966 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1967 		    "append timestamp to output filename"),
1968 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1969 		    "Record timestamp boundary (time of first/last samples)"),
1970 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1971 			  &record.switch_output.set, "signal,size,time",
1972 			  "Switch output when receiving SIGUSR2 or crossing a size or time threshold",
1973 			  "signal"),
1974 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1975 		    "Parse options then exit"),
1976 #ifdef HAVE_AIO_SUPPORT
1977 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
1978 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
1979 		     record__aio_parse),
1980 #endif
1981 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
1982 		     "Set the affinity mask of the trace reading thread to the NUMA node CPU mask or the CPU of the processed mmap buffer",
1983 		     record__parse_affinity),
1984 	OPT_END()
1985 };
1986 
1987 struct option *record_options = __record_options;
1988 
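/*
 * Entry point for 'perf record': parse the command-line options,
 * validate the target and event list, apply the configuration above,
 * and hand off to __cmd_record() to run the actual recording session.
 */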
1989 int cmd_record(int argc, const char **argv)
1990 {
1991 	int err;
1992 	struct record *rec = &record;
1993 	char errbuf[BUFSIZ];
1994 
1995 	setlocale(LC_ALL, "");
1996 
1997 #ifndef HAVE_LIBBPF_SUPPORT
1998 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1999 	set_nobuild('\0', "clang-path", true);
2000 	set_nobuild('\0', "clang-opt", true);
2001 # undef set_nobuild
2002 #endif
2003 
2004 #ifndef HAVE_BPF_PROLOGUE
2005 # if !defined (HAVE_DWARF_SUPPORT)
2006 #  define REASON  "NO_DWARF=1"
2007 # elif !defined (HAVE_LIBBPF_SUPPORT)
2008 #  define REASON  "NO_LIBBPF=1"
2009 # else
2010 #  define REASON  "this architecture doesn't support BPF prologue"
2011 # endif
2012 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2013 	set_nobuild('\0', "vmlinux", true);
2014 # undef set_nobuild
2015 # undef REASON
2016 #endif
2017 
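	/*
	 * Start with an empty affinity mask and the default "SYS" mode:
	 * the trace reading thread is not re-pinned per mmap buffer
	 * unless --affinity=node or --affinity=cpu is given.
	 */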
2018 	CPU_ZERO(&rec->affinity_mask);
2019 	rec->opts.affinity = PERF_AFFINITY_SYS;
2020 
2021 	rec->evlist = perf_evlist__new();
2022 	if (rec->evlist == NULL)
2023 		return -ENOMEM;
2024 
2025 	err = perf_config(perf_record_config, rec);
2026 	if (err)
2027 		return err;
2028 
2029 	argc = parse_options(argc, argv, record_options, record_usage,
2030 			    PARSE_OPT_STOP_AT_NON_OPTION);
2031 	if (quiet)
2032 		perf_quiet_option();
2033 
2034 	/* Make system wide (-a) the default target. */
2035 	if (!argc && target__none(&rec->opts.target))
2036 		rec->opts.target.system_wide = true;
2037 
2038 	if (nr_cgroups && !rec->opts.target.system_wide) {
2039 		usage_with_options_msg(record_usage, record_options,
2040 			"cgroup monitoring only available in system-wide mode");
2041 			"cgroup monitoring is only available in system-wide mode");
2042 	}
2043 	if (rec->opts.record_switch_events &&
2044 	    !perf_can_record_switch_events()) {
2045 		ui__error("kernel does not support recording context switch events\n");
2046 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2047 		return -EINVAL;
2048 	}
2049 
2050 	if (switch_output_setup(rec)) {
2051 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2052 		return -EINVAL;
2053 	}
2054 
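	/*
	 * Time-based --switch-output: arm SIGALRM so that
	 * alarm_sig_handler() can request an output file switch once
	 * the requested interval has elapsed.
	 */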
2055 	if (rec->switch_output.time) {
2056 		signal(SIGALRM, alarm_sig_handler);
2057 		alarm(rec->switch_output.time);
2058 	}
2059 
2060 	/*
2061 	 * Allow aliases to facilitate the lookup of symbols for address
2062 	 * filters. Refer to auxtrace_parse_filters().
2063 	 */
2064 	symbol_conf.allow_aliases = true;
2065 
2066 	symbol__init(NULL);
2067 
2068 	err = record__auxtrace_init(rec);
2069 	if (err)
2070 		goto out;
2071 
2072 	if (dry_run)
2073 		goto out;
2074 
2075 	err = bpf__setup_stdout(rec->evlist);
2076 	if (err) {
2077 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2078 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2079 			 errbuf);
2080 		goto out;
2081 	}
2082 
2083 	err = -ENOMEM;
2084 
2085 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2086 		pr_warning(
2087 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2088 "check /proc/sys/kernel/kptr_restrict.\n\n"
2089 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2090 "file is not found in the buildid cache or in the vmlinux path.\n\n"
2091 "Samples in kernel modules won't be resolved at all.\n\n"
2092 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2093 "even with a suitable vmlinux or kallsyms file.\n\n");
2094 
2095 	if (rec->no_buildid_cache || rec->no_buildid) {
2096 		disable_buildid_cache();
2097 	} else if (rec->switch_output.enabled) {
2098 		/*
2099 		 * In 'perf record --switch-output', disable buildid
2100 		 * generation by default to reduce data file switching
2101 		 * overhead. Still generate build-ids if they are explicitly
2102 		 * required, using
2103 		 *
2104 		 *  perf record --switch-output --no-no-buildid \
2105 		 *              --no-no-buildid-cache
2106 		 *
2107 		 * The following code is equivalent to:
2108 		 *
2109 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2110 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2111 		 *         disable_buildid_cache();
2112 		 */
2113 		bool disable = true;
2114 
2115 		if (rec->no_buildid_set && !rec->no_buildid)
2116 			disable = false;
2117 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2118 			disable = false;
2119 		if (disable) {
2120 			rec->no_buildid = true;
2121 			rec->no_buildid_cache = true;
2122 			disable_buildid_cache();
2123 		}
2124 	}
2125 
2126 	if (record.opts.overwrite)
2127 		record.opts.tail_synthesize = true;
2128 
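	/*
	 * If no events were specified on the command line, add the
	 * default event; bail out if even that allocation fails.
	 */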
2129 	if (rec->evlist->nr_entries == 0 &&
2130 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2131 		pr_err("Not enough memory for event selector list\n");
2132 		goto out;
2133 	}
2134 
2135 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2136 		rec->opts.no_inherit = true;
2137 
2138 	err = target__validate(&rec->opts.target);
2139 	if (err) {
2140 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2141 		ui__warning("%s\n", errbuf);
2142 	}
2143 
2144 	err = target__parse_uid(&rec->opts.target);
2145 	if (err) {
2146 		int saved_errno = errno;
2147 
2148 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2149 		ui__error("%s", errbuf);
2150 
2151 		err = -saved_errno;
2152 		goto out;
2153 	}
2154 
2155 	/* Enable ignoring missing threads when the -u or -p option is given. */
2156 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2157 
2158 	err = -ENOMEM;
2159 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2160 		usage_with_options(record_usage, record_options);
2161 
2162 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2163 	if (err)
2164 		goto out;
2165 
2166 	/*
2167 	 * We take all build-ids when the file contains AUX area
2168 	 * tracing data, because we do not decode the trace: decoding
2169 	 * it would take too long.
2170 	 */
2171 	if (rec->opts.full_auxtrace)
2172 		rec->buildid_all = true;
2173 
2174 	if (record_opts__config(&rec->opts)) {
2175 		err = -EINVAL;
2176 		goto out;
2177 	}
2178 
2179 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2180 		rec->opts.nr_cblocks = nr_cblocks_max;
2181 	if (verbose > 0)
2182 		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2183 
2184 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2185 
2186 	err = __cmd_record(&record, argc, argv);
2187 out:
2188 	perf_evlist__delete(rec->evlist);
2189 	symbol__exit();
2190 	auxtrace_record__free(rec->itr);
2191 	return err;
2192 }
2193 
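/*
 * SIGUSR2 handler: start an AUX area tracing snapshot if snapshot mode
 * is armed, and/or request an output file switch when
 * --switch-output=signal is in use.
 */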
2194 static void snapshot_sig_handler(int sig __maybe_unused)
2195 {
2196 	struct record *rec = &record;
2197 
2198 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2199 		trigger_hit(&auxtrace_snapshot_trigger);
2200 		auxtrace_record__snapshot_started = 1;
2201 		if (auxtrace_record__snapshot_start(record.itr))
2202 			trigger_error(&auxtrace_snapshot_trigger);
2203 	}
2204 
2205 	if (switch_output_signal(rec))
2206 		trigger_hit(&switch_output_trigger);
2207 }
2208 
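/*
 * SIGALRM handler for time-based --switch-output: request an output
 * file switch when the timer expires.
 */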
2209 static void alarm_sig_handler(int sig __maybe_unused)
2210 {
2211 	struct record *rec = &record;
2212 
2213 	if (switch_output_time(rec))
2214 		trigger_hit(&switch_output_trigger);
2215 }
2216