/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

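/*
 * Some C libraries lack on_exit() (HAVE_ON_EXIT_SUPPORT unset), so emulate
 * it on top of atexit(): store each handler and its argument in a fixed-size
 * table, register a single atexit() dispatcher on first use, and wrap exit()
 * so the status code is captured in __exitcode for the dispatcher to pass on.
 */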
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

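/*
 * Append a raw blob to the perf.data output file, keeping a running byte
 * count so the header's data_size can be fixed up at exit time.
 */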
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

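/*
 * Callback used by the event synthesis routines below: synthesized events
 * are written straight to the output file, just like sampled events.
 */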
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

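/*
 * Drain one mmap'ed ring buffer into the output file.  The kernel only
 * advances the head pointer, so everything between our cached tail
 * (md->prev) and the head is new data; if that span wraps past the end of
 * the buffer it is written out in two chunks, and only then is the tail
 * advanced to tell the kernel the space can be reused.
 */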
static int record__mmap_read(struct record *rec, struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

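/*
 * SIGINT, SIGTERM and SIGCHLD all funnel through here: remember which
 * signal fired and ask the main loop to wind down.
 */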
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}

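/*
 * Open a counter for every event in the evlist, falling back to a softer
 * event configuration when the precise one is not supported, then apply
 * any tracepoint filters and mmap the ring buffers.
 */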
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

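/*
 * Traverse the freshly recorded events with build_id__mark_dso_hit_ops so
 * that only DSOs which actually got hits have their build-ids emitted when
 * the header is rewritten; the current file offset marks the end of the
 * data section.
 */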
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

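/*
 * on_exit() handler: on a clean exit, fix up the data size in the header,
 * collect build-ids if enabled, and rewrite the header in its final form.
 */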
static void record__exit(int status, void *arg)
{
	struct record *rec = arg;
	struct perf_data_file *file = &rec->file;

	if (status != 0)
		return;

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record and report
	 * subcommands, we synthesize the module mmap events before the
	 * guest kernel mmap event and trigger a DSO preload, because by
	 * default guest module symbols are loaded from the guest's
	 * kallsyms instead of /lib/modules/XXX/XXX.  This avoids missing
	 * symbols when the first sampled address falls in a module rather
	 * than in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

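/*
 * PERF_RECORD_FINISHED_ROUND marks a point where every ring buffer has been
 * flushed, so the report side can safely sort and process all events queued
 * before it.  It is written after each full pass over the buffers, but only
 * when tracing data is being recorded (see record__mmap_read_all).
 */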
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

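/*
 * Start from "all header features enabled", then switch off the ones this
 * session cannot provide (build-ids, tracing data, branch stacks).
 */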
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting want_signal to true.
 */
static void workload_exec_failed_signal(int signo, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	signr = signo;
	child_finished = 1;
}

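/*
 * The main recording path: set up the session and output file, synthesize
 * the pre-existing state (kernel, modules, threads), kick off the workload
 * if one was given, then drain the ring buffers until we are told to stop.
 */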
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at record__exit()
	 */
	on_exit(record__exit, rec);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			rec->bytes_written += err;
		}
	}

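	/*
	 * Samples only carry addresses; synthesize the mmap/comm events that
	 * describe the already-running system (kernel text, modules, guest
	 * machines, existing threads) so report can resolve symbols later.
	 */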
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

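	/*
	 * Main loop: drain all ring buffers, and poll for more data only
	 * when a pass found nothing new.  'done' is set by the signal
	 * handlers above.
	 */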
	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[512];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_delete_session;
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};

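/*
 * Parse the -b/--branch-filter argument: a comma-separated list of the mode
 * names above, OR'ed together, defaulting to "any" when only privilege
 * levels were requested.
 */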
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef HAVE_LIBUNWIND_SUPPORT
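/*
 * Parse the dwarf stack dump size: a plain number, rounded up to a u64
 * multiple and capped a little below USHRT_MAX.
 */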
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */

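/*
 * Parse the --call-graph argument: "fp" for frame-pointer unwinding, or,
 * with libunwind support, "dwarf[,size]" to capture a stack dump of the
 * given size for post-processing.
 */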
int record_parse_callchain(const char *arg, struct record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct record_opts *opts)
{
	pr_debug("callchain: type %d\n", opts->call_graph);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

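/* Option handler for --call-graph / --no-call-graph, with an explicit mode. */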
int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

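/*
 * Option handler for the bare -g flag: it takes no argument, so just switch
 * on frame pointer call graphs if nothing else was configured.
 */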
int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};

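/*
 * Entry point for 'perf record': parse the options, validate the target,
 * create the cpu/thread maps and hand over to __cmd_record().
 */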
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	symbol__exit();
	return err;
}
982