/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

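/*
 * Fallback for C libraries that lack the GNU on_exit() extension: emulate
 * it with a small table of (handler, arg) pairs flushed from a single
 * atexit() hook.  exit() is wrapped so the status code is captured in
 * __exitcode and can be passed to each handler, mirroring on_exit()
 * semantics.
 */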
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

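/*
 * Append raw bytes to the perf.data output and account for them in
 * bytes_written, which record__exit() later folds into the file header's
 * data_size field.
 */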
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

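/*
 * Drain one mmap'ed ring buffer into the output file.  Data between
 * md->prev (our last read position) and the kernel's head pointer is
 * copied out; when that range wraps past the end of the ring it is
 * written as two chunks - first the tail of the buffer, then the rest
 * from the start.  The tail pointer is updated afterwards so the kernel
 * may reuse the space.
 */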
static int record__mmap_read(struct record *rec, struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

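/*
 * Signal state shared with the handlers below: plain flags written from
 * signal context and polled from the main record loop, hence volatile.
 */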
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}

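/*
 * Open a counter for every event in the evlist, retrying via
 * perf_evsel__fallback() when the exact configuration is not supported
 * (e.g., falling back from a hardware to a software event), then apply
 * any event filters and mmap the ring buffers.
 */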
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

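/*
 * Re-read the events already written to perf.data so that
 * build_id__mark_dso_hit_ops can flag every DSO actually hit by a sample;
 * only those build-ids then need to be emitted into the header.
 */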
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

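/*
 * on_exit() hook that finalizes a successful run: account the bytes
 * written into header.data_size, collect build-ids unless disabled, and
 * rewrite the header in place.  Pipe output needs none of this.
 */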
static void record__exit(int status, void *arg)
{
	struct record *rec = arg;
	struct perf_data_file *file = &rec->file;

	if (status != 0)
		return;

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For guest kernels, when processing the record and report
	 * subcommands, synthesize the module mmaps before the guest kernel
	 * mmap and trigger a DSO preload, because guest module symbols are
	 * loaded from guest kallsyms by default instead of from
	 * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
	 * address resolved falls in a module rather than in the guest
	 * kernel proper.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

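/*
 * PERF_RECORD_FINISHED_ROUND marks a point at which every ring buffer has
 * been drained once; report-side tools use it as a barrier up to which
 * buffered events can safely be flushed in timestamp order.
 */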
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload() will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting its want_signal parameter to true.
 */
static void workload_exec_failed_signal(int signo, siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	signr = signo;
	child_finished = 1;
}

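/*
 * The main recording driver: set up the session and signal handling, fork
 * the workload if one was given on the command line, open and mmap the
 * counters, synthesize the pre-existing state (kernel mmap, modules,
 * threads), then loop draining the ring buffers via poll() until the
 * workload exits or the user interrupts us.
 */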
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at record__exit()
	 */
	on_exit(record__exit, rec);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that there
			 * were no tracepoints, so it's not really an
			 * error, just that we don't need to synthesize
			 * anything.  We really have to return this more
			 * properly and also propagate errors that are
			 * currently calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[512];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_delete_session;
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};

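/*
 * Parse the comma-separated list given to -j/--branch-filter (or the bare
 * -b) against the table above.  For example, "-j any_call,u" ORs
 * PERF_SAMPLE_BRANCH_ANY_CALL and PERF_SAMPLE_BRANCH_USER into the mode
 * mask; if only privilege-level bits end up set, the filter defaults to
 * PERF_SAMPLE_BRANCH_ANY.
 */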
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */

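/*
 * Parse the --call-graph argument: "fp" selects frame-pointer unwinding
 * and takes no further arguments, while "dwarf[,size]" (when built with
 * HAVE_DWARF_UNWIND_SUPPORT) selects DWARF unwinding with an optional
 * user-stack dump size, e.g. "perf record --call-graph dwarf,4096" sets
 * stack_dump_size to 4096 instead of the 8192-byte default.
 */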
int record_parse_callchain(const char *arg, struct record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Frame pointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_DWARF_UNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct record_opts *opts)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };

	pr_debug("callchain: type %s\n", str[opts->call_graph]);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct record_opts *opts = opt->value;
	int ret;

	opts->call_graph_enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;

	opts->call_graph_enabled = !unset;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

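/*
 * Config-file hook: a record.call-graph key in perfconfig is routed
 * through record_parse_callchain(), so e.g. the snippet
 *
 *	[record]
 *		call-graph = dwarf,8192
 *
 * makes DWARF unwinding the default; everything else falls through to
 * perf_default_config().
 */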
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.call-graph"))
		return record_parse_callchain(value, &rec->opts);

	return perf_default_config(var, value, cb);
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new because we need to have access to it in record__exit(),
 * which is called after cmd_record() exits, but since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable until we fix builtin-script.c to stop
 * messing with it and switch to use the library functions in perf_evlist
 * that came from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};

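/*
 * Entry point wired into perf's builtin command table: parse the options,
 * validate the target (pid/tid/cpu/uid combinations), add the default
 * event if none was given, then hand off to __cmd_record().
 */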
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	symbol__exit();
	return err;
}