/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

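/*
 * Fallback for libcs that lack the non-standard (SunOS/GNU) on_exit(),
 * e.g. Android's bionic: emulate it on top of atexit(). The exit()
 * wrapper below records the status code first, so handlers registered
 * via on_exit() still receive it as their first argument:
 *
 *	on_exit(perf_record__exit, rec);
 *	...
 *	exit(1);	expands to (exit)(__exitcode = 1)
 */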
#ifndef HAVE_ON_EXIT_SUPPORT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

static int do_write_output(struct perf_record *rec, void *buf, size_t size)
{
	struct perf_data_file *file = &rec->file;

	/* write() may be short: loop until the whole buffer is flushed */
	while (size) {
		ssize_t ret = write(file->fd, buf, size);

		if (ret < 0) {
			pr_err("failed to write perf data, error: %m\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	return do_write_output(rec, buf, size);
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

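	/*
	 * 'old' and 'head' are free-running byte counters that are mapped
	 * into the ring buffer with '& md->mask'. When the new data wraps
	 * around the end of the buffer it must be written out in two
	 * chunks. Example, with a 4096 byte buffer (mask 0xfff), old = 4000
	 * and head = 4296: first the 96 bytes at offset 4000, then the
	 * remaining 200 bytes from offset 0.
	 */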
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}

static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

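/*
 * Walk the events recorded so far and mark the DSOs that got hits, so that
 * only the build ids of binaries that actually appear in the profile are
 * written to the perf.data header by perf_record__exit() below.
 */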
static int process_buildids(struct perf_record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	/* the current file offset is the amount of data written so far */
	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;
	struct perf_data_file *file = &rec->file;

	if (status != 0)
		return;

	if (!file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record and report
	 * subcommands we synthesize the module mmaps before the guest
	 * kernel mmap and trigger a dso preload, because by default
	 * guest module symbols are loaded from guest kallsyms instead
	 * of /lib/modules/XXX/XXX. This avoids missing symbols when the
	 * first sampled address falls in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We fall back to _stext for the guest kernel because the guest
	 * kernel's /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

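/*
 * PERF_RECORD_FINISHED_ROUND never comes from the kernel: it is synthesized
 * after each full pass over the mmap buffers and serves as a flush point
 * when 'perf report' re-sorts events by timestamp, bounding how many
 * events it must keep buffered.
 */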
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

static void perf_record__init_features(struct perf_record *rec)
{
	struct perf_evlist *evsel_list = rec->evlist;
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	int err;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	perf_record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, file->is_pipe,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 file->fd, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, evsel_list->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

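	/*
	 * Record loop: drain all mmap buffers, then block in poll() until
	 * either the kernel signals that new data is available or a signal
	 * handler sets 'done'. An iteration where rec->samples did not move
	 * means every buffer was found empty.
	 */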
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		file->path,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};

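/*
 * A branch filter string is a comma separated, case insensitive list of
 * the names above, e.g. "-j any_call,u" yields
 * PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_USER. If only the
 * privilege levels (u/k/hv) are given, the branch type defaults to
 * PERF_SAMPLE_BRANCH_ANY.
 */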
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef HAVE_LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_LIBUNWIND_SUPPORT */

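/*
 * Parses the argument of -g/--call-graph: "fp" selects frame pointer
 * based unwinding; with libunwind support, "dwarf[,size]" dumps up to
 * 'size' bytes of stack (default 8192) with each sample for offline
 * DWARF based unwinding, e.g.:
 *
 *	perf record --call-graph dwarf,4096 -- ./workload
 */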
int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct perf_record_opts *opts)
{
	pr_debug("callchain: type %d\n", opts->call_graph);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct perf_record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct perf_record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new(), because we need access to it in perf_record__exit(),
 * which is called after cmd_record() exits, but since record_options needs
 * to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it directly in all the other functions here.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu,
		    "force the use of per-cpu mmaps"),
	OPT_END()
};

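/*
 * Typical invocations, using only options declared in record_options
 * above:
 *
 *	perf record -- ./workload	profile a freshly started workload
 *	perf record -p 1234		profile an existing process until ^C
 *	perf record -a -g		system-wide, with call graphs
 */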
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (perf_record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);

	perf_evlist__munmap(evsel_list);
	perf_evlist__close(evsel_list);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
977