xref: /openbmc/linux/tools/perf/builtin-record.c (revision 089a49b6)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16 
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 
28 #include <unistd.h>
29 #include <sched.h>
30 #include <sys/mman.h>
31 
#ifndef HAVE_ON_EXIT
/*
 * Minimal on_exit(3) replacement for C libraries that do not provide it.
 *
 * Up to ATEXIT_MAX (function, argument) pairs are stored in static tables
 * and dispatched from a single atexit() handler, in registration order.
 * The status handed to the handlers is whatever was last passed to exit()
 * through the exit() wrapper macro below; it stays 0 if exit() was never
 * called through that macro.
 */
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Remember the exit status so the registered handlers can receive it. */
#define exit(x) (exit)(__exitcode = (x))

/*
 * Register @function to be called with the exit status and @arg at normal
 * process termination.
 *
 * Returns 0 on success, -ENOMEM when the table is full or when the
 * dispatcher could not be installed with atexit().
 */
static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;

	/*
	 * The first registration installs the dispatcher.  If that fails
	 * nothing would ever be called, so report the failure instead of
	 * pretending the handler was registered.
	 */
	if (__on_exit_count == 0 && atexit(__handle_on_exit_funcs) != 0)
		return -ENOMEM;

	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

/* atexit() dispatcher: run every registered handler, oldest first. */
static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
63 
/*
 * State of one 'perf record' run.  A single static instance ("record") is
 * filled in by option parsing and then threaded through the helpers below.
 */
struct perf_record {
	/* embedded so tool callbacks can recover the perf_record via container_of() */
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	/* running total of bytes accounted by write_output()/advance_output() */
	u64			bytes_written;
	/* output path from -o; NULL until defaulted in __cmd_record() */
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	/* argv[0] left after option parsing; used as the psignal() prefix */
	const char		*progname;
	/* file descriptor events are written to */
	int			output;
	/* sysconf(_SC_PAGE_SIZE); offset of the data area inside each mmap */
	unsigned int		page_size;
	/* SCHED_FIFO priority from -r; 0 leaves the scheduler untouched */
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	/* bumped once per mmap buffer that had new data, not once per sample */
	long			samples;
	/* output offset recorded after the header; build-id processing starts here */
	off_t			post_processing_offset;
};
80 
81 static void advance_output(struct perf_record *rec, size_t size)
82 {
83 	rec->bytes_written += size;
84 }
85 
86 static int write_output(struct perf_record *rec, void *buf, size_t size)
87 {
88 	while (size) {
89 		int ret = write(rec->output, buf, size);
90 
91 		if (ret < 0) {
92 			pr_err("failed to write\n");
93 			return -1;
94 		}
95 
96 		size -= ret;
97 		buf += ret;
98 
99 		rec->bytes_written += ret;
100 	}
101 
102 	return 0;
103 }
104 
105 static int process_synthesized_event(struct perf_tool *tool,
106 				     union perf_event *event,
107 				     struct perf_sample *sample __maybe_unused,
108 				     struct machine *machine __maybe_unused)
109 {
110 	struct perf_record *rec = container_of(tool, struct perf_record, tool);
111 	if (write_output(rec, event, event->header.size) < 0)
112 		return -1;
113 
114 	return 0;
115 }
116 
117 static int perf_record__mmap_read(struct perf_record *rec,
118 				   struct perf_mmap *md)
119 {
120 	unsigned int head = perf_mmap__read_head(md);
121 	unsigned int old = md->prev;
122 	unsigned char *data = md->base + rec->page_size;
123 	unsigned long size;
124 	void *buf;
125 	int rc = 0;
126 
127 	if (old == head)
128 		return 0;
129 
130 	rec->samples++;
131 
132 	size = head - old;
133 
134 	if ((old & md->mask) + size != (head & md->mask)) {
135 		buf = &data[old & md->mask];
136 		size = md->mask + 1 - (old & md->mask);
137 		old += size;
138 
139 		if (write_output(rec, buf, size) < 0) {
140 			rc = -1;
141 			goto out;
142 		}
143 	}
144 
145 	buf = &data[old & md->mask];
146 	size = head - old;
147 	old += size;
148 
149 	if (write_output(rec, buf, size) < 0) {
150 		rc = -1;
151 		goto out;
152 	}
153 
154 	md->prev = old;
155 	perf_mmap__write_tail(md, old);
156 
157 out:
158 	return rc;
159 }
160 
/*
 * Flags shared between the signal handler and the main loop.  They are
 * written from signal context, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t signr = -1;
static volatile sig_atomic_t child_finished = 0;

/*
 * Common handler for the signals installed by __cmd_record(): request the
 * main loop to stop, remember which signal arrived and note when it was
 * SIGCHLD.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
173 
174 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
175 {
176 	struct perf_record *rec = arg;
177 	int status;
178 
179 	if (rec->evlist->workload.pid > 0) {
180 		if (!child_finished)
181 			kill(rec->evlist->workload.pid, SIGTERM);
182 
183 		wait(&status);
184 		if (WIFSIGNALED(status))
185 			psignal(WTERMSIG(status), rec->progname);
186 	}
187 
188 	if (signr == -1 || signr == SIGUSR1)
189 		return;
190 
191 	signal(signr, SIG_DFL);
192 }
193 
194 static int perf_record__open(struct perf_record *rec)
195 {
196 	char msg[512];
197 	struct perf_evsel *pos;
198 	struct perf_evlist *evlist = rec->evlist;
199 	struct perf_session *session = rec->session;
200 	struct perf_record_opts *opts = &rec->opts;
201 	int rc = 0;
202 
203 	perf_evlist__config(evlist, opts);
204 
205 	list_for_each_entry(pos, &evlist->entries, node) {
206 try_again:
207 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
208 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
209 				if (verbose)
210 					ui__warning("%s\n", msg);
211 				goto try_again;
212 			}
213 
214 			rc = -errno;
215 			perf_evsel__open_strerror(pos, &opts->target,
216 						  errno, msg, sizeof(msg));
217 			ui__error("%s\n", msg);
218 			goto out;
219 		}
220 	}
221 
222 	if (perf_evlist__apply_filters(evlist)) {
223 		error("failed to set filter with %d (%s)\n", errno,
224 			strerror(errno));
225 		rc = -1;
226 		goto out;
227 	}
228 
229 	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
230 		if (errno == EPERM) {
231 			pr_err("Permission error mapping pages.\n"
232 			       "Consider increasing "
233 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
234 			       "or try again with a smaller value of -m/--mmap_pages.\n"
235 			       "(current value: %d)\n", opts->mmap_pages);
236 			rc = -errno;
237 		} else if (!is_power_of_2(opts->mmap_pages) &&
238 			   (opts->mmap_pages != UINT_MAX)) {
239 			pr_err("--mmap_pages/-m value must be a power of two.");
240 			rc = -EINVAL;
241 		} else {
242 			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
243 			rc = -errno;
244 		}
245 		goto out;
246 	}
247 
248 	session->evlist = evlist;
249 	perf_session__set_id_hdr_size(session);
250 out:
251 	return rc;
252 }
253 
254 static int process_buildids(struct perf_record *rec)
255 {
256 	u64 size = lseek(rec->output, 0, SEEK_CUR);
257 
258 	if (size == 0)
259 		return 0;
260 
261 	rec->session->fd = rec->output;
262 	return __perf_session__process_events(rec->session, rec->post_processing_offset,
263 					      size - rec->post_processing_offset,
264 					      size, &build_id__mark_dso_hit_ops);
265 }
266 
267 static void perf_record__exit(int status, void *arg)
268 {
269 	struct perf_record *rec = arg;
270 
271 	if (status != 0)
272 		return;
273 
274 	if (!rec->opts.pipe_output) {
275 		rec->session->header.data_size += rec->bytes_written;
276 
277 		if (!rec->no_buildid)
278 			process_buildids(rec);
279 		perf_session__write_header(rec->session, rec->evlist,
280 					   rec->output, true);
281 		perf_session__delete(rec->session);
282 		perf_evlist__delete(rec->evlist);
283 		symbol__exit();
284 	}
285 }
286 
287 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
288 {
289 	int err;
290 	struct perf_tool *tool = data;
291 	/*
292 	 *As for guest kernel when processing subcommand record&report,
293 	 *we arrange module mmap prior to guest kernel mmap and trigger
294 	 *a preload dso because default guest module symbols are loaded
295 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
296 	 *method is used to avoid symbol missing when the first addr is
297 	 *in module instead of in guest kernel.
298 	 */
299 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
300 					     machine);
301 	if (err < 0)
302 		pr_err("Couldn't record guest kernel [%d]'s reference"
303 		       " relocation symbol.\n", machine->pid);
304 
305 	/*
306 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
307 	 * have no _text sometimes.
308 	 */
309 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
310 						 machine, "_text");
311 	if (err < 0)
312 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
313 							 machine, "_stext");
314 	if (err < 0)
315 		pr_err("Couldn't record guest kernel [%d]'s reference"
316 		       " relocation symbol.\n", machine->pid);
317 }
318 
319 static struct perf_event_header finished_round_event = {
320 	.size = sizeof(struct perf_event_header),
321 	.type = PERF_RECORD_FINISHED_ROUND,
322 };
323 
324 static int perf_record__mmap_read_all(struct perf_record *rec)
325 {
326 	int i;
327 	int rc = 0;
328 
329 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
330 		if (rec->evlist->mmap[i].base) {
331 			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
332 				rc = -1;
333 				goto out;
334 			}
335 		}
336 	}
337 
338 	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
339 		rc = write_output(rec, &finished_round_event,
340 				  sizeof(finished_round_event));
341 
342 out:
343 	return rc;
344 }
345 
346 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
347 {
348 	struct stat st;
349 	int flags;
350 	int err, output, feat;
351 	unsigned long waking = 0;
352 	const bool forks = argc > 0;
353 	struct machine *machine;
354 	struct perf_tool *tool = &rec->tool;
355 	struct perf_record_opts *opts = &rec->opts;
356 	struct perf_evlist *evsel_list = rec->evlist;
357 	const char *output_name = rec->output_name;
358 	struct perf_session *session;
359 	bool disabled = false;
360 
361 	rec->progname = argv[0];
362 
363 	rec->page_size = sysconf(_SC_PAGE_SIZE);
364 
365 	on_exit(perf_record__sig_exit, rec);
366 	signal(SIGCHLD, sig_handler);
367 	signal(SIGINT, sig_handler);
368 	signal(SIGUSR1, sig_handler);
369 	signal(SIGTERM, sig_handler);
370 
371 	if (!output_name) {
372 		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
373 			opts->pipe_output = true;
374 		else
375 			rec->output_name = output_name = "perf.data";
376 	}
377 	if (output_name) {
378 		if (!strcmp(output_name, "-"))
379 			opts->pipe_output = true;
380 		else if (!stat(output_name, &st) && st.st_size) {
381 			char oldname[PATH_MAX];
382 			snprintf(oldname, sizeof(oldname), "%s.old",
383 				 output_name);
384 			unlink(oldname);
385 			rename(output_name, oldname);
386 		}
387 	}
388 
389 	flags = O_CREAT|O_RDWR|O_TRUNC;
390 
391 	if (opts->pipe_output)
392 		output = STDOUT_FILENO;
393 	else
394 		output = open(output_name, flags, S_IRUSR | S_IWUSR);
395 	if (output < 0) {
396 		perror("failed to create output file");
397 		return -1;
398 	}
399 
400 	rec->output = output;
401 
402 	session = perf_session__new(output_name, O_WRONLY,
403 				    true, false, NULL);
404 	if (session == NULL) {
405 		pr_err("Not enough memory for reading perf file header\n");
406 		return -1;
407 	}
408 
409 	rec->session = session;
410 
411 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
412 		perf_header__set_feat(&session->header, feat);
413 
414 	if (rec->no_buildid)
415 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
416 
417 	if (!have_tracepoints(&evsel_list->entries))
418 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
419 
420 	if (!rec->opts.branch_stack)
421 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
422 
423 	if (forks) {
424 		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
425 						    argv, opts->pipe_output,
426 						    true);
427 		if (err < 0) {
428 			pr_err("Couldn't run the workload!\n");
429 			goto out_delete_session;
430 		}
431 	}
432 
433 	if (perf_record__open(rec) != 0) {
434 		err = -1;
435 		goto out_delete_session;
436 	}
437 
438 	if (!evsel_list->nr_groups)
439 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
440 
441 	/*
442 	 * perf_session__delete(session) will be called at perf_record__exit()
443 	 */
444 	on_exit(perf_record__exit, rec);
445 
446 	if (opts->pipe_output) {
447 		err = perf_header__write_pipe(output);
448 		if (err < 0)
449 			goto out_delete_session;
450 	} else {
451 		err = perf_session__write_header(session, evsel_list,
452 						 output, false);
453 		if (err < 0)
454 			goto out_delete_session;
455 	}
456 
457 	if (!rec->no_buildid
458 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
459 		pr_err("Couldn't generate buildids. "
460 		       "Use --no-buildid to profile anyway.\n");
461 		err = -1;
462 		goto out_delete_session;
463 	}
464 
465 	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
466 
467 	machine = &session->machines.host;
468 
469 	if (opts->pipe_output) {
470 		err = perf_event__synthesize_attrs(tool, session,
471 						   process_synthesized_event);
472 		if (err < 0) {
473 			pr_err("Couldn't synthesize attrs.\n");
474 			goto out_delete_session;
475 		}
476 
477 		if (have_tracepoints(&evsel_list->entries)) {
478 			/*
479 			 * FIXME err <= 0 here actually means that
480 			 * there were no tracepoints so its not really
481 			 * an error, just that we don't need to
482 			 * synthesize anything.  We really have to
483 			 * return this more properly and also
484 			 * propagate errors that now are calling die()
485 			 */
486 			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
487 								  process_synthesized_event);
488 			if (err <= 0) {
489 				pr_err("Couldn't record tracing data.\n");
490 				goto out_delete_session;
491 			}
492 			advance_output(rec, err);
493 		}
494 	}
495 
496 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
497 						 machine, "_text");
498 	if (err < 0)
499 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
500 							 machine, "_stext");
501 	if (err < 0)
502 		pr_err("Couldn't record kernel reference relocation symbol\n"
503 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
504 		       "Check /proc/kallsyms permission or run as root.\n");
505 
506 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
507 					     machine);
508 	if (err < 0)
509 		pr_err("Couldn't record kernel module information.\n"
510 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
511 		       "Check /proc/modules permission or run as root.\n");
512 
513 	if (perf_guest) {
514 		machines__process_guests(&session->machines,
515 					 perf_event__synthesize_guest_os, tool);
516 	}
517 
518 	if (perf_target__has_task(&opts->target))
519 		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
520 						  process_synthesized_event,
521 						  machine);
522 	else if (perf_target__has_cpu(&opts->target))
523 		err = perf_event__synthesize_threads(tool, process_synthesized_event,
524 					       machine);
525 	else /* command specified */
526 		err = 0;
527 
528 	if (err != 0)
529 		goto out_delete_session;
530 
531 	if (rec->realtime_prio) {
532 		struct sched_param param;
533 
534 		param.sched_priority = rec->realtime_prio;
535 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
536 			pr_err("Could not set realtime priority.\n");
537 			err = -1;
538 			goto out_delete_session;
539 		}
540 	}
541 
542 	/*
543 	 * When perf is starting the traced process, all the events
544 	 * (apart from group members) have enable_on_exec=1 set,
545 	 * so don't spoil it by prematurely enabling them.
546 	 */
547 	if (!perf_target__none(&opts->target))
548 		perf_evlist__enable(evsel_list);
549 
550 	/*
551 	 * Let the child rip
552 	 */
553 	if (forks)
554 		perf_evlist__start_workload(evsel_list);
555 
556 	for (;;) {
557 		int hits = rec->samples;
558 
559 		if (perf_record__mmap_read_all(rec) < 0) {
560 			err = -1;
561 			goto out_delete_session;
562 		}
563 
564 		if (hits == rec->samples) {
565 			if (done)
566 				break;
567 			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
568 			waking++;
569 		}
570 
571 		/*
572 		 * When perf is starting the traced process, at the end events
573 		 * die with the process and we wait for that. Thus no need to
574 		 * disable events in this case.
575 		 */
576 		if (done && !disabled && !perf_target__none(&opts->target)) {
577 			perf_evlist__disable(evsel_list);
578 			disabled = true;
579 		}
580 	}
581 
582 	if (quiet || signr == SIGUSR1)
583 		return 0;
584 
585 	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
586 
587 	/*
588 	 * Approximate RIP event size: 24 bytes.
589 	 */
590 	fprintf(stderr,
591 		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
592 		(double)rec->bytes_written / 1024.0 / 1024.0,
593 		output_name,
594 		rec->bytes_written / 24);
595 
596 	return 0;
597 
598 out_delete_session:
599 	perf_session__delete(session);
600 	return err;
601 }
602 
/* Build one branch_modes[] entry mapping filter keyword n to sample flag m. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

/* Sentinel entry: a NULL name terminates branch_modes[]. */
#define BRANCH_END { .name = NULL }

/* Pairs a branch filter keyword with its PERF_SAMPLE_BRANCH_* flag. */
struct branch_mode {
	const char *name;
	int mode;
};

/*
 * Keywords accepted by parse_branch_stack(), which compares them
 * case-insensitively and stops at the BRANCH_END sentinel.
 */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};
623 
624 static int
625 parse_branch_stack(const struct option *opt, const char *str, int unset)
626 {
627 #define ONLY_PLM \
628 	(PERF_SAMPLE_BRANCH_USER	|\
629 	 PERF_SAMPLE_BRANCH_KERNEL	|\
630 	 PERF_SAMPLE_BRANCH_HV)
631 
632 	uint64_t *mode = (uint64_t *)opt->value;
633 	const struct branch_mode *br;
634 	char *s, *os = NULL, *p;
635 	int ret = -1;
636 
637 	if (unset)
638 		return 0;
639 
640 	/*
641 	 * cannot set it twice, -b + --branch-filter for instance
642 	 */
643 	if (*mode)
644 		return -1;
645 
646 	/* str may be NULL in case no arg is passed to -b */
647 	if (str) {
648 		/* because str is read-only */
649 		s = os = strdup(str);
650 		if (!s)
651 			return -1;
652 
653 		for (;;) {
654 			p = strchr(s, ',');
655 			if (p)
656 				*p = '\0';
657 
658 			for (br = branch_modes; br->name; br++) {
659 				if (!strcasecmp(s, br->name))
660 					break;
661 			}
662 			if (!br->name) {
663 				ui__warning("unknown branch filter %s,"
664 					    " check man page\n", s);
665 				goto error;
666 			}
667 
668 			*mode |= br->mode;
669 
670 			if (!p)
671 				break;
672 
673 			s = p + 1;
674 		}
675 	}
676 	ret = 0;
677 
678 	/* default to any branch */
679 	if ((*mode & ~ONLY_PLM) == 0) {
680 		*mode = PERF_SAMPLE_BRANCH_ANY;
681 	}
682 error:
683 	free(os);
684 	return ret;
685 }
686 
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse a stack dump size for '-g dwarf,<size>'.
 *
 * @str must be a complete number (any base strtoul() accepts with base 0).
 * The value is rounded up to a multiple of sizeof(u64) and must then be
 * non-zero and no larger than USHRT_MAX rounded down to that multiple.
 *
 * Returns 0 and stores the size in *_size on success; prints an error and
 * returns -1 otherwise, leaving *_size untouched.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	/* only accept the value when the whole string was consumed */
	if (*endptr == '\0') {
		size = round_up(size, sizeof(u64));
		if (size && size <= max_size) {
			*_size = size;
			return 0;
		}
	}

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
714 
715 int record_parse_callchain_opt(const struct option *opt,
716 			       const char *arg, int unset)
717 {
718 	struct perf_record_opts *opts = opt->value;
719 	char *tok, *name, *saveptr = NULL;
720 	char *buf;
721 	int ret = -1;
722 
723 	/* --no-call-graph */
724 	if (unset)
725 		return 0;
726 
727 	/* We specified default option if none is provided. */
728 	BUG_ON(!arg);
729 
730 	/* We need buffer that we know we can write to. */
731 	buf = malloc(strlen(arg) + 1);
732 	if (!buf)
733 		return -ENOMEM;
734 
735 	strcpy(buf, arg);
736 
737 	tok = strtok_r((char *)buf, ",", &saveptr);
738 	name = tok ? : (char *)buf;
739 
740 	do {
741 		/* Framepointer style */
742 		if (!strncmp(name, "fp", sizeof("fp"))) {
743 			if (!strtok_r(NULL, ",", &saveptr)) {
744 				opts->call_graph = CALLCHAIN_FP;
745 				ret = 0;
746 			} else
747 				pr_err("callchain: No more arguments "
748 				       "needed for -g fp\n");
749 			break;
750 
751 #ifdef LIBUNWIND_SUPPORT
752 		/* Dwarf style */
753 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
754 			const unsigned long default_stack_dump_size = 8192;
755 
756 			ret = 0;
757 			opts->call_graph = CALLCHAIN_DWARF;
758 			opts->stack_dump_size = default_stack_dump_size;
759 
760 			tok = strtok_r(NULL, ",", &saveptr);
761 			if (tok) {
762 				unsigned long size = 0;
763 
764 				ret = get_stack_size(tok, &size);
765 				opts->stack_dump_size = size;
766 			}
767 
768 			if (!ret)
769 				pr_debug("callchain: stack dump size %d\n",
770 					 opts->stack_dump_size);
771 #endif /* LIBUNWIND_SUPPORT */
772 		} else {
773 			pr_err("callchain: Unknown -g option "
774 			       "value: %s\n", arg);
775 			break;
776 		}
777 
778 	} while (0);
779 
780 	free(buf);
781 
782 	if (!ret)
783 		pr_debug("callchain: type %d\n", opts->call_graph);
784 
785 	return ret;
786 }
787 
/* NULL-terminated usage lines passed to parse_options()/usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
793 
794 /*
795  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
796  * because we need to have access to it in perf_record__exit, that is called
797  * after cmd_record() exits, but since record_options need to be accessible to
798  * builtin-script, leave it here.
799  *
800  * At least we don't touch it in all the other functions here directly.
801  *
802  * Just say no to tons of global variables, sigh.
803  */
/*
 * The single recording state, written to directly by record_options[].
 * user_freq == UINT_MAX and user_interval == ULLONG_MAX mean "not given on
 * the command line" (checked in cmd_record()); mmap_pages == UINT_MAX is
 * exempt from the power-of-two check in perf_record__open().
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		/* sampling frequency used when neither -F nor -c is given */
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
};
815 
/* Common prefix of the -g/--call-graph help text. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* The "dwarf" mode is only advertised when built with LIBUNWIND_SUPPORT. */
#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
823 
824 /*
825  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
826  * with it and switch to use the library functions in perf_evlist that came
827  * from builtin-record.c, i.e. use perf_record_opts,
828  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
829  * using pipes, etc.
830  */
const struct option record_options[] = {
	/* event selection and filtering; callbacks receive &record.evlist */
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	/* target selection */
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	/* -c and -F only fill the user_* fields; cmd_record() reconciles them */
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	/* a bare -g is parsed as "fp" */
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			     "mode[,dump_size]", record_callchain_help,
			     &record_parse_callchain_opt, "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b and -j share one callback and one mask; it can be set only once */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_END()
};
896 
897 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
898 {
899 	int err = -ENOMEM;
900 	struct perf_evlist *evsel_list;
901 	struct perf_record *rec = &record;
902 	char errbuf[BUFSIZ];
903 
904 	evsel_list = perf_evlist__new();
905 	if (evsel_list == NULL)
906 		return -ENOMEM;
907 
908 	rec->evlist = evsel_list;
909 
910 	argc = parse_options(argc, argv, record_options, record_usage,
911 			    PARSE_OPT_STOP_AT_NON_OPTION);
912 	if (!argc && perf_target__none(&rec->opts.target))
913 		usage_with_options(record_usage, record_options);
914 
915 	if (nr_cgroups && !rec->opts.target.system_wide) {
916 		ui__error("cgroup monitoring only available in"
917 			  " system-wide mode\n");
918 		usage_with_options(record_usage, record_options);
919 	}
920 
921 	symbol__init();
922 
923 	if (symbol_conf.kptr_restrict)
924 		pr_warning(
925 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
926 "check /proc/sys/kernel/kptr_restrict.\n\n"
927 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
928 "file is not found in the buildid cache or in the vmlinux path.\n\n"
929 "Samples in kernel modules won't be resolved at all.\n\n"
930 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
931 "even with a suitable vmlinux or kallsyms file.\n\n");
932 
933 	if (rec->no_buildid_cache || rec->no_buildid)
934 		disable_buildid_cache();
935 
936 	if (evsel_list->nr_entries == 0 &&
937 	    perf_evlist__add_default(evsel_list) < 0) {
938 		pr_err("Not enough memory for event selector list\n");
939 		goto out_symbol_exit;
940 	}
941 
942 	err = perf_target__validate(&rec->opts.target);
943 	if (err) {
944 		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
945 		ui__warning("%s", errbuf);
946 	}
947 
948 	err = perf_target__parse_uid(&rec->opts.target);
949 	if (err) {
950 		int saved_errno = errno;
951 
952 		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
953 		ui__error("%s", errbuf);
954 
955 		err = -saved_errno;
956 		goto out_symbol_exit;
957 	}
958 
959 	err = -ENOMEM;
960 	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
961 		usage_with_options(record_usage, record_options);
962 
963 	if (rec->opts.user_interval != ULLONG_MAX)
964 		rec->opts.default_interval = rec->opts.user_interval;
965 	if (rec->opts.user_freq != UINT_MAX)
966 		rec->opts.freq = rec->opts.user_freq;
967 
968 	/*
969 	 * User specified count overrides default frequency.
970 	 */
971 	if (rec->opts.default_interval)
972 		rec->opts.freq = 0;
973 	else if (rec->opts.freq) {
974 		rec->opts.default_interval = rec->opts.freq;
975 	} else {
976 		ui__error("frequency and count are zero, aborting\n");
977 		err = -EINVAL;
978 		goto out_free_fd;
979 	}
980 
981 	err = __cmd_record(&record, argc, argv);
982 
983 	perf_evlist__munmap(evsel_list);
984 	perf_evlist__close(evsel_list);
985 out_free_fd:
986 	perf_evlist__delete_maps(evsel_list);
987 out_symbol_exit:
988 	symbol__exit();
989 	return err;
990 }
991