xref: /openbmc/linux/tools/perf/builtin-record.c (revision 5104d265)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16 
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 
28 #include <unistd.h>
29 #include <sched.h>
30 #include <sys/mman.h>
31 
/*
 * Fallback implementation of the GNU on_exit() extension for C
 * libraries that lack it: registered callbacks receive the process
 * exit status and a caller-supplied argument when exit() runs.
 */
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/*
 * Wrap exit() so the status it is given is captured in __exitcode and
 * can later be passed to the registered callbacks by the atexit() hook.
 */
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	/* Reject registrations once the fixed-size table is full. */
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		/* First registration: hook our dispatcher into atexit(). */
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	/*
	 * NOTE(review): callbacks run in registration order here, whereas
	 * glibc's on_exit() runs them in reverse order of registration --
	 * confirm callers do not depend on the ordering.
	 */
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
63 
/*
 * State for one 'perf record' run: tool callbacks, parsed options,
 * output bookkeeping and the session/evlist being recorded.
 */
struct perf_record {
	struct perf_tool	tool;		/* callbacks; embedded so container_of() can recover us */
	struct perf_record_opts	opts;		/* parsed command line options */
	u64			bytes_written;	/* event payload bytes written so far */
	const char		*output_name;	/* output file name (NULL => default) */
	struct perf_evlist	*evlist;	/* events being recorded */
	struct perf_session	*session;	/* output session/header state */
	const char		*progname;	/* argv[0], used by psignal() reporting */
	int			output;		/* output file descriptor */
	unsigned int		page_size;	/* runtime page size, for mmap data offset */
	int			realtime_prio;	/* non-zero: SCHED_FIFO priority to use */
	bool			no_buildid;	/* skip build-id post-processing */
	bool			no_buildid_cache; /* do not update the build-id cache */
	long			samples;	/* mmap reads that found new data */
	off_t			post_processing_offset; /* file offset where events begin */
};
80 
/*
 * Account for 'size' bytes that were written to the output fd by a
 * helper without going through write_output() (e.g. the tracing-data
 * synthesis in __cmd_record()).
 */
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}
85 
86 static int write_output(struct perf_record *rec, void *buf, size_t size)
87 {
88 	while (size) {
89 		int ret = write(rec->output, buf, size);
90 
91 		if (ret < 0) {
92 			pr_err("failed to write\n");
93 			return -1;
94 		}
95 
96 		size -= ret;
97 		buf += ret;
98 
99 		rec->bytes_written += ret;
100 	}
101 
102 	return 0;
103 }
104 
105 static int process_synthesized_event(struct perf_tool *tool,
106 				     union perf_event *event,
107 				     struct perf_sample *sample __maybe_unused,
108 				     struct machine *machine __maybe_unused)
109 {
110 	struct perf_record *rec = container_of(tool, struct perf_record, tool);
111 	if (write_output(rec, event, event->header.size) < 0)
112 		return -1;
113 
114 	return 0;
115 }
116 
/*
 * Copy all new data from one perf mmap ring buffer to the output file.
 * Returns 0 on success (including "nothing new"), -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* Data area begins one page past the mmap base (the first page
	 * holds the ring-buffer control header). */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	/* Nothing new since the last read. */
	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	/*
	 * If the new data wraps around the end of the ring buffer,
	 * write the tail chunk (from 'old' to the end of the buffer)
	 * first.
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Write the (remaining) chunk up to 'head'. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	/* Publish the new tail so the kernel can reuse the space. */
	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
160 
static volatile int done = 0;		/* set once the record loop should stop */
static volatile int signr = -1;		/* last signal received, -1 if none */
static volatile int child_finished = 0;	/* the forked workload has exited */

/*
 * Common handler for SIGCHLD/SIGINT/SIGUSR1/SIGTERM: remember which
 * signal fired and ask the main loop to wind down.
 */
static void sig_handler(int sig)
{
	child_finished |= (sig == SIGCHLD);
	done = 1;
	signr = sig;
}
173 
/*
 * on_exit() callback: make sure the forked workload is torn down, then
 * restore the default disposition of the signal that stopped us so the
 * caller sees a conventional signal exit.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		/* Kill the workload unless it already exited (SIGCHLD seen). */
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		/* Reap the child and report if it died from a signal. */
		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/* Normal exit (no signal) or SIGUSR1: nothing more to do. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}
193 
/*
 * Open all configured events on the target CPUs/threads, apply the
 * event filters and mmap the resulting fds.
 * Returns 0 on success, negative on error (after printing a message).
 */
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			/*
			 * perf_evsel__fallback() may adjust the event config
			 * for another attempt ('msg' explains what changed);
			 * keep retrying until it has nothing left to offer.
			 */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		/* Distinguish the common mmap failure causes for the user. */
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
253 
254 static int process_buildids(struct perf_record *rec)
255 {
256 	u64 size = lseek(rec->output, 0, SEEK_CUR);
257 
258 	if (size == 0)
259 		return 0;
260 
261 	rec->session->fd = rec->output;
262 	return __perf_session__process_events(rec->session, rec->post_processing_offset,
263 					      size - rec->post_processing_offset,
264 					      size, &build_id__mark_dso_hit_ops);
265 }
266 
/*
 * on_exit() callback: on a successful exit, finalize the perf.data
 * file (account the written bytes in the header, collect build-ids,
 * rewrite the header) and release the session/evlist resources.
 * Skipped entirely when piping, since there is no file to finalize.
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	/* Do not finalize the file on an error exit. */
	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
286 
/* Synthesize module and kernel mmap events for one guest machine. */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing subcommand record&report,
	 * we arrange module mmaps prior to the guest kernel mmap and trigger
	 * a dso preload, because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX.  This method
	 * is used to avoid missing symbols when the first address is in a
	 * module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
318 
/*
 * PERF_RECORD_FINISHED_ROUND marker, appended after each full pass over
 * the mmap buffers (see perf_record__mmap_read_all()); the session code
 * uses it to bound event reordering on the report side.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
323 
324 static int perf_record__mmap_read_all(struct perf_record *rec)
325 {
326 	int i;
327 	int rc = 0;
328 
329 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
330 		if (rec->evlist->mmap[i].base) {
331 			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
332 				rc = -1;
333 				goto out;
334 			}
335 		}
336 	}
337 
338 	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
339 		rc = write_output(rec, &finished_round_event,
340 				  sizeof(finished_round_event));
341 
342 out:
343 	return rc;
344 }
345 
/*
 * The guts of 'perf record': install signal/exit handlers, set up the
 * output file and session, open the events, write the file header,
 * synthesize the pre-existing kernel/thread state, then loop draining
 * the mmap buffers until told to stop (or the workload exits).
 * Returns 0 on success, negative on error.  Note that final file
 * cleanup happens in the on_exit() handlers registered below.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	/* Workload teardown + signal restoration runs from exit(). */
	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	/* Default output: stdout when it is a pipe, else "perf.data". */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Keep the previous data file around as <name>.old. */
			char oldname[PATH_MAX];
			snprintf(oldname, sizeof(oldname), "%s.old",
				 output_name);
			unlink(oldname);
			rename(output_name, oldname);
		}
	}

	flags = O_CREAT|O_RDWR|O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    true, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* Start with every header feature set, then clear the ones that
	 * don't apply to this run. */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	/* Fork (but do not yet exec) the workload so it can be counted
	 * from the very first instruction. */
	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* Everything written past this offset is event data eligible for
	 * build-id post-processing (see process_buildids()). */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	/* A pipe has no header to rewrite later, so the attribute /
	 * event-type / tracing metadata must be synthesized in-stream. */
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* Tracing data was written straight to 'output';
			 * account for it in bytes_written. */
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		/* Some kernels expose _stext but not _text in kallsyms. */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	/* Synthesize existing threads for -p/-t targets, or all threads
	 * for CPU-wide targets; a forked workload needs neither. */
	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		/* No new data this pass: block in poll() until an fd is
		 * readable or a signal handler sets 'done'. */
		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	/* File finalization happens in the on_exit() handlers. */
	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
609 
/* Helpers to build the branch filter name -> sample-flag table. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;	/* user-visible filter name */
	int mode;		/* corresponding PERF_SAMPLE_BRANCH_* flag */
};

/* Accepted -b/-j branch filter names; terminated by a NULL name. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};
630 
/*
 * Parse the -b/-j branch filter option: a comma separated list of
 * names from branch_modes[], OR'ed together into *opt->value.  If only
 * privilege-level filters were given, default to sampling any branch
 * type.  Returns 0 on success, -1 on error.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
/* The privilege-level-only filter bits (no branch-type selection). */
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			/* Split on ',' in place. */
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			/* Look the token up in the filter name table. */
			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}
693 
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse a user supplied stack dump size string into *_size.  The value
 * is rounded up to a multiple of sizeof(u64) and must be non-zero and
 * no larger than USHRT_MAX rounded down to a u64 boundary.
 * Returns 0 on success, -1 (after printing a message) on bad input.
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	errno = 0;
	size = strtoul(str, &endptr, 0);

	do {
		/* Reject trailing garbage and out-of-range values. */
		if (*endptr || errno == ERANGE)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	/* %lu: max_size is unsigned long (the old %ld was a -Wformat bug). */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */
721 
722 int record_parse_callchain_opt(const struct option *opt,
723 			       const char *arg, int unset)
724 {
725 	struct perf_record_opts *opts = opt->value;
726 	char *tok, *name, *saveptr = NULL;
727 	char *buf;
728 	int ret = -1;
729 
730 	/* --no-call-graph */
731 	if (unset)
732 		return 0;
733 
734 	/* We specified default option if none is provided. */
735 	BUG_ON(!arg);
736 
737 	/* We need buffer that we know we can write to. */
738 	buf = malloc(strlen(arg) + 1);
739 	if (!buf)
740 		return -ENOMEM;
741 
742 	strcpy(buf, arg);
743 
744 	tok = strtok_r((char *)buf, ",", &saveptr);
745 	name = tok ? : (char *)buf;
746 
747 	do {
748 		/* Framepointer style */
749 		if (!strncmp(name, "fp", sizeof("fp"))) {
750 			if (!strtok_r(NULL, ",", &saveptr)) {
751 				opts->call_graph = CALLCHAIN_FP;
752 				ret = 0;
753 			} else
754 				pr_err("callchain: No more arguments "
755 				       "needed for -g fp\n");
756 			break;
757 
758 #ifdef LIBUNWIND_SUPPORT
759 		/* Dwarf style */
760 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
761 			const unsigned long default_stack_dump_size = 8192;
762 
763 			ret = 0;
764 			opts->call_graph = CALLCHAIN_DWARF;
765 			opts->stack_dump_size = default_stack_dump_size;
766 
767 			tok = strtok_r(NULL, ",", &saveptr);
768 			if (tok) {
769 				unsigned long size = 0;
770 
771 				ret = get_stack_size(tok, &size);
772 				opts->stack_dump_size = size;
773 			}
774 
775 			if (!ret)
776 				pr_debug("callchain: stack dump size %d\n",
777 					 opts->stack_dump_size);
778 #endif /* LIBUNWIND_SUPPORT */
779 		} else {
780 			pr_err("callchain: Unknown -g option "
781 			       "value: %s\n", arg);
782 			break;
783 		}
784 
785 	} while (0);
786 
787 	free(buf);
788 
789 	if (!ret)
790 		pr_debug("callchain: type %d\n", opts->call_graph);
791 
792 	return ret;
793 }
794 
/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,	/* UINT_MAX == "not set by the user" */
		.user_freq	     = UINT_MAX,	/* ditto, see cmd_record() */
		.user_interval	     = ULLONG_MAX,	/* ditto */
		.freq		     = 4000,		/* default sampling frequency */
		.target		     = {
			.uses_mmap   = true,
		},
	},
};

/* Help text for -g; the accepted modes vary with dwarf unwind support. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
830 
/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
/* Command line option table for 'perf record'; mostly fills 'record' above. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
			     "mode[,dump_size]", record_callchain_help,
			     &record_parse_callchain_opt, "fp"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_END()
};
903 
/*
 * Entry point for 'perf record': parse and validate the command line,
 * build the evlist and target maps, resolve the sampling period /
 * frequency, then run the record loop via __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need either a workload to run or an existing target to attach to. */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* Add the default event when no -e option was given. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		/* Inconsistent target options only warrant a warning. */
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	/* A user-specified period/frequency replaces the defaults. */
	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);

	perf_evlist__munmap(evsel_list);
	perf_evlist__close(evsel_list);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
1004