/* xref: /openbmc/linux/tools/perf/builtin-record.c (revision 84d517f3) */
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

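/*
 * Tool callback used while synthesizing user-space side events (mmap,
 * comm, task, etc. records built by perf itself rather than by the
 * kernel): it funnels each synthesized event into the perf.data file
 * via record__write(), so synthesized and kernel-generated records end
 * up in the same stream.
 */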
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

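/*
 * Drain one event ring buffer into the output file.  The kernel maps a
 * metadata page followed by a power-of-two data area, so the payload
 * starts at md->base + page_size and md->mask is (buffer size - 1).
 * 'head' is where the kernel will write next, md->prev is how far we
 * got last time; when the unread region wraps past the end of the
 * buffer, the copy is split in two.
 *
 * Worked example (hypothetical numbers, 64KiB buffer so mask == 65535):
 * old = 65000, head = 70000 gives size = 5000, but 65000 + 5000 runs
 * past the end, so first write 65536 - 65000 = 536 bytes from offset
 * 65000, then the remaining 4464 bytes from offset 0.  Finally the
 * tail pointer is advanced so the kernel may reuse the space.
 */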
static int record__mmap_read(struct record *rec, struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

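/*
 * State shared with the signal handlers below.  Only async-signal-safe
 * work happens in sig_handler(): it just raises these volatile flags,
 * and the main loop in __cmd_record() notices 'done' the next time it
 * wakes up from poll() or finishes a pass over the buffers.
 */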
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

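/*
 * Open a counter for every event in the list on the configured
 * CPUs/threads.  If sys_perf_event_open() refuses an event,
 * perf_evsel__fallback() may rewrite it into something weaker that the
 * system does support (e.g. the "cycles" hardware event into the
 * software "cpu-clock" event) and we retry via the try_again label;
 * only when no fallback exists is the error reported to the user.
 */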
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

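/*
 * Post-process the recorded data to mark which DSOs actually got
 * samples, so that only their build-ids need to be kept.  The current
 * file offset (lseek with SEEK_CUR) is the end of the data we just
 * wrote, and session->header.data_offset is where it started.
 */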
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record & report
	 * subcommands, we arrange the module mmap prior to the guest
	 * kernel mmap and trigger a preload of the dso, because by
	 * default guest module symbols are loaded from the guest
	 * kallsyms instead of /lib/modules/XXX/XXX.  This avoids
	 * missing symbols when the first address falls in a module
	 * rather than in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

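/*
 * PERF_RECORD_FINISHED_ROUND is a purely user-space event written after
 * every full pass over the mmap buffers.  Events from different buffers
 * arrive interleaved and only loosely ordered; the round marker tells
 * the report side that everything queued before the previous marker can
 * now be safely sorted by timestamp and flushed.
 */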
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting its want_signal parameter to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

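/*
 * The record session proper: set up signals and the session, optionally
 * fork the workload (held until perf_evlist__start_workload()), open
 * and mmap the counters, write the file header, synthesize the existing
 * system state (kernel/module maps, threads, guest machines), then loop
 * draining the buffers until the workload exits or we get a signal, and
 * finally fix up the header with the real data size and build-ids.
 */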
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

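	/*
	 * Main capture loop: drain every mmap buffer, and if that pass
	 * found nothing new either exit (when 'done' was raised by a
	 * signal or by the child exiting) or block in poll() until the
	 * kernel wakes us for more data.
	 */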
	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_child;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
			if (err < 0 && errno == EINTR)
				err = 0;
			waking++;
		}

		/*
		 * When perf is starting the traced process, the events
		 * die with it at the end and we wait for that, so there
		 * is no need to disable them in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[512];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

		/*
		 * Approximate RIP event size: 24 bytes.
		 */
		fprintf(stderr,
			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
			(double)rec->bytes_written / 1024.0 / 1024.0,
			file->path,
			rec->bytes_written / 24);
	}

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_END
};

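/*
 * Parse the -b/-j branch stack options into a PERF_SAMPLE_BRANCH_* mask.
 * The argument is a comma-separated list of the names in branch_modes[],
 * e.g. "-j any_call,u" yields PERF_SAMPLE_BRANCH_ANY_CALL |
 * PERF_SAMPLE_BRANCH_USER.  If the user only supplied privilege levels
 * (u/k/hv), the branch type defaults to "any".
 */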
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

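/*
 * Parse the dump size used for dwarf unwinding, compiled in only when
 * perf was built with dwarf unwind support.  The size is rounded up to
 * a multiple of sizeof(u64) and capped at USHRT_MAX rounded down to
 * such a multiple (65528), matching the limit the kernel enforces on
 * attr.sample_stack_user.
 */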
#ifdef HAVE_DWARF_UNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */

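/*
 * Parse a --call-graph argument: "fp" selects frame-pointer based
 * callchains, and with dwarf unwind support "dwarf[,size]" selects user
 * stack dumps of the given size (default 8192 bytes) unwound offline.
 * E.g. "--call-graph dwarf,4096" records 4KiB of user stack per sample.
 */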
int record_parse_callchain(const char *arg, struct record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_DWARF_UNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct record_opts *opts)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };

	pr_debug("callchain: type %s\n", str[opts->call_graph]);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct record_opts *opts = opt->value;
	int ret;

	opts->call_graph_enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;

	opts->call_graph_enabled = !unset;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

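/*
 * perfconfig hook: lets ~/.perfconfig (or the system config file)
 * provide a default call-graph mode, for instance:
 *
 *	[record]
 *		call-graph = dwarf,4096
 *
 * Anything else is handed on to perf_default_config().
 */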
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.call-graph"))
		return record_parse_callchain(value, &rec->opts);

	return perf_default_config(var, value, cb);
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need access to it in record__exit, which is
 * called after cmd_record() exits; but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	return err;
}