xref: /openbmc/linux/tools/perf/builtin-record.c (revision 275876e2)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include "util/parse-options.h"
15 #include "util/parse-events.h"
16 
17 #include "util/header.h"
18 #include "util/event.h"
19 #include "util/evlist.h"
20 #include "util/evsel.h"
21 #include "util/debug.h"
22 #include "util/session.h"
23 #include "util/tool.h"
24 #include "util/symbol.h"
25 #include "util/cpumap.h"
26 #include "util/thread_map.h"
27 #include "util/data.h"
28 
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32 
33 
/*
 * State for one 'perf record' run.  Embeds the perf_tool callback
 * table first; callbacks recover this struct via container_of()
 * (see process_synthesized_event()).
 */
34 struct record {
35 	struct perf_tool	tool;		/* event-processing callbacks */
36 	struct record_opts	opts;		/* command line / config options */
37 	u64			bytes_written;	/* payload bytes written to the output file */
38 	struct perf_data_file	file;		/* the perf.data output file */
39 	struct perf_evlist	*evlist;	/* events being recorded */
40 	struct perf_session	*session;
41 	const char		*progname;	/* argv[0] of the traced workload */
42 	int			realtime_prio;	/* nonzero: SCHED_FIFO priority to request */
43 	bool			no_buildid;	/* -B: don't collect build-ids in perf.data */
44 	bool			no_buildid_cache; /* -N: don't update the build-id cache */
45 	long			samples;	/* non-empty mmap reads; detects idle iterations */
46 };
47 
48 static int record__write(struct record *rec, void *bf, size_t size)
49 {
50 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
51 		pr_err("failed to write perf data, error: %m\n");
52 		return -1;
53 	}
54 
55 	rec->bytes_written += size;
56 	return 0;
57 }
58 
59 static int process_synthesized_event(struct perf_tool *tool,
60 				     union perf_event *event,
61 				     struct perf_sample *sample __maybe_unused,
62 				     struct machine *machine __maybe_unused)
63 {
64 	struct record *rec = container_of(tool, struct record, tool);
65 	return record__write(rec, event, event->header.size);
66 }
67 
/*
 * Drain one mmap'ed ring buffer into the output file.
 *
 * The kernel writes events between the tail (md->prev, ours) and the
 * head (kernel's).  The data area starts one page past md->base (the
 * first page is the control/metadata page).  A read that crosses the
 * end of the ring is split into two writes.
 *
 * Returns 0 on success (including "nothing to read"), -1 if writing
 * to the output file failed.
 */
68 static int record__mmap_read(struct record *rec, struct perf_mmap *md)
69 {
70 	unsigned int head = perf_mmap__read_head(md);
71 	unsigned int old = md->prev;
72 	unsigned char *data = md->base + page_size;
73 	unsigned long size;
74 	void *buf;
75 	int rc = 0;
76 
	/* Nothing new in the ring buffer. */
77 	if (old == head)
78 		return 0;
79 
80 	rec->samples++;
81 
82 	size = head - old;
83 
	/* Wrapped around the end of the ring: copy the tail chunk first. */
84 	if ((old & md->mask) + size != (head & md->mask)) {
85 		buf = &data[old & md->mask];
86 		size = md->mask + 1 - (old & md->mask);
87 		old += size;
88 
89 		if (record__write(rec, buf, size) < 0) {
90 			rc = -1;
91 			goto out;
92 		}
93 	}
94 
	/* Copy the (remaining) contiguous chunk up to head. */
95 	buf = &data[old & md->mask];
96 	size = head - old;
97 	old += size;
98 
99 	if (record__write(rec, buf, size) < 0) {
100 		rc = -1;
101 		goto out;
102 	}
103 
	/* Consumed everything: publish the new tail to the kernel. */
104 	md->prev = old;
105 	perf_mmap__write_tail(md, old);
106 
107 out:
108 	return rc;
109 }
110 
/* Flags shared between the signal handlers below and the record loop. */
111 static volatile int done = 0;		/* tell the main loop in __cmd_record() to stop */
112 static volatile int signr = -1;		/* fatal signal to re-raise at exit, or -1 for none */
113 static volatile int child_finished = 0;	/* SIGCHLD seen: the forked workload has exited */
114 
115 static void sig_handler(int sig)
116 {
117 	if (sig == SIGCHLD)
118 		child_finished = 1;
119 	else
120 		signr = sig;
121 
122 	done = 1;
123 }
124 
125 static void record__sig_exit(void)
126 {
127 	if (signr == -1)
128 		return;
129 
130 	signal(signr, SIG_DFL);
131 	raise(signr);
132 }
133 
/*
 * Configure and open all the event counters, apply event filters and
 * mmap the ring buffers.
 *
 * On an open failure we first try perf_evsel__fallback() (e.g. a less
 * privileged variant of the event) and retry; only when no fallback
 * exists is the error reported to the user.
 *
 * Returns 0 on success, a negative errno (or -1 for filter errors) on
 * failure.
 */
134 static int record__open(struct record *rec)
135 {
136 	char msg[512];
137 	struct perf_evsel *pos;
138 	struct perf_evlist *evlist = rec->evlist;
139 	struct perf_session *session = rec->session;
140 	struct record_opts *opts = &rec->opts;
141 	int rc = 0;
142 
143 	perf_evlist__config(evlist, opts);
144 
145 	evlist__for_each(evlist, pos) {
146 try_again:
147 		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
148 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
149 				if (verbose)
150 					ui__warning("%s\n", msg);
151 				goto try_again;
152 			}
153 
			/* capture errno before any call can clobber it */
154 			rc = -errno;
155 			perf_evsel__open_strerror(pos, &opts->target,
156 						  errno, msg, sizeof(msg));
157 			ui__error("%s\n", msg);
158 			goto out;
159 		}
160 	}
161 
162 	if (perf_evlist__apply_filters(evlist)) {
163 		error("failed to set filter with %d (%s)\n", errno,
164 			strerror(errno));
165 		rc = -1;
166 		goto out;
167 	}
168 
169 	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
170 		if (errno == EPERM) {
171 			pr_err("Permission error mapping pages.\n"
172 			       "Consider increasing "
173 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
174 			       "or try again with a smaller value of -m/--mmap_pages.\n"
175 			       "(current value: %u)\n", opts->mmap_pages);
176 			rc = -errno;
177 		} else {
178 			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
179 			rc = -errno;
180 		}
181 		goto out;
182 	}
183 
184 	session->evlist = evlist;
185 	perf_session__set_id_hdr_size(session);
186 out:
187 	return rc;
188 }
189 
190 static int process_buildids(struct record *rec)
191 {
192 	struct perf_data_file *file  = &rec->file;
193 	struct perf_session *session = rec->session;
194 	u64 start = session->header.data_offset;
195 
196 	u64 size = lseek(file->fd, 0, SEEK_CUR);
197 	if (size == 0)
198 		return 0;
199 
200 	return __perf_session__process_events(session, start,
201 					      size - start,
202 					      size, &build_id__mark_dso_hit_ops);
203 }
204 
205 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
206 {
207 	int err;
208 	struct perf_tool *tool = data;
209 	/*
210 	 *As for guest kernel when processing subcommand record&report,
211 	 *we arrange module mmap prior to guest kernel mmap and trigger
212 	 *a preload dso because default guest module symbols are loaded
213 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
214 	 *method is used to avoid symbol missing when the first addr is
215 	 *in module instead of in guest kernel.
216 	 */
217 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
218 					     machine);
219 	if (err < 0)
220 		pr_err("Couldn't record guest kernel [%d]'s reference"
221 		       " relocation symbol.\n", machine->pid);
222 
223 	/*
224 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
225 	 * have no _text sometimes.
226 	 */
227 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
228 						 machine);
229 	if (err < 0)
230 		pr_err("Couldn't record guest kernel [%d]'s reference"
231 		       " relocation symbol.\n", machine->pid);
232 }
233 
/*
 * Header-only PERF_RECORD_FINISHED_ROUND marker, written after each
 * pass over all ring buffers that produced data (see
 * record__mmap_read_all()).
 */
234 static struct perf_event_header finished_round_event = {
235 	.size = sizeof(struct perf_event_header),
236 	.type = PERF_RECORD_FINISHED_ROUND,
237 };
238 
239 static int record__mmap_read_all(struct record *rec)
240 {
241 	u64 bytes_written = rec->bytes_written;
242 	int i;
243 	int rc = 0;
244 
245 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
246 		if (rec->evlist->mmap[i].base) {
247 			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
248 				rc = -1;
249 				goto out;
250 			}
251 		}
252 	}
253 
254 	/*
255 	 * Mark the round finished in case we wrote
256 	 * at least one event.
257 	 */
258 	if (bytes_written != rec->bytes_written)
259 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
260 
261 out:
262 	return rc;
263 }
264 
265 static void record__init_features(struct record *rec)
266 {
267 	struct perf_session *session = rec->session;
268 	int feat;
269 
270 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
271 		perf_header__set_feat(&session->header, feat);
272 
273 	if (rec->no_buildid)
274 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
275 
276 	if (!have_tracepoints(&rec->evlist->entries))
277 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
278 
279 	if (!rec->opts.branch_stack)
280 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
281 }
282 
/* errno from a failed workload exec, delivered via the signal below */
283 static volatile int workload_exec_errno;
284 
285 /*
286  * perf_evlist__prepare_workload will send a SIGUSR1
287  * if the fork fails, since we asked by setting its
288  * want_signal to true.
289  */
290 static void workload_exec_failed_signal(int signo __maybe_unused,
291 					siginfo_t *info,
292 					void *ucontext __maybe_unused)
293 {
	/*
	 * si_value carries the child's errno (presumably queued with
	 * sigqueue() by the prepare_workload helper — see its code).
	 */
294 	workload_exec_errno = info->si_value.sival_int;
295 	done = 1;
296 	child_finished = 1;
297 }
298 
/*
 * The main record driver: set up the session and counters, synthesize
 * the initial events (attrs, kernel/modules mmaps, existing threads),
 * start the workload, then loop draining the ring buffers until asked
 * to stop, and finally finish the output file.
 *
 * Returns 0 on success; otherwise a negative error or, for a forked
 * workload, the workload's exit status.
 */
299 static int __cmd_record(struct record *rec, int argc, const char **argv)
300 {
301 	int err;
302 	int status = 0;
303 	unsigned long waking = 0;
304 	const bool forks = argc > 0;
305 	struct machine *machine;
306 	struct perf_tool *tool = &rec->tool;
307 	struct record_opts *opts = &rec->opts;
308 	struct perf_data_file *file = &rec->file;
309 	struct perf_session *session;
310 	bool disabled = false;
311 
312 	rec->progname = argv[0];
313 
314 	atexit(record__sig_exit);
315 	signal(SIGCHLD, sig_handler);
316 	signal(SIGINT, sig_handler);
317 	signal(SIGTERM, sig_handler);
318 
319 	session = perf_session__new(file, false, NULL);
320 	if (session == NULL) {
321 		pr_err("Perf session creation failed.\n");
322 		return -1;
323 	}
324 
325 	rec->session = session;
326 
327 	record__init_features(rec);
328 
	/* Fork (but don't yet exec) the workload so we can set up first. */
329 	if (forks) {
330 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
331 						    argv, file->is_pipe,
332 						    workload_exec_failed_signal);
333 		if (err < 0) {
334 			pr_err("Couldn't run the workload!\n");
335 			status = err;
336 			goto out_delete_session;
337 		}
338 	}
339 
340 	if (record__open(rec) != 0) {
341 		err = -1;
342 		goto out_child;
343 	}
344 
345 	if (!rec->evlist->nr_groups)
346 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
347 
	/* Write the (preliminary) file header, or the pipe-mode header. */
348 	if (file->is_pipe) {
349 		err = perf_header__write_pipe(file->fd);
350 		if (err < 0)
351 			goto out_child;
352 	} else {
353 		err = perf_session__write_header(session, rec->evlist,
354 						 file->fd, false);
355 		if (err < 0)
356 			goto out_child;
357 	}
358 
359 	if (!rec->no_buildid
360 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
361 		pr_err("Couldn't generate buildids. "
362 		       "Use --no-buildid to profile anyway.\n");
363 		err = -1;
364 		goto out_child;
365 	}
366 
367 	machine = &session->machines.host;
368 
	/* Pipe mode: attrs and tracing data go inline into the stream. */
369 	if (file->is_pipe) {
370 		err = perf_event__synthesize_attrs(tool, session,
371 						   process_synthesized_event);
372 		if (err < 0) {
373 			pr_err("Couldn't synthesize attrs.\n");
374 			goto out_child;
375 		}
376 
377 		if (have_tracepoints(&rec->evlist->entries)) {
378 			/*
379 			 * FIXME err <= 0 here actually means that
380 			 * there were no tracepoints so its not really
381 			 * an error, just that we don't need to
382 			 * synthesize anything.  We really have to
383 			 * return this more properly and also
384 			 * propagate errors that now are calling die()
385 			 */
386 			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
387 								  process_synthesized_event);
388 			if (err <= 0) {
389 				pr_err("Couldn't record tracing data.\n");
390 				goto out_child;
391 			}
392 			rec->bytes_written += err;
393 		}
394 	}
395 
	/* Kernel/module/thread synthesis: warn but carry on if these fail. */
396 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
397 						 machine);
398 	if (err < 0)
399 		pr_err("Couldn't record kernel reference relocation symbol\n"
400 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
401 		       "Check /proc/kallsyms permission or run as root.\n");
402 
403 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
404 					     machine);
405 	if (err < 0)
406 		pr_err("Couldn't record kernel module information.\n"
407 		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
408 		       "Check /proc/modules permission or run as root.\n");
409 
410 	if (perf_guest) {
411 		machines__process_guests(&session->machines,
412 					 perf_event__synthesize_guest_os, tool);
413 	}
414 
415 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
416 					    process_synthesized_event, opts->sample_address);
417 	if (err != 0)
418 		goto out_child;
419 
420 	if (rec->realtime_prio) {
421 		struct sched_param param;
422 
423 		param.sched_priority = rec->realtime_prio;
424 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
425 			pr_err("Could not set realtime priority.\n");
426 			err = -1;
427 			goto out_child;
428 		}
429 	}
430 
431 	/*
432 	 * When perf is starting the traced process, all the events
433 	 * (apart from group members) have enable_on_exec=1 set,
434 	 * so don't spoil it by prematurely enabling them.
435 	 */
436 	if (!target__none(&opts->target) && !opts->initial_delay)
437 		perf_evlist__enable(rec->evlist);
438 
439 	/*
440 	 * Let the child rip
441 	 */
442 	if (forks)
443 		perf_evlist__start_workload(rec->evlist);
444 
445 	if (opts->initial_delay) {
446 		usleep(opts->initial_delay * 1000);
447 		perf_evlist__enable(rec->evlist);
448 	}
449 
	/* Main loop: drain ring buffers, sleep in poll() when idle. */
450 	for (;;) {
451 		int hits = rec->samples;
452 
453 		if (record__mmap_read_all(rec) < 0) {
454 			err = -1;
455 			goto out_child;
456 		}
457 
458 		if (hits == rec->samples) {
459 			if (done)
460 				break;
461 			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
462 			/*
463 			 * Propagate error, only if there's any. Ignore positive
464 			 * number of returned events and interrupt error.
465 			 */
466 			if (err > 0 || (err < 0 && errno == EINTR))
467 				err = 0;
468 			waking++;
469 		}
470 
471 		/*
472 		 * When perf is starting the traced process, at the end events
473 		 * die with the process and we wait for that. Thus no need to
474 		 * disable events in this case.
475 		 */
476 		if (done && !disabled && !target__none(&opts->target)) {
477 			perf_evlist__disable(rec->evlist);
478 			disabled = true;
479 		}
480 	}
481 
482 	if (forks && workload_exec_errno) {
483 		char msg[512];
484 		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
485 		pr_err("Workload failed: %s\n", emsg);
486 		err = -1;
487 		goto out_child;
488 	}
489 
490 	if (!quiet) {
491 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
492 
493 		/*
494 		 * Approximate RIP event size: 24 bytes.
495 		 */
496 		fprintf(stderr,
497 			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
498 			(double)rec->bytes_written / 1024.0 / 1024.0,
499 			file->path,
500 			rec->bytes_written / 24);
501 	}
502 
503 out_child:
	/* Reap the workload and fold its exit status into ours. */
504 	if (forks) {
505 		int exit_status;
506 
507 		if (!child_finished)
508 			kill(rec->evlist->workload.pid, SIGTERM);
509 
510 		wait(&exit_status);
511 
512 		if (err < 0)
513 			status = err;
514 		else if (WIFEXITED(exit_status))
515 			status = WEXITSTATUS(exit_status);
516 		else if (WIFSIGNALED(exit_status))
517 			signr = WTERMSIG(exit_status);
518 	} else
519 		status = err;
520 
	/* Finalize the on-disk header with the real data size / build-ids. */
521 	if (!err && !file->is_pipe) {
522 		rec->session->header.data_size += rec->bytes_written;
523 
524 		if (!rec->no_buildid)
525 			process_buildids(rec);
526 		perf_session__write_header(rec->session, rec->evlist,
527 					   file->fd, true);
528 	}
529 
530 out_delete_session:
531 	perf_session__delete(session);
532 	return status;
533 }
534 
/* Initializer helpers for the branch_modes[] table below. */
535 #define BRANCH_OPT(n, m) \
536 	{ .name = n, .mode = (m) }
537 
538 #define BRANCH_END { .name = NULL }
539 
/* Maps a -j/--branch-filter keyword to its PERF_SAMPLE_BRANCH_* bit. */
540 struct branch_mode {
541 	const char *name;
542 	int mode;
543 };
544 
/*
 * Keywords accepted by -j/--branch-filter (matched case-insensitively
 * in parse_branch_stack()); terminated by BRANCH_END.
 */
545 static const struct branch_mode branch_modes[] = {
546 	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
547 	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
548 	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
549 	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
550 	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
551 	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
552 	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
553 	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
554 	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
555 	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
556 	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
557 	BRANCH_END
558 };
559 
/*
 * Option callback for -b/--branch-any and -j/--branch-filter: parse a
 * comma separated list of branch_modes[] keywords into the
 * PERF_SAMPLE_BRANCH_* bitmask at opt->value.
 *
 * Returns 0 on success (including "already unset"), -1 if the mask was
 * already set, allocation failed, or a keyword is unknown.
 */
560 static int
561 parse_branch_stack(const struct option *opt, const char *str, int unset)
562 {
/* The pure privilege-level bits: user/kernel/hv selectors only. */
563 #define ONLY_PLM \
564 	(PERF_SAMPLE_BRANCH_USER	|\
565 	 PERF_SAMPLE_BRANCH_KERNEL	|\
566 	 PERF_SAMPLE_BRANCH_HV)
567 
568 	uint64_t *mode = (uint64_t *)opt->value;
569 	const struct branch_mode *br;
570 	char *s, *os = NULL, *p;
571 	int ret = -1;
572 
573 	if (unset)
574 		return 0;
575 
576 	/*
577 	 * cannot set it twice, -b + --branch-filter for instance
578 	 */
579 	if (*mode)
580 		return -1;
581 
582 	/* str may be NULL in case no arg is passed to -b */
583 	if (str) {
584 		/* because str is read-only */
585 		s = os = strdup(str);
586 		if (!s)
587 			return -1;
588 
		/* Split on commas in place and look up each keyword. */
589 		for (;;) {
590 			p = strchr(s, ',');
591 			if (p)
592 				*p = '\0';
593 
594 			for (br = branch_modes; br->name; br++) {
595 				if (!strcasecmp(s, br->name))
596 					break;
597 			}
598 			if (!br->name) {
599 				ui__warning("unknown branch filter %s,"
600 					    " check man page\n", s);
601 				goto error;
602 			}
603 
604 			*mode |= br->mode;
605 
606 			if (!p)
607 				break;
608 
609 			s = p + 1;
610 		}
611 	}
612 	ret = 0;
613 
614 	/* default to any branch */
615 	if ((*mode & ~ONLY_PLM) == 0) {
616 		*mode = PERF_SAMPLE_BRANCH_ANY;
617 	}
618 error:
619 	free(os);
620 	return ret;
621 }
622 
#ifdef HAVE_DWARF_UNWIND_SUPPORT
/*
 * Parse the user supplied stack dump size for dwarf unwinding.
 *
 * The value is rounded up to a multiple of sizeof(u64) and must be
 * nonzero and no larger than USHRT_MAX rounded down to that multiple
 * (the attr field holding it is 16 bits wide).
 *
 * Returns 0 and stores the result in *_size on success; -1 on
 * malformed or out-of-range input (an error is printed).
 */
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		/* trailing garbage after the number */
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	/* max_size is unsigned long: %lu, not the old (UB) %ld */
	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
650 
651 int record_parse_callchain(const char *arg, struct record_opts *opts)
652 {
653 	char *tok, *name, *saveptr = NULL;
654 	char *buf;
655 	int ret = -1;
656 
657 	/* We need buffer that we know we can write to. */
658 	buf = malloc(strlen(arg) + 1);
659 	if (!buf)
660 		return -ENOMEM;
661 
662 	strcpy(buf, arg);
663 
664 	tok = strtok_r((char *)buf, ",", &saveptr);
665 	name = tok ? : (char *)buf;
666 
667 	do {
668 		/* Framepointer style */
669 		if (!strncmp(name, "fp", sizeof("fp"))) {
670 			if (!strtok_r(NULL, ",", &saveptr)) {
671 				opts->call_graph = CALLCHAIN_FP;
672 				ret = 0;
673 			} else
674 				pr_err("callchain: No more arguments "
675 				       "needed for -g fp\n");
676 			break;
677 
678 #ifdef HAVE_DWARF_UNWIND_SUPPORT
679 		/* Dwarf style */
680 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
681 			const unsigned long default_stack_dump_size = 8192;
682 
683 			ret = 0;
684 			opts->call_graph = CALLCHAIN_DWARF;
685 			opts->stack_dump_size = default_stack_dump_size;
686 
687 			tok = strtok_r(NULL, ",", &saveptr);
688 			if (tok) {
689 				unsigned long size = 0;
690 
691 				ret = get_stack_size(tok, &size);
692 				opts->stack_dump_size = size;
693 			}
694 #endif /* HAVE_DWARF_UNWIND_SUPPORT */
695 		} else {
696 			pr_err("callchain: Unknown --call-graph option "
697 			       "value: %s\n", arg);
698 			break;
699 		}
700 
701 	} while (0);
702 
703 	free(buf);
704 	return ret;
705 }
706 
707 static void callchain_debug(struct record_opts *opts)
708 {
709 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
710 
711 	pr_debug("callchain: type %s\n", str[opts->call_graph]);
712 
713 	if (opts->call_graph == CALLCHAIN_DWARF)
714 		pr_debug("callchain: stack dump size %d\n",
715 			 opts->stack_dump_size);
716 }
717 
718 int record_parse_callchain_opt(const struct option *opt,
719 			       const char *arg,
720 			       int unset)
721 {
722 	struct record_opts *opts = opt->value;
723 	int ret;
724 
725 	opts->call_graph_enabled = !unset;
726 
727 	/* --no-call-graph */
728 	if (unset) {
729 		opts->call_graph = CALLCHAIN_NONE;
730 		pr_debug("callchain: disabled\n");
731 		return 0;
732 	}
733 
734 	ret = record_parse_callchain(arg, opts);
735 	if (!ret)
736 		callchain_debug(opts);
737 
738 	return ret;
739 }
740 
741 int record_callchain_opt(const struct option *opt,
742 			 const char *arg __maybe_unused,
743 			 int unset __maybe_unused)
744 {
745 	struct record_opts *opts = opt->value;
746 
747 	opts->call_graph_enabled = !unset;
748 
749 	if (opts->call_graph == CALLCHAIN_NONE)
750 		opts->call_graph = CALLCHAIN_FP;
751 
752 	callchain_debug(opts);
753 	return 0;
754 }
755 
756 static int perf_record_config(const char *var, const char *value, void *cb)
757 {
758 	struct record *rec = cb;
759 
760 	if (!strcmp(var, "record.call-graph"))
761 		return record_parse_callchain(value, &rec->opts);
762 
763 	return perf_default_config(var, value, cb);
764 }
765 
/* Usage lines printed by usage_with_options() / parse_options(). */
766 static const char * const record_usage[] = {
767 	"perf record [<options>] [<command>]",
768 	"perf record [<options>] -- <command> [<options>]",
769 	NULL
770 };
771 
772 /*
773  * XXX Ideally would be local to cmd_record() and passed to a record__new
774  * because we need to have access to it in record__exit, that is called
775  * after cmd_record() exits, but since record_options need to be accessible to
776  * builtin-script, leave it here.
777  *
778  * At least we don't ouch it in all the other functions here directly.
779  *
780  * Just say no to tons of global variables, sigh.
781  */
782 static struct record record = {
783 	.opts = {
		/* UINT_MAX/ULLONG_MAX look like "not set by the user"
		 * sentinels — confirm against record_opts__config(). */
784 		.mmap_pages	     = UINT_MAX,
785 		.user_freq	     = UINT_MAX,
786 		.user_interval	     = ULLONG_MAX,
787 		.freq		     = 4000,	/* default -F sampling frequency */
788 		.target		     = {
789 			.uses_mmap   = true,
790 			.default_per_cpu = true,
791 		},
792 	},
793 };
794 
/*
 * Help string for --call-graph; the "dwarf" method is only advertised
 * when built with HAVE_DWARF_UNWIND_SUPPORT.
 */
795 #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
796 
797 #ifdef HAVE_DWARF_UNWIND_SUPPORT
798 const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
799 #else
800 const char record_callchain_help[] = CALLCHAIN_HELP "fp";
801 #endif
802 
803 /*
804  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
805  * with it and switch to use the library functions in perf_evlist that came
806  * from builtin-record.c, i.e. use record_opts,
807  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
808  * using pipes, etc.
809  */
/* Option table order is user-visible in --help output; keep it stable. */
810 const struct option record_options[] = {
811 	OPT_CALLBACK('e', "event", &record.evlist, "event",
812 		     "event selector. use 'perf list' to list available events",
813 		     parse_events_option),
814 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
815 		     "event filter", parse_filter),
816 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
817 		    "record events on existing process id"),
818 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
819 		    "record events on existing thread id"),
820 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
821 		    "collect data with this RT SCHED_FIFO priority"),
822 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
823 		    "collect data without buffering"),
824 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
825 		    "collect raw sample records from all opened counters"),
826 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
827 			    "system-wide collection from all CPUs"),
828 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
829 		    "list of cpus to monitor"),
830 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
831 	OPT_STRING('o', "output", &record.file.path, "file",
832 		    "output file name"),
833 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
834 			&record.opts.no_inherit_set,
835 			"child tasks do not inherit counters"),
836 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
837 	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
838 		     "number of mmap data pages",
839 		     perf_evlist__parse_mmap_pages),
840 	OPT_BOOLEAN(0, "group", &record.opts.group,
841 		    "put the counters into a counter group"),
	/* -g takes no argument; --call-graph takes the method (+dump size). */
842 	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
843 			   NULL, "enables call-graph recording" ,
844 			   &record_callchain_opt),
845 	OPT_CALLBACK(0, "call-graph", &record.opts,
846 		     "mode[,dump_size]", record_callchain_help,
847 		     &record_parse_callchain_opt),
848 	OPT_INCR('v', "verbose", &verbose,
849 		    "be more verbose (show counter open errors, etc)"),
850 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
851 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
852 		    "per thread counts"),
853 	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
854 		    "Sample addresses"),
855 	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
856 	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
857 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
858 		    "don't sample"),
859 	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
860 		    "do not update the buildid cache"),
861 	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
862 		    "do not collect buildids in perf.data"),
863 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
864 		     "monitor event in cgroup name only",
865 		     parse_cgroups),
866 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
867 		  "ms to wait before starting measurement after program start"),
868 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
869 		   "user to profile"),
870 
871 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
872 		     "branch any", "sample any taken branches",
873 		     parse_branch_stack),
874 
875 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
876 		     "branch filter mask", "branch stack filter modes",
877 		     parse_branch_stack),
878 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
879 		    "sample by weight (on special events only)"),
880 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
881 		    "sample transaction flags (special events only)"),
882 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
883 		    "use per-thread mmaps"),
884 	OPT_END()
885 };
886 
/*
 * Entry point for 'perf record': parse and validate options, set up
 * symbols/targets/maps, then hand off to __cmd_record().
 *
 * Returns 0 on success or a negative error; may exit directly through
 * usage_with_options() on invalid command lines.
 */
887 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
888 {
889 	int err = -ENOMEM;
890 	struct record *rec = &record;
891 	char errbuf[BUFSIZ];
892 
893 	rec->evlist = perf_evlist__new();
894 	if (rec->evlist == NULL)
895 		return -ENOMEM;
896 
	/* config file values first; command line options then override */
897 	perf_config(perf_record_config, rec);
898 
899 	argc = parse_options(argc, argv, record_options, record_usage,
900 			    PARSE_OPT_STOP_AT_NON_OPTION);
901 	if (!argc && target__none(&rec->opts.target))
902 		usage_with_options(record_usage, record_options);
903 
904 	if (nr_cgroups && !rec->opts.target.system_wide) {
905 		ui__error("cgroup monitoring only available in"
906 			  " system-wide mode\n");
907 		usage_with_options(record_usage, record_options);
908 	}
909 
910 	symbol__init();
911 
912 	if (symbol_conf.kptr_restrict)
913 		pr_warning(
914 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
915 "check /proc/sys/kernel/kptr_restrict.\n\n"
916 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
917 "file is not found in the buildid cache or in the vmlinux path.\n\n"
918 "Samples in kernel modules won't be resolved at all.\n\n"
919 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
920 "even with a suitable vmlinux or kallsyms file.\n\n");
921 
922 	if (rec->no_buildid_cache || rec->no_buildid)
923 		disable_buildid_cache();
924 
	/* no -e given: fall back to the default event */
925 	if (rec->evlist->nr_entries == 0 &&
926 	    perf_evlist__add_default(rec->evlist) < 0) {
927 		pr_err("Not enough memory for event selector list\n");
928 		goto out_symbol_exit;
929 	}
930 
931 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
932 		rec->opts.no_inherit = true;
933 
	/* target problems are only a warning here... */
934 	err = target__validate(&rec->opts.target);
935 	if (err) {
936 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
937 		ui__warning("%s", errbuf);
938 	}
939 
	/* ...but a bad -u user is fatal */
940 	err = target__parse_uid(&rec->opts.target);
941 	if (err) {
942 		int saved_errno = errno;
943 
944 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
945 		ui__error("%s", errbuf);
946 
947 		err = -saved_errno;
948 		goto out_symbol_exit;
949 	}
950 
951 	err = -ENOMEM;
952 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
953 		usage_with_options(record_usage, record_options);
954 
955 	if (record_opts__config(&rec->opts)) {
956 		err = -EINVAL;
957 		goto out_symbol_exit;
958 	}
959 
960 	err = __cmd_record(&record, argc, argv);
961 out_symbol_exit:
962 	perf_evlist__delete(rec->evlist);
963 	symbol__exit();
964 	return err;
965 }
966