xref: /openbmc/linux/tools/perf/builtin-record.c (revision d2999e1b)
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}
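
/*
 * Note: bytes_written deliberately counts everything funneled through
 * record__write() - mmap'ed sample data, synthesized events and tracing
 * data alike - so that at exit it can be added to header.data_size and
 * drive the "Captured and wrote ... MB" summary printed at the end of
 * __cmd_record().
 */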

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int record__mmap_read(struct record *rec, struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
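
/*
 * Worked example (illustrative values): with mask == 0xffff (a 64KiB
 * data area), old == 0xfff0 and head == 0x10010, the span wraps around
 * the ring buffer: the first record__write() pushes
 * mask + 1 - (old & mask) == 0x10 bytes from offset 0xfff0, then the
 * second pushes the remaining head - old == 0x10 bytes from offset 0.
 */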

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}
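
/*
 * Restoring SIG_DFL and re-raising is the usual idiom for dying "by"
 * the signal that stopped the session: since record__sig_exit() runs as
 * an atexit() handler, cleanup has already happened, and the parent
 * then sees perf as terminated by that signal (exit status 128+signo in
 * a POSIX shell) rather than by a plain exit().
 */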

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
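
/*
 * Sizing note (illustrative, assuming 4KiB pages and the default
 * /proc/sys/kernel/perf_event_mlock_kb of 516): each per-CPU mmap is
 * opts->mmap_pages data pages plus one header page, and the page count
 * must be a power of two. An unprivileged user can lock roughly 512KiB
 * of data pages, so e.g. "-m 128" normally succeeds while "-m 1024"
 * tends to hit the EPERM path above.
 */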

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For a guest kernel, when processing the record and report
	 * subcommands we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a DSO preload, because by default
	 * guest module symbols are loaded from the guest's kallsyms
	 * instead of /lib/modules/XXX/XXX. This avoids missing symbols
	 * when the first address falls in a module rather than in the
	 * guest kernel itself.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text symbol.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
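
/*
 * A FINISHED_ROUND record marks a flush point: every event written
 * before it has already been drained from all ring buffers, so the
 * processing side can sort what it has buffered so far by timestamp
 * without fearing that an older event will still arrive later.
 */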

static int record__mmap_read_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
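
/*
 * Note (assumption about the helper's contract): the handler above is
 * expected to be installed by perf_evlist__prepare_workload() with
 * sigaction(SA_SIGINFO), the child reporting its exec() errno via
 * sigqueue(), which is how the value arrives in
 * info->si_value.sival_int here.
 */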

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that there
			 * were no tracepoints, so it's not really an
			 * error, just that we don't need to synthesize
			 * anything. We really have to return this more
			 * properly and also propagate errors that now
			 * are calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

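	/*
	 * Main loop: drain every mmap, and if nothing new arrived either
	 * stop (done was set by SIGINT/SIGTERM/SIGCHLD) or block in
	 * poll() until a ring buffer becomes readable. Once "done" is
	 * seen the counters are disabled - except in the forked-workload
	 * case, where they simply die with the child - so the final
	 * drain only collects what was already in flight.
	 */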
	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_child;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
			/*
			 * Propagate an error only if there is one: a
			 * positive number of returned events and an
			 * EINTR interruption are not errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;
		}

		/*
		 * When perf started the traced process itself, the
		 * events die with that process at the end and we wait
		 * for that, so there is no need to disable the events
		 * in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[512];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

		/*
		 * Approximate RIP event size: 24 bytes.
		 */
		fprintf(stderr,
			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
			(double)rec->bytes_written / 1024.0 / 1024.0,
			file->path,
			rec->bytes_written / 24);
	}

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
	BRANCH_END
};
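
/*
 * Example (illustrative): "-j any_call,u" below ORs together
 * PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_USER, while a filter
 * naming only privilege levels (e.g. "-j u") ends up as
 * PERF_SAMPLE_BRANCH_ANY via the fallback at the end of
 * parse_branch_stack().
 */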

static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
	       max_size, str);
	return -1;
}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
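
/*
 * Worked example: "4097" becomes round_up(4097, 8) == 4104, which is
 * accepted because it is below round_down(USHRT_MAX, 8) == 65528;
 * "0" and "70000" are both rejected.
 */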

int record_parse_callchain(const char *arg, struct record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef HAVE_DWARF_UNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* HAVE_DWARF_UNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}
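
/*
 * Examples of strings accepted by the parser above:
 *
 *	"fp"		-> CALLCHAIN_FP
 *	"dwarf"		-> CALLCHAIN_DWARF, stack_dump_size = 8192
 *	"dwarf,4096"	-> CALLCHAIN_DWARF, stack_dump_size = 4096
 *
 * "fp,extra" is rejected, since "fp" takes no further arguments.
 */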

static void callchain_debug(struct record_opts *opts)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };

	pr_debug("callchain: type %s\n", str[opts->call_graph]);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct record_opts *opts = opt->value;
	int ret;

	opts->call_graph_enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;

	opts->call_graph_enabled = !unset;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.call-graph"))
		return record_parse_callchain(value, &rec->opts);

	return perf_default_config(var, value, cb);
}
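
/*
 * Illustrative ~/.perfconfig snippet for the "record.call-graph" key
 * handled above (standard perf-config syntax assumed):
 *
 *	[record]
 *		call-graph = dwarf,4096
 */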

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need to have access to it in record__exit,
 * which is called after cmd_record() exits; but since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};
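
/*
 * Illustrative invocations built from the options above (assumed
 * typical usage, not part of the original source):
 *
 *	perf record -F 997 -g -- ./myprog	 # ~997 Hz, with callchains
 *	perf record -e cycles:u -p 1234 sleep 10 # attach to pid 1234
 *	perf record -a -j any_call,u sleep 5	 # system-wide branch sampling
 */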

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	return err;
}