xref: /openbmc/linux/tools/perf/builtin-record.c (revision 95e9fd10)
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

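/*
 * Account for bytes that a helper (e.g. tracing data synthesis) has
 * already written straight to rec->output, bypassing write_output(),
 * so that header.data_size stays accurate at exit.
 */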
static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

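/*
 * Write the full buffer, looping on short writes: write(2) may return
 * less than 'size' (signals, pipe buffers), so a single call is not
 * enough. Dies on any write error.
 */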
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}

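/*
 * Tool callback used while synthesizing events (mmaps, comms, attrs,
 * etc.): every synthesized record is simply appended to the output
 * file through write_output().
 */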
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}

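/*
 * Drain one kernel ring buffer into the output file. The data area
 * starts one page past md->base (the first page is the control page
 * holding data_head/data_tail). A sketch of the wrap handling:
 *
 *   old = md->prev, head = data_head
 *   if [old..head) wraps past the end of the buffer, flush it as two
 *   chunks:  [old & mask .. mask]  then  [0 .. head & mask)
 *
 * Finally the tail is advanced so the kernel may reuse the space.
 */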
static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

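/*
 * Flags written from signal context and polled from the main record
 * loop, hence volatile.
 */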
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

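/*
 * on_exit() handler: reap the forked workload (sending SIGTERM if it
 * has not exited yet), then re-raise the fatal signal with the default
 * disposition so the shell sees the real termination status.
 */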
static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

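/*
 * Used in append mode: the attrs in the existing perf.data header must
 * match the newly requested events one to one, otherwise the file
 * would mix incompatible sample layouts.
 */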
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

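/*
 * Open a counter per event per cpu/thread, retrying with features
 * stripped when an older kernel rejects the attr, roughly:
 *
 *   EINVAL + exclude_guest/host set -> clear both and retry
 *   EINVAL again                    -> drop sample_id_all and retry
 *   ENOENT/ENXIO on HW cycles       -> fall back to the cpu-clock
 *                                      software event
 *
 * Then mmap the buffers and sanity-check append compatibility.
 */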
static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short term fix for problems introduced
		 * by eac23d1c, which broke 'perf script' by producing different
		 * sample_types for multiple tracepoint events when a perf binary
		 * that tries to use sample_id_all runs on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		if (opts->group && pos != first)
			group_fd = first->fd;
fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && opts->target.cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to the hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even without PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
					&& attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__set_id_hdr_size(session);
}

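/*
 * Second pass over everything recorded so far: feed the events through
 * build_id__mark_dso_hit_ops so only DSOs actually hit by samples get
 * their build-ids written into the header.
 */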
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

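/*
 * on_exit() handler for the non-pipe case: fix up data_size, collect
 * build-ids and rewrite the header in place, then tear the session
 * down. Pipe output gets no trailing header, hence the guard.
 */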
static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For a guest kernel, during record & report processing, we
	 * synthesize the module mmaps before the guest kernel mmap and
	 * trigger a preload of the dso, because by default guest module
	 * symbols are loaded from the guest's kallsyms instead of from
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
	 * first address resolved lies in a module rather than in the
	 * guest kernel proper.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

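/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic flush marker: after one
 * pass over all mmaps, everything before the marker is ordered enough
 * for 'perf report' to sort and process, bounding how much it has to
 * buffer. In this version it is only emitted when tracing data is
 * present.
 */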
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base)
			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		write_output(rec, &finished_round_event, sizeof(finished_round_event));
}

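/*
 * The record session proper, roughly:
 *
 *   1. install signal/exit handlers, sort out the output file
 *      (pipe, new, append, or rotate the old one to perf.data.old)
 *   2. fork the workload (stopped), open and mmap the counters
 *   3. write the header (or synthesize it into the pipe)
 *   4. synthesize existing state: kernel/module/thread mmaps
 *   5. enable counters, kick the workload, then poll and drain the
 *      ring buffers until the workload exits or we get a signal
 */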
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

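	/*
	 * With a pipe there is no seekable header to patch later, so the
	 * attrs, event types and tracing data have to be synthesized
	 * inline into the event stream right here.
	 */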
	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that currently call die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

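	/*
	 * Main capture loop: drain all mmaps, and if that produced
	 * nothing new, sleep in poll() until a buffer has data. Once a
	 * signal flips 'done' the counters are disabled, and the loop
	 * exits as soon as a drain pass finds no new samples.
	 */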
	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

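/*
 * Table mapping the -b/-j filter names on the command line to
 * PERF_SAMPLE_BRANCH_* bits, e.g.:
 *
 *   perf record -j any_call,u -- ./workload
 */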
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

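/*
 * Parse a comma separated list of branch filter names into a mask.
 * Privilege levels (u/k/hv) alone are not enough; if no branch type
 * was selected the mask defaults to PERF_SAMPLE_BRANCH_ANY.
 */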
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new, because we need access to it in perf_record__exit,
 * which is called after cmd_record() exits; but since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

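/*
 * Entry point: parse options, validate the target (pid/tid/cpu/uid
 * combinations), create the cpu/thread maps and settle the sampling
 * mode. A user supplied period (-c) wins over a frequency (-F),
 * e.g. 'perf record -c 100000' samples every 100000 events instead
 * of at the default 4000 Hz.
 */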
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}