xref: /openbmc/linux/tools/perf/builtin-record.c (revision 9c1f8594)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/symbol.h"
26 #include "util/cpumap.h"
27 #include "util/thread_map.h"
28 
29 #include <unistd.h>
30 #include <sched.h>
31 #include <sys/mman.h>
32 
/* How to treat an existing perf.data file: overwrite or append to it. */
enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

/* Sampling period/frequency: explicit user values vs. computed defaults. */
static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

/* Output file and mmap configuration. */
static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq 			= UINT_MAX;
static int			freq				=   1000;
static int			output;				/* output file descriptor */
static int			pipe_output			=      0;
static const char		*output_name			= NULL;

/* Command-line behavior toggles (see record_options[]). */
static bool			group				=  false;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
/* Cleared at open time if the running kernel lacks attr->sample_id_all. */
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;	/* forked workload, killed in sig_atexit() */
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

/* Run statistics reported at exit. */
static long			samples				=      0;
static u64			bytes_written			=      0;

/* file_new == 0 means we are appending to an existing perf.data. */
static int			file_new			=      1;
static off_t			post_processing_offset;	/* start of sample data in the file */

static struct perf_session	*session;
static const char		*cpu_list;
76 
77 static void advance_output(size_t size)
78 {
79 	bytes_written += size;
80 }
81 
82 static void write_output(void *buf, size_t size)
83 {
84 	while (size) {
85 		int ret = write(output, buf, size);
86 
87 		if (ret < 0)
88 			die("failed to write");
89 
90 		size -= ret;
91 		buf += ret;
92 
93 		bytes_written += ret;
94 	}
95 }
96 
97 static int process_synthesized_event(union perf_event *event,
98 				     struct perf_sample *sample __used,
99 				     struct perf_session *self __used)
100 {
101 	write_output(event, event->header.size);
102 	return 0;
103 }
104 
/*
 * Drain one perf mmap ring buffer into the output file.
 *
 * Copies everything between our last read position (md->prev) and the
 * kernel's write head, handling the case where the data wraps around
 * the end of the ring, then advances the tail so the kernel can reuse
 * the space.
 */
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* Sample data starts one page after the control page. */
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;		/* nothing new */

	samples++;

	size = head - old;

	/*
	 * If the new data wraps past the end of the ring buffer, write
	 * the chunk up to the end first, then fall through to write the
	 * remainder from the start of the buffer.
	 */
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	/* Publish the new tail so the kernel may overwrite consumed data. */
	perf_mmap__write_tail(md, old);
}
137 
138 static volatile int done = 0;
139 static volatile int signr = -1;
140 
141 static void sig_handler(int sig)
142 {
143 	done = 1;
144 	signr = sig;
145 }
146 
147 static void sig_atexit(void)
148 {
149 	if (child_pid > 0)
150 		kill(child_pid, SIGTERM);
151 
152 	if (signr == -1 || signr == SIGUSR1)
153 		return;
154 
155 	signal(signr, SIG_DFL);
156 	kill(getpid(), signr);
157 }
158 
/*
 * Fill in the perf_event_attr for one event based on the global
 * command-line options.  Only the first event in the list gets the
 * "track" bits (mmap/comm records), since one stream of those per
 * session is enough.
 */
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/* With multiple events we need IDs to tell samples apart. */
	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * We default some events to a 1 default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			/* Frequency mode: kernel adjusts the period, so record it. */
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	/*
	 * Timestamps are needed whenever samples from different contexts
	 * may interleave -- but only if the kernel supports sample_id_all.
	 */
	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	if (nodelay) {
		/* Wake up on every single event instead of buffering. */
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	/*
	 * When profiling a forked workload, start the counters disabled
	 * and let exec() enable them, so we don't measure perf itself.
	 */
	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}
229 
230 static bool perf_evlist__equal(struct perf_evlist *evlist,
231 			       struct perf_evlist *other)
232 {
233 	struct perf_evsel *pos, *pair;
234 
235 	if (evlist->nr_entries != other->nr_entries)
236 		return false;
237 
238 	pair = list_entry(other->entries.next, struct perf_evsel, node);
239 
240 	list_for_each_entry(pos, &evlist->entries, node) {
241 		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
242 			return false;
243 		pair = list_entry(pair->node.next, struct perf_evsel, node);
244 	}
245 
246 	return true;
247 }
248 
/*
 * Configure and open every event in the list, with fallbacks for older
 * kernels (no sample_id_all) and missing hardware PMUs (cycles ->
 * cpu-clock), then set event filters and mmap the ring buffers.
 * Fatal problems terminate the process via die()/exit().
 */
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	/* A dummy CPU map (map[0] < 0) means per-thread mode: no inherit. */
	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				/* Insufficient privilege: explain paranoia setting and bail. */
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err ==  ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_type_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	/* On append, the newly opened events must match the file's header. */
	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
 	}

	perf_session__update_sample_type(session);
}
354 
355 static int process_buildids(void)
356 {
357 	u64 size = lseek(output, 0, SEEK_CUR);
358 
359 	if (size == 0)
360 		return 0;
361 
362 	session->fd = output;
363 	return __perf_session__process_events(session, post_processing_offset,
364 					      size - post_processing_offset,
365 					      size, &build_id__mark_dso_hit_ops);
366 }
367 
368 static void atexit_header(void)
369 {
370 	if (!pipe_output) {
371 		session->header.data_size += bytes_written;
372 
373 		if (!no_buildid)
374 			process_buildids();
375 		perf_session__write_header(session, evsel_list, output, true);
376 		perf_session__delete(session);
377 		perf_evlist__delete(evsel_list);
378 		symbol__exit();
379 	}
380 }
381 
382 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
383 {
384 	int err;
385 	struct perf_session *psession = data;
386 
387 	if (machine__is_host(machine))
388 		return;
389 
390 	/*
391 	 *As for guest kernel when processing subcommand record&report,
392 	 *we arrange module mmap prior to guest kernel mmap and trigger
393 	 *a preload dso because default guest module symbols are loaded
394 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
395 	 *method is used to avoid symbol missing when the first addr is
396 	 *in module instead of in guest kernel.
397 	 */
398 	err = perf_event__synthesize_modules(process_synthesized_event,
399 					     psession, machine);
400 	if (err < 0)
401 		pr_err("Couldn't record guest kernel [%d]'s reference"
402 		       " relocation symbol.\n", machine->pid);
403 
404 	/*
405 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
406 	 * have no _text sometimes.
407 	 */
408 	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
409 						 psession, machine, "_text");
410 	if (err < 0)
411 		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
412 							 psession, machine,
413 							 "_stext");
414 	if (err < 0)
415 		pr_err("Couldn't record guest kernel [%d]'s reference"
416 		       " relocation symbol.\n", machine->pid);
417 }
418 
/*
 * Header-only marker event written after each full pass over the mmap
 * buffers (see mmap_read_all()); presumably used by the report side to
 * bound how far events may need reordering -- confirm in session code.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
423 
424 static void mmap_read_all(void)
425 {
426 	int i;
427 
428 	for (i = 0; i < evsel_list->nr_mmaps; i++) {
429 		if (evsel_list->mmap[i].base)
430 			mmap_read(&evsel_list->mmap[i]);
431 	}
432 
433 	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
434 		write_output(&finished_round_event, sizeof(finished_round_event));
435 }
436 
/*
 * The main record loop: set up the output file and session, optionally
 * fork the workload, open counters, synthesize the initial machine
 * state, then read the mmap buffers until the workload exits or we get
 * a signal.  Returns 0 on success, negative error otherwise.
 */
static int __cmd_record(int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;	/* remaining argv is the workload command */
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	/* No -o given: stream to stdout if it's a pipe, else use perf.data. */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Preserve a non-empty existing file as <name>.old. */
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			/* Nothing to append to: fall back to a fresh file. */
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	/* Appending: load the existing header so we can validate against it. */
	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			/* Child: wait for the go signal, then exec the workload. */
			if (pipe_output)
				dup2(2, 1);	/* keep stdout free for perf data */
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			/* exec failed: signal the parent via SIGUSR1. */
			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	/* Remember where sample data begins, for build-id post-processing. */
	post_processing_offset = lseek(output, 0, SEEK_CUR);

	/*
	 * A pipe consumer can't seek back for metadata, so synthesize the
	 * attrs, event types and tracing data inline into the stream.
	 */
	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	/* _text may be missing from kallsyms; retry with _stext. */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	/* Record the existing thread/process maps we are attaching to. */
	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	/*
	 * Main loop: drain buffers; once 'done' is set by a signal, do a
	 * final drain pass and disable the events before exiting.
	 */
	for (;;) {
		int hits = samples;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
716 
/* Usage strings shown by usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* -f / -A flags; translated into write_mode in cmd_record(). */
static bool force, append_file;
724 
/* Command-line option table; values land in the file-scope globals above. */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
			    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
			    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
			"overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};
779 
/*
 * Entry point for 'perf record': parse and validate options, build the
 * event list and cpu/thread maps, resolve period/frequency defaults,
 * then hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	/* Need something to profile: a command, pid/tid, all CPUs or a cpu list. */
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	/* No -e given: fall back to the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
875