xref: /openbmc/linux/tools/perf/builtin-record.c (revision 4f205687)
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

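/*
 * Find the readable region [*start, *end) of an overwritable (backward)
 * ring buffer. Walk event headers forward from 'head' until either the
 * walk has covered one full buffer size (older records past that point
 * have been overwritten, so rewind to the last complete record) or a
 * zero-sized header is found (that part of the buffer was never written).
 */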
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(struct perf_evlist *evlist,
	      void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end)
{
	if (!evlist->backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(rec->evlist, data, md->mask, head,
			  old, &start, &end))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_evlist__mmap_consume(rec->evlist, idx);
		return 0;
	}

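	/*
	 * The region wraps past the end of the ring buffer: write out the
	 * tail chunk [start, end of buffer) first, then fall through below
	 * to write the remainder from the start of the buffer.
	 */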
	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}

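/*
 * These flags are written from signal handlers and read in the main
 * capture loop, hence volatile.
 */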
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;
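	/* e.g. len1 + len2 == 13 -> padding == 3, keeping the file 8-byte aligned */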

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

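	/*
	 * Open each event; if opening fails, let perf_evsel__fallback() try
	 * a degraded configuration (e.g. a software clock event when the
	 * hardware PMU cannot be used) before reporting the error.
	 */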
	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			if (errno)
				rc = -errno;
			else
				rc = -EINVAL;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange module mmap events prior to the guest
	 * kernel mmap event and trigger a preload of the DSOs, because
	 * default guest module symbols are loaded from the guest kallsyms
	 * instead of /lib/modules/XXX/XXX. This avoids missing symbols
	 * when the first address is in a module instead of in the guest
	 * kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

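/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic (tool-generated) event written
 * after each full pass over the mmaps; the report side uses it as a flush
 * point when re-sorting events into timestamp order.
 */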
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;

		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, i) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

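/*
 * Start with every header feature bit set, then clear the ones this
 * session cannot or should not provide.
 */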
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as a real timestamp, e.g. "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data file would
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal argument to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);

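/*
 * Weak default that synthesizes nothing; architectures that can convert
 * perf timestamps to/from a hardware clock (e.g. x86 with the TSC)
 * override this to emit a time-conversion event.
 */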
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static int record__synthesize(struct record *rec)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that currently call die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
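	/*
	 * Main capture loop: drain all mmaps, service the snapshot and
	 * switch-output triggers, and poll when no new samples arrived.
	 * 'done' is set asynchronously by the signal handlers above.
	 */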
	for (;;) {
		unsigned long long hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			trigger_ready(&switch_output_trigger);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one: a positive
			 * number of returned events and an interrupted poll
			 * (EINTR) are not errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, the events die
		 * with the process at the end and we wait for that, so there
		 * is no need to disable the events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

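/*
 * Consulted for each variable in the user's perfconfig file, e.g.:
 *
 *   [record]
 *       build-id = no-cache
 *
 * 'record.call-graph' is rewritten to 'call-graph.record-mode' so the
 * generic config code handles it.
 */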
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

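/*
 * Parse the -k/--clockid argument: a raw clockid number, or a name from
 * the table above with an optional "CLOCK_" prefix, e.g.
 * 'perf record -k monotonic_raw' or 'perf record -k CLOCK_MONOTONIC_RAW'.
 */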
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

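/*
 * Parse -m/--mmap-pages, which takes "pages[,pages]": the first value
 * sizes the data mmaps, the optional second one the AUX area tracing
 * mmaps, e.g. 'perf record -m 512,64'.
 */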
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
		    "Switch output when receiving SIGUSR2"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (rec->switch_output)
		rec->timestamp_filename = true;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		return err;
	}

	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	/*
	 * We take all buildids when the file contains AUX area tracing data,
	 * because decoding that trace to find the hit DSOs would take too
	 * long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

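/*
 * SIGUSR2 handler: kicks the AUX area snapshot and/or the output switch,
 * which the main loop in __cmd_record() then services.
 */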
static void snapshot_sig_handler(int sig __maybe_unused)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (trigger_is_ready(&switch_output_trigger))
		trigger_hit(&switch_output_trigger);
}