/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <linux/time64.h>

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

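/*
 * In backward (overwrite) mode the kernel writes events towards lower
 * addresses, so 'head' marks the most recently written record. Walk the
 * event headers forward from 'head' until the walk wraps the whole
 * buffer or hits a zero-sized header, to find the valid [*start, *end)
 * byte range.
 */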
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

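/*
 * Copy the [start, end) byte range out of one mmap ring buffer into
 * perf.data. When the range wraps past the end of the buffer it is
 * written as two chunks: from 'start' to the end of the buffer, then
 * from the beginning of the buffer up to 'end'.
 */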
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

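/*
 * Flags set from signal handlers and polled by the main loop in
 * __cmd_record().
 */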
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

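	/*
	 * Write the event followed by both data chunks, then pad so the
	 * payload stays 8-byte aligned, e.g. len1 + len2 == 13 -> padding == 3.
	 */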
	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

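/*
 * mmap the per-cpu/per-thread ring buffers (and the AUX area buffers,
 * if any). On EPERM, point the user at perf_event_mlock_kb or a
 * smaller -m/--mmap_pages value instead of failing silently.
 */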
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		error("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace the
	 * dso->long_name with a real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record & report
	 * subcommands we arrange module mmaps prior to the guest kernel
	 * mmap and trigger a preload dso, because default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
	 * first address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

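/*
 * Written after each pass over all the mmaps, so that downstream tools
 * (e.g. 'perf report') can flush their ordered-events queue at round
 * boundaries instead of buffering the whole file.
 */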
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

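/*
 * Rotate the output: finalize the current perf.data into a timestamped
 * file and, unless we are exiting, reset the counters so recording
 * continues into a fresh file.
 */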
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would
		 * lack map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we ask for it by setting its
 * want_signal parameter to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

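/*
 * Pick any mapped ring buffer's control page; arch overrides of
 * perf_event__synth_time_conv() (e.g. on x86) use it to emit the
 * timestamp conversion parameters. The __weak default above is a no-op.
 */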
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

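/*
 * Main record loop: set up signals, create the session, open the
 * events, synthesize the metadata events, kick off the workload (if
 * any), then drain the mmaps until done.
 */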
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent it.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 was raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Re-enable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

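/*
 * Handle 'perf config' keys for record, e.g. in ~/.perfconfig:
 *
 *	[record]
 *		build-id = no-cache
 *		call-graph = dwarf
 *
 * 'record.call-graph' is renamed to 'call-graph.record-mode' and then
 * handed to perf_default_config().
 */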
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

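/*
 * Accept either a raw clockid number or a name from the table above,
 * with an optional "CLOCK_" prefix, e.g.:
 *
 *	perf record -k monotonic_raw ...
 *	perf record -k CLOCK_BOOTTIME ...
 *	perf record -k 4 ...
 */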
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

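/*
 * Parse -m/--mmap-pages as "pages[,pages]": the first value sizes the
 * regular event mmaps, the optional second value sizes the AUX area
 * tracing mmaps, e.g. '-m 512,128'.
 */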
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
		    "Switch output when receiving SIGUSR2"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (rec->switch_output)
		rec->timestamp_filename = true;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing
	 * data, because we do not decode the trace: decoding it would
	 * take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (trigger_is_ready(&switch_output_trigger))
		trigger_hit(&switch_output_trigger);
}