xref: /openbmc/linux/tools/perf/builtin-record.c (revision 680ef72a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45 
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <poll.h>
49 #include <unistd.h>
50 #include <sched.h>
51 #include <signal.h>
52 #include <sys/mman.h>
53 #include <sys/wait.h>
55 #include <linux/time64.h>
56 
57 struct switch_output {
58 	bool		 enabled;
59 	bool		 signal;
60 	unsigned long	 size;
61 	unsigned long	 time;
62 	const char	*str;
63 	bool		 set;
64 };
65 
66 struct record {
67 	struct perf_tool	tool;
68 	struct record_opts	opts;
69 	u64			bytes_written;
70 	struct perf_data	data;
71 	struct auxtrace_record	*itr;
72 	struct perf_evlist	*evlist;
73 	struct perf_session	*session;
74 	const char		*progname;
75 	int			realtime_prio;
76 	bool			no_buildid;
77 	bool			no_buildid_set;
78 	bool			no_buildid_cache;
79 	bool			no_buildid_cache_set;
80 	bool			buildid_all;
81 	bool			timestamp_filename;
82 	struct switch_output	switch_output;
83 	unsigned long long	samples;
84 };
85 
86 static volatile int auxtrace_record__snapshot_started;
87 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
88 static DEFINE_TRIGGER(switch_output_trigger);
89 
90 static bool switch_output_signal(struct record *rec)
91 {
92 	return rec->switch_output.signal &&
93 	       trigger_is_ready(&switch_output_trigger);
94 }
95 
96 static bool switch_output_size(struct record *rec)
97 {
98 	return rec->switch_output.size &&
99 	       trigger_is_ready(&switch_output_trigger) &&
100 	       (rec->bytes_written >= rec->switch_output.size);
101 }
102 
103 static bool switch_output_time(struct record *rec)
104 {
105 	return rec->switch_output.time &&
106 	       trigger_is_ready(&switch_output_trigger);
107 }
108 
109 static int record__write(struct record *rec, void *bf, size_t size)
110 {
111 	if (perf_data__write(rec->session->data, bf, size) < 0) {
112 		pr_err("failed to write perf data, error: %m\n");
113 		return -1;
114 	}
115 
116 	rec->bytes_written += size;
117 
118 	if (switch_output_size(rec))
119 		trigger_hit(&switch_output_trigger);
120 
121 	return 0;
122 }
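/*
 * Note (annotation): every byte that reaches the output file funnels
 * through record__write(), so rec->bytes_written is an exact running
 * total; switch_output_size() above compares it against the
 * --switch-output size threshold to decide when to rotate the file.
 */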
123 
124 static int process_synthesized_event(struct perf_tool *tool,
125 				     union perf_event *event,
126 				     struct perf_sample *sample __maybe_unused,
127 				     struct machine *machine __maybe_unused)
128 {
129 	struct record *rec = container_of(tool, struct record, tool);
130 	return record__write(rec, event, event->header.size);
131 }
132 
133 static int record__pushfn(void *to, void *bf, size_t size)
134 {
135 	struct record *rec = to;
136 
137 	rec->samples++;
138 	return record__write(rec, bf, size);
139 }
140 
141 static volatile int done;
142 static volatile int signr = -1;
143 static volatile int child_finished;
144 
145 static void sig_handler(int sig)
146 {
147 	if (sig == SIGCHLD)
148 		child_finished = 1;
149 	else
150 		signr = sig;
151 
152 	done = 1;
153 }
154 
155 static void sigsegv_handler(int sig)
156 {
157 	perf_hooks__recover();
158 	sighandler_dump_stack(sig);
159 }
160 
161 static void record__sig_exit(void)
162 {
163 	if (signr == -1)
164 		return;
165 
166 	signal(signr, SIG_DFL);
167 	raise(signr);
168 }
169 
170 #ifdef HAVE_AUXTRACE_SUPPORT
171 
172 static int record__process_auxtrace(struct perf_tool *tool,
173 				    union perf_event *event, void *data1,
174 				    size_t len1, void *data2, size_t len2)
175 {
176 	struct record *rec = container_of(tool, struct record, tool);
177 	struct perf_data *data = &rec->data;
178 	size_t padding;
179 	u8 pad[8] = {0};
180 
181 	if (!perf_data__is_pipe(data)) {
182 		off_t file_offset;
183 		int fd = perf_data__fd(data);
184 		int err;
185 
186 		file_offset = lseek(fd, 0, SEEK_CUR);
187 		if (file_offset == -1)
188 			return -1;
189 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
190 						     event, file_offset);
191 		if (err)
192 			return err;
193 	}
194 
195 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
196 	padding = (len1 + len2) & 7;
197 	if (padding)
198 		padding = 8 - padding;
199 
200 	record__write(rec, event, event->header.size);
201 	record__write(rec, data1, len1);
202 	if (len2)
203 		record__write(rec, data2, len2);
204 	record__write(rec, &pad, padding);
205 
206 	return 0;
207 }
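/*
 * A worked example of the padding arithmetic above (values are
 * illustrative): AUX data is kept 8-byte aligned in the file, so if
 * len1 + len2 == 13, then (13 & 7) == 5 and padding == 8 - 5 == 3,
 * i.e. three zero bytes from pad[] are appended after data2.
 */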
208 
209 static int record__auxtrace_mmap_read(struct record *rec,
210 				      struct auxtrace_mmap *mm)
211 {
212 	int ret;
213 
214 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
215 				  record__process_auxtrace);
216 	if (ret < 0)
217 		return ret;
218 
219 	if (ret)
220 		rec->samples++;
221 
222 	return 0;
223 }
224 
225 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
226 					       struct auxtrace_mmap *mm)
227 {
228 	int ret;
229 
230 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
231 					   record__process_auxtrace,
232 					   rec->opts.auxtrace_snapshot_size);
233 	if (ret < 0)
234 		return ret;
235 
236 	if (ret)
237 		rec->samples++;
238 
239 	return 0;
240 }
241 
242 static int record__auxtrace_read_snapshot_all(struct record *rec)
243 {
244 	int i;
245 	int rc = 0;
246 
247 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
248 		struct auxtrace_mmap *mm =
249 				&rec->evlist->mmap[i].auxtrace_mmap;
250 
251 		if (!mm->base)
252 			continue;
253 
254 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
255 			rc = -1;
256 			goto out;
257 		}
258 	}
259 out:
260 	return rc;
261 }
262 
263 static void record__read_auxtrace_snapshot(struct record *rec)
264 {
265 	pr_debug("Recording AUX area tracing snapshot\n");
266 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
267 		trigger_error(&auxtrace_snapshot_trigger);
268 	} else {
269 		if (auxtrace_record__snapshot_finish(rec->itr))
270 			trigger_error(&auxtrace_snapshot_trigger);
271 		else
272 			trigger_ready(&auxtrace_snapshot_trigger);
273 	}
274 }
275 
276 #else
277 
278 static inline
279 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
280 			       struct auxtrace_mmap *mm __maybe_unused)
281 {
282 	return 0;
283 }
284 
285 static inline
286 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
287 {
288 }
289 
290 static inline
291 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
292 {
293 	return 0;
294 }
295 
296 #endif
297 
298 static int record__mmap_evlist(struct record *rec,
299 			       struct perf_evlist *evlist)
300 {
301 	struct record_opts *opts = &rec->opts;
302 	char msg[512];
303 
304 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
305 				 opts->auxtrace_mmap_pages,
306 				 opts->auxtrace_snapshot_mode) < 0) {
307 		if (errno == EPERM) {
308 			pr_err("Permission error mapping pages.\n"
309 			       "Consider increasing "
310 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
311 			       "or try again with a smaller value of -m/--mmap_pages.\n"
312 			       "(current value: %u,%u)\n",
313 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
314 			return -errno;
315 		} else {
316 			pr_err("failed to mmap with %d (%s)\n", errno,
317 				str_error_r(errno, msg, sizeof(msg)));
318 			if (errno)
319 				return -errno;
320 			else
321 				return -EINVAL;
322 		}
323 	}
324 	return 0;
325 }
326 
327 static int record__mmap(struct record *rec)
328 {
329 	return record__mmap_evlist(rec, rec->evlist);
330 }
331 
332 static int record__open(struct record *rec)
333 {
334 	char msg[BUFSIZ];
335 	struct perf_evsel *pos;
336 	struct perf_evlist *evlist = rec->evlist;
337 	struct perf_session *session = rec->session;
338 	struct record_opts *opts = &rec->opts;
339 	struct perf_evsel_config_term *err_term;
340 	int rc = 0;
341 
342 	perf_evlist__config(evlist, opts, &callchain_param);
343 
344 	evlist__for_each_entry(evlist, pos) {
345 try_again:
346 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
347 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
348 				if (verbose > 0)
349 					ui__warning("%s\n", msg);
350 				goto try_again;
351 			}
352 
353 			rc = -errno;
354 			perf_evsel__open_strerror(pos, &opts->target,
355 						  errno, msg, sizeof(msg));
356 			ui__error("%s\n", msg);
357 			goto out;
358 		}
359 	}
360 
361 	if (perf_evlist__apply_filters(evlist, &pos)) {
362 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
363 			pos->filter, perf_evsel__name(pos), errno,
364 			str_error_r(errno, msg, sizeof(msg)));
365 		rc = -1;
366 		goto out;
367 	}
368 
369 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
370 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
371 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
372 		      str_error_r(errno, msg, sizeof(msg)));
373 		rc = -1;
374 		goto out;
375 	}
376 
377 	rc = record__mmap(rec);
378 	if (rc)
379 		goto out;
380 
381 	session->evlist = evlist;
382 	perf_session__set_id_hdr_size(session);
383 out:
384 	return rc;
385 }
386 
387 static int process_sample_event(struct perf_tool *tool,
388 				union perf_event *event,
389 				struct perf_sample *sample,
390 				struct perf_evsel *evsel,
391 				struct machine *machine)
392 {
393 	struct record *rec = container_of(tool, struct record, tool);
394 
395 	rec->samples++;
396 
397 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
398 }
399 
400 static int process_buildids(struct record *rec)
401 {
402 	struct perf_data *data = &rec->data;
403 	struct perf_session *session = rec->session;
404 
405 	if (data->size == 0)
406 		return 0;
407 
408 	/*
409 	 * During this process, it'll load the kernel map and replace
410 	 * dso->long_name with the real pathname it found.  In this case
411 	 * we prefer a vmlinux path like
412 	 *   /lib/modules/3.16.4/build/vmlinux
413 	 *
414 	 * rather than the build-id path (in the debug directory):
415 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
416 	 */
417 	symbol_conf.ignore_vmlinux_buildid = true;
418 
419 	/*
420 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
421 	 * so no need to process samples.
422 	 */
423 	if (rec->buildid_all)
424 		rec->tool.sample = NULL;
425 
426 	return perf_session__process_events(session);
427 }
428 
429 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
430 {
431 	int err;
432 	struct perf_tool *tool = data;
433 	/*
434 	 * As for the guest kernel: when processing the record & report
435 	 * subcommands, we arrange the module mmaps prior to the guest
436 	 * kernel mmap and trigger a preload of the DSOs, because default
437 	 * guest module symbols are loaded from guest kallsyms instead of
438 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
439 	 * first address is in a module instead of in the guest kernel.
440 	 */
441 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
442 					     machine);
443 	if (err < 0)
444 		pr_err("Couldn't record guest kernel [%d]'s reference"
445 		       " relocation symbol.\n", machine->pid);
446 
447 	/*
448 	 * We use _stext for the guest kernel because the guest kernel's
449 	 * /proc/kallsyms sometimes has no _text.
450 	 */
451 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
452 						 machine);
453 	if (err < 0)
454 		pr_err("Couldn't record guest kernel [%d]'s reference"
455 		       " relocation symbol.\n", machine->pid);
456 }
457 
458 static struct perf_event_header finished_round_event = {
459 	.size = sizeof(struct perf_event_header),
460 	.type = PERF_RECORD_FINISHED_ROUND,
461 };
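/*
 * Annotation: FINISHED_ROUND is a header-only synthetic event; the
 * report side uses it as a flush point when re-sorting events by time,
 * which is why record__mmap_read_evlist() below only emits one after a
 * pass that actually wrote data.
 */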
462 
463 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
464 				    bool backward)
465 {
466 	u64 bytes_written = rec->bytes_written;
467 	int i;
468 	int rc = 0;
469 	struct perf_mmap *maps;
470 
471 	if (!evlist)
472 		return 0;
473 
474 	maps = backward ? evlist->backward_mmap : evlist->mmap;
475 	if (!maps)
476 		return 0;
477 
478 	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
479 		return 0;
480 
481 	for (i = 0; i < evlist->nr_mmaps; i++) {
482 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
483 
484 		if (maps[i].base) {
485 			if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) {
486 				rc = -1;
487 				goto out;
488 			}
489 		}
490 
491 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
492 		    record__auxtrace_mmap_read(rec, mm) != 0) {
493 			rc = -1;
494 			goto out;
495 		}
496 	}
497 
498 	/*
499 	 * Mark the round finished in case we wrote
500 	 * at least one event.
501 	 */
502 	if (bytes_written != rec->bytes_written)
503 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
504 
505 	if (backward)
506 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
507 out:
508 	return rc;
509 }
510 
511 static int record__mmap_read_all(struct record *rec)
512 {
513 	int err;
514 
515 	err = record__mmap_read_evlist(rec, rec->evlist, false);
516 	if (err)
517 		return err;
518 
519 	return record__mmap_read_evlist(rec, rec->evlist, true);
520 }
521 
522 static void record__init_features(struct record *rec)
523 {
524 	struct perf_session *session = rec->session;
525 	int feat;
526 
527 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
528 		perf_header__set_feat(&session->header, feat);
529 
530 	if (rec->no_buildid)
531 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
532 
533 	if (!have_tracepoints(&rec->evlist->entries))
534 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
535 
536 	if (!rec->opts.branch_stack)
537 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
538 
539 	if (!rec->opts.full_auxtrace)
540 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
541 
542 	perf_header__clear_feat(&session->header, HEADER_STAT);
543 }
544 
545 static void
546 record__finish_output(struct record *rec)
547 {
548 	struct perf_data *data = &rec->data;
549 	int fd = perf_data__fd(data);
550 
551 	if (data->is_pipe)
552 		return;
553 
554 	rec->session->header.data_size += rec->bytes_written;
555 	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
556 
557 	if (!rec->no_buildid) {
558 		process_buildids(rec);
559 
560 		if (rec->buildid_all)
561 			dsos__hit_all(rec->session);
562 	}
563 	perf_session__write_header(rec->session, rec->evlist, fd, true);
564 
565 	return;
566 }
567 
568 static int record__synthesize_workload(struct record *rec, bool tail)
569 {
570 	int err;
571 	struct thread_map *thread_map;
572 
573 	if (rec->opts.tail_synthesize != tail)
574 		return 0;
575 
576 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
577 	if (thread_map == NULL)
578 		return -1;
579 
580 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
581 						 process_synthesized_event,
582 						 &rec->session->machines.host,
583 						 rec->opts.sample_address,
584 						 rec->opts.proc_map_timeout);
585 	thread_map__put(thread_map);
586 	return err;
587 }
588 
589 static int record__synthesize(struct record *rec, bool tail);
590 
591 static int
592 record__switch_output(struct record *rec, bool at_exit)
593 {
594 	struct perf_data *data = &rec->data;
595 	int fd, err;
596 
597 	/* Same size as a real timestamp, e.g. "2015122520103046" */
598 	char timestamp[] = "InvalidTimestamp";
599 
600 	record__synthesize(rec, true);
601 	if (target__none(&rec->opts.target))
602 		record__synthesize_workload(rec, true);
603 
604 	rec->samples = 0;
605 	record__finish_output(rec);
606 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
607 	if (err) {
608 		pr_err("Failed to get current timestamp\n");
609 		return -EINVAL;
610 	}
611 
612 	fd = perf_data__switch(data, timestamp,
613 				    rec->session->header.data_offset,
614 				    at_exit);
615 	if (fd >= 0 && !at_exit) {
616 		rec->bytes_written = 0;
617 		rec->session->header.data_size = 0;
618 	}
619 
620 	if (!quiet)
621 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
622 			data->file.path, timestamp);
623 
624 	/* Output tracking events */
625 	if (!at_exit) {
626 		record__synthesize(rec, false);
627 
628 		/*
629 		 * In 'perf record --switch-output' without -a,
630 		 * record__synthesize() in record__switch_output() won't
631 		 * generate tracking events, because there's no thread_map
632 		 * in the evlist, so the newly created perf.data would lack
633 		 * map and comm information.
634 		 * Create a fake thread_map and call
635 		 * perf_event__synthesize_thread_map() directly for those events.
636 		 */
637 		if (target__none(&rec->opts.target))
638 			record__synthesize_workload(rec, false);
639 	}
640 	return fd;
641 }
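/*
 * Annotation: the rotated output is named "<path>.<timestamp>", e.g.
 * (illustratively) "perf.data.2015122520103046", matching the
 * "Dump %s.%s" message above.
 */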
642 
643 static volatile int workload_exec_errno;
644 
645 /*
646  * perf_evlist__prepare_workload will send a SIGUSR1
647  * if the fork fails, since we asked for it by setting
648  * its want_signal parameter to true.
649  */
650 static void workload_exec_failed_signal(int signo __maybe_unused,
651 					siginfo_t *info,
652 					void *ucontext __maybe_unused)
653 {
654 	workload_exec_errno = info->si_value.sival_int;
655 	done = 1;
656 	child_finished = 1;
657 }
658 
659 static void snapshot_sig_handler(int sig);
660 static void alarm_sig_handler(int sig);
661 
662 int __weak
663 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
664 			    struct perf_tool *tool __maybe_unused,
665 			    perf_event__handler_t process __maybe_unused,
666 			    struct machine *machine __maybe_unused)
667 {
668 	return 0;
669 }
670 
671 static const struct perf_event_mmap_page *
672 perf_evlist__pick_pc(struct perf_evlist *evlist)
673 {
674 	if (evlist) {
675 		if (evlist->mmap && evlist->mmap[0].base)
676 			return evlist->mmap[0].base;
677 		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
678 			return evlist->backward_mmap[0].base;
679 	}
680 	return NULL;
681 }
682 
683 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
684 {
685 	const struct perf_event_mmap_page *pc;
686 
687 	pc = perf_evlist__pick_pc(rec->evlist);
688 	if (pc)
689 		return pc;
690 	return NULL;
691 }
692 
693 static int record__synthesize(struct record *rec, bool tail)
694 {
695 	struct perf_session *session = rec->session;
696 	struct machine *machine = &session->machines.host;
697 	struct perf_data *data = &rec->data;
698 	struct record_opts *opts = &rec->opts;
699 	struct perf_tool *tool = &rec->tool;
700 	int fd = perf_data__fd(data);
701 	int err = 0;
702 
703 	if (rec->opts.tail_synthesize != tail)
704 		return 0;
705 
706 	if (data->is_pipe) {
707 		err = perf_event__synthesize_features(
708 			tool, session, rec->evlist, process_synthesized_event);
709 		if (err < 0) {
710 			pr_err("Couldn't synthesize features.\n");
711 			return err;
712 		}
713 
714 		err = perf_event__synthesize_attrs(tool, session,
715 						   process_synthesized_event);
716 		if (err < 0) {
717 			pr_err("Couldn't synthesize attrs.\n");
718 			goto out;
719 		}
720 
721 		if (have_tracepoints(&rec->evlist->entries)) {
722 			/*
723 			 * FIXME err <= 0 here actually means that
724 			 * there were no tracepoints, so it's not really
725 			 * an error, just that we don't need to
726 			 * synthesize anything.  We really have to
727 			 * return this more properly and also
728 			 * propagate errors that are now calling die()
729 			 */
730 			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
731 								  process_synthesized_event);
732 			if (err <= 0) {
733 				pr_err("Couldn't record tracing data.\n");
734 				goto out;
735 			}
736 			rec->bytes_written += err;
737 		}
738 	}
739 
740 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
741 					  process_synthesized_event, machine);
742 	if (err)
743 		goto out;
744 
745 	if (rec->opts.full_auxtrace) {
746 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
747 					session, process_synthesized_event);
748 		if (err)
749 			goto out;
750 	}
751 
752 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
753 						 machine);
754 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
755 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
756 			   "Check /proc/kallsyms permission or run as root.\n");
757 
758 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
759 					     machine);
760 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
761 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
762 			   "Check /proc/modules permission or run as root.\n");
763 
764 	if (perf_guest) {
765 		machines__process_guests(&session->machines,
766 					 perf_event__synthesize_guest_os, tool);
767 	}
768 
769 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
770 					    process_synthesized_event, opts->sample_address,
771 					    opts->proc_map_timeout, 1);
772 out:
773 	return err;
774 }
775 
776 static int __cmd_record(struct record *rec, int argc, const char **argv)
777 {
778 	int err;
779 	int status = 0;
780 	unsigned long waking = 0;
781 	const bool forks = argc > 0;
782 	struct machine *machine;
783 	struct perf_tool *tool = &rec->tool;
784 	struct record_opts *opts = &rec->opts;
785 	struct perf_data *data = &rec->data;
786 	struct perf_session *session;
787 	bool disabled = false, draining = false;
788 	int fd;
789 
790 	rec->progname = argv[0];
791 
792 	atexit(record__sig_exit);
793 	signal(SIGCHLD, sig_handler);
794 	signal(SIGINT, sig_handler);
795 	signal(SIGTERM, sig_handler);
796 	signal(SIGSEGV, sigsegv_handler);
797 
798 	if (rec->opts.record_namespaces)
799 		tool->namespace_events = true;
800 
801 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
802 		signal(SIGUSR2, snapshot_sig_handler);
803 		if (rec->opts.auxtrace_snapshot_mode)
804 			trigger_on(&auxtrace_snapshot_trigger);
805 		if (rec->switch_output.enabled)
806 			trigger_on(&switch_output_trigger);
807 	} else {
808 		signal(SIGUSR2, SIG_IGN);
809 	}
810 
811 	session = perf_session__new(data, false, tool);
812 	if (session == NULL) {
813 		pr_err("Perf session creation failed.\n");
814 		return -1;
815 	}
816 
817 	fd = perf_data__fd(data);
818 	rec->session = session;
819 
820 	record__init_features(rec);
821 
822 	if (forks) {
823 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
824 						    argv, data->is_pipe,
825 						    workload_exec_failed_signal);
826 		if (err < 0) {
827 			pr_err("Couldn't run the workload!\n");
828 			status = err;
829 			goto out_delete_session;
830 		}
831 	}
832 
833 	if (record__open(rec) != 0) {
834 		err = -1;
835 		goto out_child;
836 	}
837 
838 	err = bpf__apply_obj_config();
839 	if (err) {
840 		char errbuf[BUFSIZ];
841 
842 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
843 		pr_err("ERROR: Apply config to BPF failed: %s\n",
844 			 errbuf);
845 		goto out_child;
846 	}
847 
848 	/*
849 	 * Normally perf_session__new would do this, but it doesn't have the
850 	 * evlist.
851 	 */
852 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
853 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
854 		rec->tool.ordered_events = false;
855 	}
856 
857 	if (!rec->evlist->nr_groups)
858 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
859 
860 	if (data->is_pipe) {
861 		err = perf_header__write_pipe(fd);
862 		if (err < 0)
863 			goto out_child;
864 	} else {
865 		err = perf_session__write_header(session, rec->evlist, fd, false);
866 		if (err < 0)
867 			goto out_child;
868 	}
869 
870 	if (!rec->no_buildid
871 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
872 		pr_err("Couldn't generate buildids. "
873 		       "Use --no-buildid to profile anyway.\n");
874 		err = -1;
875 		goto out_child;
876 	}
877 
878 	machine = &session->machines.host;
879 
880 	err = record__synthesize(rec, false);
881 	if (err < 0)
882 		goto out_child;
883 
884 	if (rec->realtime_prio) {
885 		struct sched_param param;
886 
887 		param.sched_priority = rec->realtime_prio;
888 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
889 			pr_err("Could not set realtime priority.\n");
890 			err = -1;
891 			goto out_child;
892 		}
893 	}
894 
895 	/*
896 	 * When perf is starting the traced process, all the events
897 	 * (apart from group members) have enable_on_exec=1 set,
898 	 * so don't spoil it by prematurely enabling them.
899 	 */
900 	if (!target__none(&opts->target) && !opts->initial_delay)
901 		perf_evlist__enable(rec->evlist);
902 
903 	/*
904 	 * Let the child rip
905 	 */
906 	if (forks) {
907 		union perf_event *event;
908 		pid_t tgid;
909 
910 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
911 		if (event == NULL) {
912 			err = -ENOMEM;
913 			goto out_child;
914 		}
915 
916 		/*
917 		 * Some H/W events are generated before the COMM event,
918 		 * which is emitted during exec(), so perf script
919 		 * cannot see a correct process name for those events.
920 		 * Synthesize a COMM event up front to prevent that.
921 		 */
922 		tgid = perf_event__synthesize_comm(tool, event,
923 						   rec->evlist->workload.pid,
924 						   process_synthesized_event,
925 						   machine);
926 		free(event);
927 
928 		if (tgid == -1)
929 			goto out_child;
930 
931 		event = malloc(sizeof(event->namespaces) +
932 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
933 			       machine->id_hdr_size);
934 		if (event == NULL) {
935 			err = -ENOMEM;
936 			goto out_child;
937 		}
938 
939 		/*
940 		 * Synthesize NAMESPACES event for the command specified.
941 		 */
942 		perf_event__synthesize_namespaces(tool, event,
943 						  rec->evlist->workload.pid,
944 						  tgid, process_synthesized_event,
945 						  machine);
946 		free(event);
947 
948 		perf_evlist__start_workload(rec->evlist);
949 	}
950 
951 	if (opts->initial_delay) {
952 		usleep(opts->initial_delay * USEC_PER_MSEC);
953 		perf_evlist__enable(rec->evlist);
954 	}
955 
956 	trigger_ready(&auxtrace_snapshot_trigger);
957 	trigger_ready(&switch_output_trigger);
958 	perf_hooks__invoke_record_start();
959 	for (;;) {
960 		unsigned long long hits = rec->samples;
961 
962 		/*
963 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
964 		 * here: when done == true and hits != rec->samples
965 		 * in the previous round.
966 		 *
967 		 * perf_evlist__toggle_bkw_mmap() ensures we never
968 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
969 		 */
970 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
971 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
972 
973 		if (record__mmap_read_all(rec) < 0) {
974 			trigger_error(&auxtrace_snapshot_trigger);
975 			trigger_error(&switch_output_trigger);
976 			err = -1;
977 			goto out_child;
978 		}
979 
980 		if (auxtrace_record__snapshot_started) {
981 			auxtrace_record__snapshot_started = 0;
982 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
983 				record__read_auxtrace_snapshot(rec);
984 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
985 				pr_err("AUX area tracing snapshot failed\n");
986 				err = -1;
987 				goto out_child;
988 			}
989 		}
990 
991 		if (trigger_is_hit(&switch_output_trigger)) {
992 			/*
993 			 * If switch_output_trigger is hit, the data in the
994 			 * overwritable ring buffer should have been collected,
995 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
996 			 *
997 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
998 			 * it didn't collect data from the overwritable ring
999 			 * buffer. Read it again.
1000 			 */
1001 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1002 				continue;
1003 			trigger_ready(&switch_output_trigger);
1004 
1005 			/*
1006 			 * Reenable events in overwrite ring buffer after
1007 			 * record__mmap_read_all(): we should have collected
1008 			 * data from it.
1009 			 */
1010 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1011 
1012 			if (!quiet)
1013 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1014 					waking);
1015 			waking = 0;
1016 			fd = record__switch_output(rec, false);
1017 			if (fd < 0) {
1018 				pr_err("Failed to switch to new file\n");
1019 				trigger_error(&switch_output_trigger);
1020 				err = fd;
1021 				goto out_child;
1022 			}
1023 
1024 			/* re-arm the alarm */
1025 			if (rec->switch_output.time)
1026 				alarm(rec->switch_output.time);
1027 		}
1028 
1029 		if (hits == rec->samples) {
1030 			if (done || draining)
1031 				break;
1032 			err = perf_evlist__poll(rec->evlist, -1);
1033 			/*
1034 			 * Propagate error, only if there's any. Ignore positive
1035 			 * number of returned events and interrupt error.
1036 			 */
1037 			if (err > 0 || (err < 0 && errno == EINTR))
1038 				err = 0;
1039 			waking++;
1040 
1041 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1042 				draining = true;
1043 		}
1044 
1045 		/*
1046 		 * When perf starts the traced process, the events die with
1047 		 * it at the end and we wait for that, so there is no need
1048 		 * to disable the events in this case.
1049 		 */
1050 		if (done && !disabled && !target__none(&opts->target)) {
1051 			trigger_off(&auxtrace_snapshot_trigger);
1052 			perf_evlist__disable(rec->evlist);
1053 			disabled = true;
1054 		}
1055 	}
1056 	trigger_off(&auxtrace_snapshot_trigger);
1057 	trigger_off(&switch_output_trigger);
1058 
1059 	if (forks && workload_exec_errno) {
1060 		char msg[STRERR_BUFSIZE];
1061 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1062 		pr_err("Workload failed: %s\n", emsg);
1063 		err = -1;
1064 		goto out_child;
1065 	}
1066 
1067 	if (!quiet)
1068 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1069 
1070 	if (target__none(&rec->opts.target))
1071 		record__synthesize_workload(rec, true);
1072 
1073 out_child:
1074 	if (forks) {
1075 		int exit_status;
1076 
1077 		if (!child_finished)
1078 			kill(rec->evlist->workload.pid, SIGTERM);
1079 
1080 		wait(&exit_status);
1081 
1082 		if (err < 0)
1083 			status = err;
1084 		else if (WIFEXITED(exit_status))
1085 			status = WEXITSTATUS(exit_status);
1086 		else if (WIFSIGNALED(exit_status))
1087 			signr = WTERMSIG(exit_status);
1088 	} else
1089 		status = err;
1090 
1091 	record__synthesize(rec, true);
1092 	/* this will be recalculated during process_buildids() */
1093 	rec->samples = 0;
1094 
1095 	if (!err) {
1096 		if (!rec->timestamp_filename) {
1097 			record__finish_output(rec);
1098 		} else {
1099 			fd = record__switch_output(rec, true);
1100 			if (fd < 0) {
1101 				status = fd;
1102 				goto out_delete_session;
1103 			}
1104 		}
1105 	}
1106 
1107 	perf_hooks__invoke_record_end();
1108 
1109 	if (!err && !quiet) {
1110 		char samples[128];
1111 		const char *postfix = rec->timestamp_filename ?
1112 					".<timestamp>" : "";
1113 
1114 		if (rec->samples && !rec->opts.full_auxtrace)
1115 			scnprintf(samples, sizeof(samples),
1116 				  " (%" PRIu64 " samples)", rec->samples);
1117 		else
1118 			samples[0] = '\0';
1119 
1120 		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1121 			perf_data__size(data) / 1024.0 / 1024.0,
1122 			data->file.path, postfix, samples);
1123 	}
1124 
1125 out_delete_session:
1126 	perf_session__delete(session);
1127 	return status;
1128 }
1129 
1130 static void callchain_debug(struct callchain_param *callchain)
1131 {
1132 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1133 
1134 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1135 
1136 	if (callchain->record_mode == CALLCHAIN_DWARF)
1137 		pr_debug("callchain: stack dump size %d\n",
1138 			 callchain->dump_size);
1139 }
1140 
1141 int record_opts__parse_callchain(struct record_opts *record,
1142 				 struct callchain_param *callchain,
1143 				 const char *arg, bool unset)
1144 {
1145 	int ret;
1146 	callchain->enabled = !unset;
1147 
1148 	/* --no-call-graph */
1149 	if (unset) {
1150 		callchain->record_mode = CALLCHAIN_NONE;
1151 		pr_debug("callchain: disabled\n");
1152 		return 0;
1153 	}
1154 
1155 	ret = parse_callchain_record_opt(arg, callchain);
1156 	if (!ret) {
1157 		/* Enable data address sampling for DWARF unwind. */
1158 		if (callchain->record_mode == CALLCHAIN_DWARF)
1159 			record->sample_address = true;
1160 		callchain_debug(callchain);
1161 	}
1162 
1163 	return ret;
1164 }
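/*
 * Illustrative example: "--call-graph dwarf,4096" is parsed by
 * parse_callchain_record_opt() into CALLCHAIN_DWARF with a 4096-byte
 * stack dump size, and the branch above then also enables
 * record->sample_address for the DWARF unwinder.
 */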
1165 
1166 int record_parse_callchain_opt(const struct option *opt,
1167 			       const char *arg,
1168 			       int unset)
1169 {
1170 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1171 }
1172 
1173 int record_callchain_opt(const struct option *opt,
1174 			 const char *arg __maybe_unused,
1175 			 int unset __maybe_unused)
1176 {
1177 	struct callchain_param *callchain = opt->value;
1178 
1179 	callchain->enabled = true;
1180 
1181 	if (callchain->record_mode == CALLCHAIN_NONE)
1182 		callchain->record_mode = CALLCHAIN_FP;
1183 
1184 	callchain_debug(callchain);
1185 	return 0;
1186 }
1187 
1188 static int perf_record_config(const char *var, const char *value, void *cb)
1189 {
1190 	struct record *rec = cb;
1191 
1192 	if (!strcmp(var, "record.build-id")) {
1193 		if (!strcmp(value, "cache"))
1194 			rec->no_buildid_cache = false;
1195 		else if (!strcmp(value, "no-cache"))
1196 			rec->no_buildid_cache = true;
1197 		else if (!strcmp(value, "skip"))
1198 			rec->no_buildid = true;
1199 		else
1200 			return -1;
1201 		return 0;
1202 	}
1203 	if (!strcmp(var, "record.call-graph"))
1204 		var = "call-graph.record-mode"; /* fall-through */
1205 
1206 	return perf_default_config(var, value, cb);
1207 }
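/*
 * An illustrative ~/.perfconfig stanza handled by the code above:
 *
 *   [record]
 *           build-id = no-cache     # like -N / --no-buildid-cache
 *           call-graph = dwarf      # remapped to call-graph.record-mode
 */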
1208 
1209 struct clockid_map {
1210 	const char *name;
1211 	int clockid;
1212 };
1213 
1214 #define CLOCKID_MAP(n, c)	\
1215 	{ .name = n, .clockid = (c), }
1216 
1217 #define CLOCKID_END	{ .name = NULL, }
1218 
1219 
1220 /*
1221  * Add the missing ones, we need to build on many distros...
1222  */
1223 #ifndef CLOCK_MONOTONIC_RAW
1224 #define CLOCK_MONOTONIC_RAW 4
1225 #endif
1226 #ifndef CLOCK_BOOTTIME
1227 #define CLOCK_BOOTTIME 7
1228 #endif
1229 #ifndef CLOCK_TAI
1230 #define CLOCK_TAI 11
1231 #endif
1232 
1233 static const struct clockid_map clockids[] = {
1234 	/* available for all events, NMI safe */
1235 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1236 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1237 
1238 	/* available for some events */
1239 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1240 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1241 	CLOCKID_MAP("tai", CLOCK_TAI),
1242 
1243 	/* available for the lazy */
1244 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1245 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1246 	CLOCKID_MAP("real", CLOCK_REALTIME),
1247 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1248 
1249 	CLOCKID_END,
1250 };
1251 
1252 static int parse_clockid(const struct option *opt, const char *str, int unset)
1253 {
1254 	struct record_opts *opts = (struct record_opts *)opt->value;
1255 	const struct clockid_map *cm;
1256 	const char *ostr = str;
1257 
1258 	if (unset) {
1259 		opts->use_clockid = 0;
1260 		return 0;
1261 	}
1262 
1263 	/* no arg passed */
1264 	if (!str)
1265 		return 0;
1266 
1267 	/* no setting it twice */
1268 	if (opts->use_clockid)
1269 		return -1;
1270 
1271 	opts->use_clockid = true;
1272 
1273 	/* if it's a number, we're done */
1274 	if (sscanf(str, "%d", &opts->clockid) == 1)
1275 		return 0;
1276 
1277 	/* allow a "CLOCK_" prefix to the name */
1278 	if (!strncasecmp(str, "CLOCK_", 6))
1279 		str += 6;
1280 
1281 	for (cm = clockids; cm->name; cm++) {
1282 		if (!strcasecmp(str, cm->name)) {
1283 			opts->clockid = cm->clockid;
1284 			return 0;
1285 		}
1286 	}
1287 
1288 	opts->use_clockid = false;
1289 	ui__warning("unknown clockid %s, check man page\n", ostr);
1290 	return -1;
1291 }
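/*
 * Accepted -k/--clockid forms (illustrative): a raw number ("-k 4"), a
 * name from the table ("-k monotonic_raw"), or the same name with a
 * "CLOCK_" prefix ("-k CLOCK_MONOTONIC_RAW"); matching is
 * case-insensitive.
 */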
1292 
1293 static int record__parse_mmap_pages(const struct option *opt,
1294 				    const char *str,
1295 				    int unset __maybe_unused)
1296 {
1297 	struct record_opts *opts = opt->value;
1298 	char *s, *p;
1299 	unsigned int mmap_pages;
1300 	int ret;
1301 
1302 	if (!str)
1303 		return -EINVAL;
1304 
1305 	s = strdup(str);
1306 	if (!s)
1307 		return -ENOMEM;
1308 
1309 	p = strchr(s, ',');
1310 	if (p)
1311 		*p = '\0';
1312 
1313 	if (*s) {
1314 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1315 		if (ret)
1316 			goto out_free;
1317 		opts->mmap_pages = mmap_pages;
1318 	}
1319 
1320 	if (!p) {
1321 		ret = 0;
1322 		goto out_free;
1323 	}
1324 
1325 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1326 	if (ret)
1327 		goto out_free;
1328 
1329 	opts->auxtrace_mmap_pages = mmap_pages;
1330 
1331 out_free:
1332 	free(s);
1333 	return ret;
1334 }
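/*
 * Illustrative examples for the parsing above: "-m 128,256" sets 128
 * data pages and 256 AUX area pages, "-m 128" sets only the data pages,
 * and "-m ,256" leaves the data pages alone and sets only the AUX size.
 */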
1335 
1336 static void switch_output_size_warn(struct record *rec)
1337 {
1338 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1339 	struct switch_output *s = &rec->switch_output;
1340 
1341 	wakeup_size /= 2;
1342 
1343 	if (s->size < wakeup_size) {
1344 		char buf[100];
1345 
1346 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1347 		pr_warning("WARNING: switch-output data size is lower than the "
1348 			   "wakeup kernel buffer size (%s), "
1349 			   "expect bigger perf.data sizes\n", buf);
1350 	}
1351 }
1352 
1353 static int switch_output_setup(struct record *rec)
1354 {
1355 	struct switch_output *s = &rec->switch_output;
1356 	static struct parse_tag tags_size[] = {
1357 		{ .tag  = 'B', .mult = 1       },
1358 		{ .tag  = 'K', .mult = 1 << 10 },
1359 		{ .tag  = 'M', .mult = 1 << 20 },
1360 		{ .tag  = 'G', .mult = 1 << 30 },
1361 		{ .tag  = 0 },
1362 	};
1363 	static struct parse_tag tags_time[] = {
1364 		{ .tag  = 's', .mult = 1        },
1365 		{ .tag  = 'm', .mult = 60       },
1366 		{ .tag  = 'h', .mult = 60*60    },
1367 		{ .tag  = 'd', .mult = 60*60*24 },
1368 		{ .tag  = 0 },
1369 	};
1370 	unsigned long val;
1371 
1372 	if (!s->set)
1373 		return 0;
1374 
1375 	if (!strcmp(s->str, "signal")) {
1376 		s->signal = true;
1377 		pr_debug("switch-output with SIGUSR2 signal\n");
1378 		goto enabled;
1379 	}
1380 
1381 	val = parse_tag_value(s->str, tags_size);
1382 	if (val != (unsigned long) -1) {
1383 		s->size = val;
1384 		pr_debug("switch-output with %s size threshold\n", s->str);
1385 		goto enabled;
1386 	}
1387 
1388 	val = parse_tag_value(s->str, tags_time);
1389 	if (val != (unsigned long) -1) {
1390 		s->time = val;
1391 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1392 			 s->str, s->time);
1393 		goto enabled;
1394 	}
1395 
1396 	return -1;
1397 
1398 enabled:
1399 	rec->timestamp_filename = true;
1400 	s->enabled              = true;
1401 
1402 	if (s->size && !rec->opts.no_buffering)
1403 		switch_output_size_warn(rec);
1404 
1405 	return 0;
1406 }
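/*
 * Worked examples for the tag parsing above (values illustrative):
 *
 *   --switch-output           s->str is the default "signal": rotate
 *                             on SIGUSR2
 *   --switch-output=100M      s->size = 100 << 20 bytes
 *   --switch-output=30s       s->time = 30; cmd_record() then arms
 *                             alarm(30) and re-arms it on each switch
 */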
1407 
1408 static const char * const __record_usage[] = {
1409 	"perf record [<options>] [<command>]",
1410 	"perf record [<options>] -- <command> [<options>]",
1411 	NULL
1412 };
1413 const char * const *record_usage = __record_usage;
1414 
1415 /*
1416  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1417  * because we need access to it in record__exit, which is called after
1418  * cmd_record() exits; but since record_options needs to be accessible to
1419  * builtin-script, leave it here.
1420  *
1421  * At least we don't touch it directly in all the other functions here.
1422  *
1423  * Just say no to tons of global variables, sigh.
1424  */
1425 static struct record record = {
1426 	.opts = {
1427 		.sample_time	     = true,
1428 		.mmap_pages	     = UINT_MAX,
1429 		.user_freq	     = UINT_MAX,
1430 		.user_interval	     = ULLONG_MAX,
1431 		.freq		     = 4000,
1432 		.target		     = {
1433 			.uses_mmap   = true,
1434 			.default_per_cpu = true,
1435 		},
1436 		.proc_map_timeout     = 500,
1437 	},
1438 	.tool = {
1439 		.sample		= process_sample_event,
1440 		.fork		= perf_event__process_fork,
1441 		.exit		= perf_event__process_exit,
1442 		.comm		= perf_event__process_comm,
1443 		.namespaces	= perf_event__process_namespaces,
1444 		.mmap		= perf_event__process_mmap,
1445 		.mmap2		= perf_event__process_mmap2,
1446 		.ordered_events	= true,
1447 	},
1448 };
1449 
1450 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1451 	"\n\t\t\t\tDefault: fp";
1452 
1453 static bool dry_run;
1454 
1455 /*
1456  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1457  * with it and switch to using the library functions in perf_evlist that came
1458  * from builtin-record.c, i.e. use record_opts,
1459  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1460  * using pipes, etc.
1461  */
1462 static struct option __record_options[] = {
1463 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1464 		     "event selector. use 'perf list' to list available events",
1465 		     parse_events_option),
1466 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1467 		     "event filter", parse_filter),
1468 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1469 			   NULL, "don't record events from perf itself",
1470 			   exclude_perf),
1471 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1472 		    "record events on existing process id"),
1473 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1474 		    "record events on existing thread id"),
1475 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1476 		    "collect data with this RT SCHED_FIFO priority"),
1477 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1478 		    "collect data without buffering"),
1479 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1480 		    "collect raw sample records from all opened counters"),
1481 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1482 			    "system-wide collection from all CPUs"),
1483 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1484 		    "list of cpus to monitor"),
1485 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1486 	OPT_STRING('o', "output", &record.data.file.path, "file",
1487 		    "output file name"),
1488 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1489 			&record.opts.no_inherit_set,
1490 			"child tasks do not inherit counters"),
1491 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1492 		    "synthesize non-sample events at the end of output"),
1493 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1494 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1495 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1496 		     "number of mmap data pages and AUX area tracing mmap pages",
1497 		     record__parse_mmap_pages),
1498 	OPT_BOOLEAN(0, "group", &record.opts.group,
1499 		    "put the counters into a counter group"),
1500 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1501 			   NULL, "enables call-graph recording",
1502 			   &record_callchain_opt),
1503 	OPT_CALLBACK(0, "call-graph", &record.opts,
1504 		     "record_mode[,record_size]", record_callchain_help,
1505 		     &record_parse_callchain_opt),
1506 	OPT_INCR('v', "verbose", &verbose,
1507 		    "be more verbose (show counter open errors, etc)"),
1508 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1509 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1510 		    "per thread counts"),
1511 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1512 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1513 		    "Record the sample physical addresses"),
1514 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1515 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1516 			&record.opts.sample_time_set,
1517 			"Record the sample timestamps"),
1518 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1519 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1520 		    "don't sample"),
1521 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1522 			&record.no_buildid_cache_set,
1523 			"do not update the buildid cache"),
1524 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1525 			&record.no_buildid_set,
1526 			"do not collect buildids in perf.data"),
1527 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1528 		     "monitor event in cgroup name only",
1529 		     parse_cgroups),
1530 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1531 		  "ms to wait before starting measurement after program start"),
1532 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1533 		   "user to profile"),
1534 
1535 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1536 		     "branch any", "sample any taken branches",
1537 		     parse_branch_stack),
1538 
1539 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1540 		     "branch filter mask", "branch stack filter modes",
1541 		     parse_branch_stack),
1542 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1543 		    "sample by weight (on special events only)"),
1544 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1545 		    "sample transaction flags (special events only)"),
1546 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1547 		    "use per-thread mmaps"),
1548 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1549 		    "sample selected machine registers on interrupt,"
1550 		    " use -I ? to list register names", parse_regs),
1551 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1552 		    "sample selected user-space machine registers on interrupt,"
1553 		    " use --user-regs=? to list register names", parse_regs),
1554 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1555 		    "Record running/enabled time of read (:S) events"),
1556 	OPT_CALLBACK('k', "clockid", &record.opts,
1557 		     "clockid", "clockid to use for events, see clock_gettime()",
1558 		     parse_clockid),
1559 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1560 			  "opts", "AUX area tracing Snapshot Mode", ""),
1561 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1562 			"per thread proc mmap processing timeout in ms"),
1563 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1564 		    "Record namespaces events"),
1565 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1566 		    "Record context switch events"),
1567 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1568 			 "Configure all used events to run in kernel space.",
1569 			 PARSE_OPT_EXCLUSIVE),
1570 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1571 			 "Configure all used events to run in user space.",
1572 			 PARSE_OPT_EXCLUSIVE),
1573 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1574 		   "clang binary to use for compiling BPF scriptlets"),
1575 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1576 		   "options passed to clang when compiling BPF scriptlets"),
1577 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1578 		   "file", "vmlinux pathname"),
1579 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1580 		    "Record build-id of all DSOs regardless of hits"),
1581 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1582 		    "append timestamp to output filename"),
1583 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1584 			  &record.switch_output.set, "signal,size,time",
1585 			  "Switch output when receiving SIGUSR2 or crossing the size/time threshold",
1586 			  "signal"),
1587 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1588 		    "Parse options then exit"),
1589 	OPT_END()
1590 };
1591 
1592 struct option *record_options = __record_options;
1593 
1594 int cmd_record(int argc, const char **argv)
1595 {
1596 	int err;
1597 	struct record *rec = &record;
1598 	char errbuf[BUFSIZ];
1599 
1600 #ifndef HAVE_LIBBPF_SUPPORT
1601 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1602 	set_nobuild('\0', "clang-path", true);
1603 	set_nobuild('\0', "clang-opt", true);
1604 # undef set_nobuild
1605 #endif
1606 
1607 #ifndef HAVE_BPF_PROLOGUE
1608 # if !defined (HAVE_DWARF_SUPPORT)
1609 #  define REASON  "NO_DWARF=1"
1610 # elif !defined (HAVE_LIBBPF_SUPPORT)
1611 #  define REASON  "NO_LIBBPF=1"
1612 # else
1613 #  define REASON  "this architecture doesn't support BPF prologue"
1614 # endif
1615 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1616 	set_nobuild('\0', "vmlinux", true);
1617 # undef set_nobuild
1618 # undef REASON
1619 #endif
1620 
1621 	rec->evlist = perf_evlist__new();
1622 	if (rec->evlist == NULL)
1623 		return -ENOMEM;
1624 
1625 	err = perf_config(perf_record_config, rec);
1626 	if (err)
1627 		return err;
1628 
1629 	argc = parse_options(argc, argv, record_options, record_usage,
1630 			    PARSE_OPT_STOP_AT_NON_OPTION);
1631 	if (quiet)
1632 		perf_quiet_option();
1633 
1634 	/* Make system wide (-a) the default target. */
1635 	if (!argc && target__none(&rec->opts.target))
1636 		rec->opts.target.system_wide = true;
1637 
1638 	if (nr_cgroups && !rec->opts.target.system_wide) {
1639 		usage_with_options_msg(record_usage, record_options,
1640 			"cgroup monitoring only available in system-wide mode");
1642 	}
1643 	if (rec->opts.record_switch_events &&
1644 	    !perf_can_record_switch_events()) {
1645 		ui__error("kernel does not support recording context switch events\n");
1646 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1647 		return -EINVAL;
1648 	}
1649 
1650 	if (switch_output_setup(rec)) {
1651 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1652 		return -EINVAL;
1653 	}
1654 
1655 	if (rec->switch_output.time) {
1656 		signal(SIGALRM, alarm_sig_handler);
1657 		alarm(rec->switch_output.time);
1658 	}
1659 
1660 	if (!rec->itr) {
1661 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1662 		if (err)
1663 			goto out;
1664 	}
1665 
1666 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1667 					      rec->opts.auxtrace_snapshot_opts);
1668 	if (err)
1669 		goto out;
1670 
1671 	/*
1672 	 * Allow aliases to facilitate the lookup of symbols for address
1673 	 * filters. Refer to auxtrace_parse_filters().
1674 	 */
1675 	symbol_conf.allow_aliases = true;
1676 
1677 	symbol__init(NULL);
1678 
1679 	err = auxtrace_parse_filters(rec->evlist);
1680 	if (err)
1681 		goto out;
1682 
1683 	if (dry_run)
1684 		goto out;
1685 
1686 	err = bpf__setup_stdout(rec->evlist);
1687 	if (err) {
1688 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1689 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1690 			 errbuf);
1691 		goto out;
1692 	}
1693 
1694 	err = -ENOMEM;
1695 
1696 	if (symbol_conf.kptr_restrict)
1697 		pr_warning(
1698 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1699 "check /proc/sys/kernel/kptr_restrict.\n\n"
1700 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1701 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1702 "Samples in kernel modules won't be resolved at all.\n\n"
1703 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1704 "even with a suitable vmlinux or kallsyms file.\n\n");
1705 
1706 	if (rec->no_buildid_cache || rec->no_buildid) {
1707 		disable_buildid_cache();
1708 	} else if (rec->switch_output.enabled) {
1709 		/*
1710 		 * In 'perf record --switch-output', disable buildid
1711 		 * generation by default to reduce data file switching
1712 		 * overhead. Still generate buildids if they are explicitly
1713 		 * required using
1714 		 *
1715 		 *  perf record --switch-output --no-no-buildid \
1716 		 *              --no-no-buildid-cache
1717 		 *
1718 		 * The following code is equivalent to:
1719 		 *
1720 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1721 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1722 		 *         disable_buildid_cache();
1723 		 */
1724 		bool disable = true;
1725 
1726 		if (rec->no_buildid_set && !rec->no_buildid)
1727 			disable = false;
1728 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1729 			disable = false;
1730 		if (disable) {
1731 			rec->no_buildid = true;
1732 			rec->no_buildid_cache = true;
1733 			disable_buildid_cache();
1734 		}
1735 	}
1736 
1737 	if (record.opts.overwrite)
1738 		record.opts.tail_synthesize = true;
1739 
1740 	if (rec->evlist->nr_entries == 0 &&
1741 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1742 		pr_err("Not enough memory for event selector list\n");
1743 		goto out;
1744 	}
1745 
1746 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1747 		rec->opts.no_inherit = true;
1748 
1749 	err = target__validate(&rec->opts.target);
1750 	if (err) {
1751 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1752 		ui__warning("%s", errbuf);
1753 	}
1754 
1755 	err = target__parse_uid(&rec->opts.target);
1756 	if (err) {
1757 		int saved_errno = errno;
1758 
1759 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1760 		ui__error("%s", errbuf);
1761 
1762 		err = -saved_errno;
1763 		goto out;
1764 	}
1765 
1766 	/* Enable ignoring missing threads when -u option is defined. */
1767 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1768 
1769 	err = -ENOMEM;
1770 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1771 		usage_with_options(record_usage, record_options);
1772 
1773 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1774 	if (err)
1775 		goto out;
1776 
1777 	/*
1778 	 * We take all buildids when the file contains AUX area
1779 	 * tracing data, because decoding the trace to find out
1780 	 * which DSOs were hit would take too long.
1781 	 */
1782 	if (rec->opts.full_auxtrace)
1783 		rec->buildid_all = true;
1784 
1785 	if (record_opts__config(&rec->opts)) {
1786 		err = -EINVAL;
1787 		goto out;
1788 	}
1789 
1790 	err = __cmd_record(&record, argc, argv);
1791 out:
1792 	perf_evlist__delete(rec->evlist);
1793 	symbol__exit();
1794 	auxtrace_record__free(rec->itr);
1795 	return err;
1796 }
1797 
1798 static void snapshot_sig_handler(int sig __maybe_unused)
1799 {
1800 	struct record *rec = &record;
1801 
1802 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1803 		trigger_hit(&auxtrace_snapshot_trigger);
1804 		auxtrace_record__snapshot_started = 1;
1805 		if (auxtrace_record__snapshot_start(record.itr))
1806 			trigger_error(&auxtrace_snapshot_trigger);
1807 	}
1808 
1809 	if (switch_output_signal(rec))
1810 		trigger_hit(&switch_output_trigger);
1811 }
1812 
1813 static void alarm_sig_handler(int sig __maybe_unused)
1814 {
1815 	struct record *rec = &record;
1816 
1817 	if (switch_output_time(rec))
1818 		trigger_hit(&switch_output_trigger);
1819 }
1820