xref: /openbmc/linux/tools/perf/builtin-record.c (revision 2209fda3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45 
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <locale.h>
49 #include <poll.h>
50 #include <unistd.h>
51 #include <sched.h>
52 #include <signal.h>
53 #include <sys/mman.h>
54 #include <sys/wait.h>
55 #include <linux/time64.h>
56 
57 struct switch_output {
58 	bool		 enabled;
59 	bool		 signal;
60 	unsigned long	 size;
61 	unsigned long	 time;
62 	const char	*str;
63 	bool		 set;
64 };
65 
66 struct record {
67 	struct perf_tool	tool;
68 	struct record_opts	opts;
69 	u64			bytes_written;
70 	struct perf_data	data;
71 	struct auxtrace_record	*itr;
72 	struct perf_evlist	*evlist;
73 	struct perf_session	*session;
74 	int			realtime_prio;
75 	bool			no_buildid;
76 	bool			no_buildid_set;
77 	bool			no_buildid_cache;
78 	bool			no_buildid_cache_set;
79 	bool			buildid_all;
80 	bool			timestamp_filename;
81 	bool			timestamp_boundary;
82 	struct switch_output	switch_output;
83 	unsigned long long	samples;
84 };
85 
86 static volatile int auxtrace_record__snapshot_started;
87 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
88 static DEFINE_TRIGGER(switch_output_trigger);
89 
90 static bool switch_output_signal(struct record *rec)
91 {
92 	return rec->switch_output.signal &&
93 	       trigger_is_ready(&switch_output_trigger);
94 }
95 
96 static bool switch_output_size(struct record *rec)
97 {
98 	return rec->switch_output.size &&
99 	       trigger_is_ready(&switch_output_trigger) &&
100 	       (rec->bytes_written >= rec->switch_output.size);
101 }
102 
103 static bool switch_output_time(struct record *rec)
104 {
105 	return rec->switch_output.time &&
106 	       trigger_is_ready(&switch_output_trigger);
107 }
108 
109 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
110 			 void *bf, size_t size)
111 {
112 	struct perf_data_file *file = &rec->session->data->file;
113 
114 	if (perf_data_file__write(file, bf, size) < 0) {
115 		pr_err("failed to write perf data, error: %m\n");
116 		return -1;
117 	}
118 
119 	rec->bytes_written += size;
120 
121 	if (switch_output_size(rec))
122 		trigger_hit(&switch_output_trigger);
123 
124 	return 0;
125 }
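
/*
 * Example: with --switch-output=1G, each record__write() call above adds
 * to rec->bytes_written; once that counter reaches 1 << 30 bytes (and the
 * trigger is ready), switch_output_size() returns true and the main loop
 * in __cmd_record() rotates the output file.
 */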
126 
127 static int process_synthesized_event(struct perf_tool *tool,
128 				     union perf_event *event,
129 				     struct perf_sample *sample __maybe_unused,
130 				     struct machine *machine __maybe_unused)
131 {
132 	struct record *rec = container_of(tool, struct record, tool);
133 	return record__write(rec, NULL, event, event->header.size);
134 }
135 
136 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
137 {
138 	struct record *rec = to;
139 
140 	rec->samples++;
141 	return record__write(rec, map, bf, size);
142 }
143 
144 static volatile int done;
145 static volatile int signr = -1;
146 static volatile int child_finished;
147 
148 static void sig_handler(int sig)
149 {
150 	if (sig == SIGCHLD)
151 		child_finished = 1;
152 	else
153 		signr = sig;
154 
155 	done = 1;
156 }
157 
158 static void sigsegv_handler(int sig)
159 {
160 	perf_hooks__recover();
161 	sighandler_dump_stack(sig);
162 }
163 
164 static void record__sig_exit(void)
165 {
166 	if (signr == -1)
167 		return;
168 
169 	signal(signr, SIG_DFL);
170 	raise(signr);
171 }
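
/*
 * record__sig_exit() re-raises the fatal signal with the default handler
 * restored, so the parent (e.g. a shell) sees the real termination cause
 * rather than a plain exit status.
 */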
172 
173 #ifdef HAVE_AUXTRACE_SUPPORT
174 
175 static int record__process_auxtrace(struct perf_tool *tool,
176 				    struct perf_mmap *map,
177 				    union perf_event *event, void *data1,
178 				    size_t len1, void *data2, size_t len2)
179 {
180 	struct record *rec = container_of(tool, struct record, tool);
181 	struct perf_data *data = &rec->data;
182 	size_t padding;
183 	u8 pad[8] = {0};
184 
185 	if (!perf_data__is_pipe(data)) {
186 		off_t file_offset;
187 		int fd = perf_data__fd(data);
188 		int err;
189 
190 		file_offset = lseek(fd, 0, SEEK_CUR);
191 		if (file_offset == -1)
192 			return -1;
193 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
194 						     event, file_offset);
195 		if (err)
196 			return err;
197 	}
198 
199 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
200 	padding = (len1 + len2) & 7;
201 	if (padding)
202 		padding = 8 - padding;
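	/*
	 * e.g. len1 + len2 == 13: (13 & 7) == 5, so padding == 3 and a
	 * total of 16 data bytes are written, keeping 8-byte alignment.
	 */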
203 
204 	record__write(rec, map, event, event->header.size);
205 	record__write(rec, map, data1, len1);
206 	if (len2)
207 		record__write(rec, map, data2, len2);
208 	record__write(rec, map, &pad, padding);
209 
210 	return 0;
211 }
212 
213 static int record__auxtrace_mmap_read(struct record *rec,
214 				      struct perf_mmap *map)
215 {
216 	int ret;
217 
218 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
219 				  record__process_auxtrace);
220 	if (ret < 0)
221 		return ret;
222 
223 	if (ret)
224 		rec->samples++;
225 
226 	return 0;
227 }
228 
229 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
230 					       struct perf_mmap *map)
231 {
232 	int ret;
233 
234 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
235 					   record__process_auxtrace,
236 					   rec->opts.auxtrace_snapshot_size);
237 	if (ret < 0)
238 		return ret;
239 
240 	if (ret)
241 		rec->samples++;
242 
243 	return 0;
244 }
245 
246 static int record__auxtrace_read_snapshot_all(struct record *rec)
247 {
248 	int i;
249 	int rc = 0;
250 
251 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
252 		struct perf_mmap *map = &rec->evlist->mmap[i];
253 
254 		if (!map->auxtrace_mmap.base)
255 			continue;
256 
257 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
258 			rc = -1;
259 			goto out;
260 		}
261 	}
262 out:
263 	return rc;
264 }
265 
266 static void record__read_auxtrace_snapshot(struct record *rec)
267 {
268 	pr_debug("Recording AUX area tracing snapshot\n");
269 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
270 		trigger_error(&auxtrace_snapshot_trigger);
271 	} else {
272 		if (auxtrace_record__snapshot_finish(rec->itr))
273 			trigger_error(&auxtrace_snapshot_trigger);
274 		else
275 			trigger_ready(&auxtrace_snapshot_trigger);
276 	}
277 }
278 
279 static int record__auxtrace_init(struct record *rec)
280 {
281 	int err;
282 
283 	if (!rec->itr) {
284 		rec->itr = auxtrace_record__init(rec->evlist, &err);
285 		if (err)
286 			return err;
287 	}
288 
289 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
290 					      rec->opts.auxtrace_snapshot_opts);
291 	if (err)
292 		return err;
293 
294 	return auxtrace_parse_filters(rec->evlist);
295 }
296 
297 #else
298 
299 static inline
300 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
301 			       struct perf_mmap *map __maybe_unused)
302 {
303 	return 0;
304 }
305 
306 static inline
307 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
308 {
309 }
310 
311 static inline
312 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
313 {
314 	return 0;
315 }
316 
317 static int record__auxtrace_init(struct record *rec __maybe_unused)
318 {
319 	return 0;
320 }
321 
322 #endif
323 
324 static int record__mmap_evlist(struct record *rec,
325 			       struct perf_evlist *evlist)
326 {
327 	struct record_opts *opts = &rec->opts;
328 	char msg[512];
329 
330 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
331 				 opts->auxtrace_mmap_pages,
332 				 opts->auxtrace_snapshot_mode) < 0) {
333 		if (errno == EPERM) {
334 			pr_err("Permission error mapping pages.\n"
335 			       "Consider increasing "
336 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
337 			       "or try again with a smaller value of -m/--mmap_pages.\n"
338 			       "(current value: %u,%u)\n",
339 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
340 			return -errno;
341 		} else {
342 			pr_err("failed to mmap with %d (%s)\n", errno,
343 				str_error_r(errno, msg, sizeof(msg)));
344 			if (errno)
345 				return -errno;
346 			else
347 				return -EINVAL;
348 		}
349 	}
350 	return 0;
351 }
352 
353 static int record__mmap(struct record *rec)
354 {
355 	return record__mmap_evlist(rec, rec->evlist);
356 }
357 
358 static int record__open(struct record *rec)
359 {
360 	char msg[BUFSIZ];
361 	struct perf_evsel *pos;
362 	struct perf_evlist *evlist = rec->evlist;
363 	struct perf_session *session = rec->session;
364 	struct record_opts *opts = &rec->opts;
365 	struct perf_evsel_config_term *err_term;
366 	int rc = 0;
367 
368 	/*
369 	 * For initial_delay we need to add a dummy event so that we can track
370 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
371 	 * real events, the ones requested by the user.
372 	 */
373 	if (opts->initial_delay) {
374 		if (perf_evlist__add_dummy(evlist))
375 			return -ENOMEM;
376 
377 		pos = perf_evlist__first(evlist);
378 		pos->tracking = 0;
379 		pos = perf_evlist__last(evlist);
380 		pos->tracking = 1;
381 		pos->attr.enable_on_exec = 1;
382 	}
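
	/*
	 * e.g. 'perf record -D 500 <workload>': the dummy event set up above
	 * is enabled from exec() (enable_on_exec) and tracks PERF_RECORD_MMAP,
	 * while the real events are only enabled roughly 500ms later, in
	 * __cmd_record().
	 */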
383 
384 	perf_evlist__config(evlist, opts, &callchain_param);
385 
386 	evlist__for_each_entry(evlist, pos) {
387 try_again:
388 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
389 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
390 				if (verbose > 0)
391 					ui__warning("%s\n", msg);
392 				goto try_again;
393 			}
394 
395 			rc = -errno;
396 			perf_evsel__open_strerror(pos, &opts->target,
397 						  errno, msg, sizeof(msg));
398 			ui__error("%s\n", msg);
399 			goto out;
400 		}
401 
402 		pos->supported = true;
403 	}
404 
405 	if (perf_evlist__apply_filters(evlist, &pos)) {
406 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
407 			pos->filter, perf_evsel__name(pos), errno,
408 			str_error_r(errno, msg, sizeof(msg)));
409 		rc = -1;
410 		goto out;
411 	}
412 
413 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
414 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
415 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
416 		      str_error_r(errno, msg, sizeof(msg)));
417 		rc = -1;
418 		goto out;
419 	}
420 
421 	rc = record__mmap(rec);
422 	if (rc)
423 		goto out;
424 
425 	session->evlist = evlist;
426 	perf_session__set_id_hdr_size(session);
427 out:
428 	return rc;
429 }
430 
431 static int process_sample_event(struct perf_tool *tool,
432 				union perf_event *event,
433 				struct perf_sample *sample,
434 				struct perf_evsel *evsel,
435 				struct machine *machine)
436 {
437 	struct record *rec = container_of(tool, struct record, tool);
438 
439 	if (rec->evlist->first_sample_time == 0)
440 		rec->evlist->first_sample_time = sample->time;
441 
442 	rec->evlist->last_sample_time = sample->time;
443 
444 	if (rec->buildid_all)
445 		return 0;
446 
447 	rec->samples++;
448 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
449 }
450 
451 static int process_buildids(struct record *rec)
452 {
453 	struct perf_data *data = &rec->data;
454 	struct perf_session *session = rec->session;
455 
456 	if (data->size == 0)
457 		return 0;
458 
459 	/*
460 	 * During this process, it'll load the kernel map and replace
461 	 * dso->long_name with a real pathname it found.  In this case
462 	 * we prefer a vmlinux path like
463 	 *   /lib/modules/3.16.4/build/vmlinux
464 	 *
465 	 * rather than the build-id path (in the debug directory).
466 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
467 	 */
468 	symbol_conf.ignore_vmlinux_buildid = true;
469 
470 	/*
471 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
472 	 * so there is no need to process samples. But if timestamp_boundary
473 	 * is enabled, it still needs to walk all samples to get the
474 	 * timestamps of the first/last samples.
475 	 */
476 	if (rec->buildid_all && !rec->timestamp_boundary)
477 		rec->tool.sample = NULL;
478 
479 	return perf_session__process_events(session);
480 }
481 
482 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
483 {
484 	int err;
485 	struct perf_tool *tool = data;
486 	/*
487 	 * As for the guest kernel, when processing the record & report
488 	 * subcommands we arrange the module mmap events prior to the guest
489 	 * kernel mmap and trigger a DSO preload, because by default guest
490 	 * module symbols are loaded from guest kallsyms instead of
491 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
492 	 * address is in a module instead of in the guest kernel.
493 	 */
494 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
495 					     machine);
496 	if (err < 0)
497 		pr_err("Couldn't record guest kernel [%d]'s module"
498 		       " information.\n", machine->pid);
499 
500 	/*
501 	 * We use _stext for the guest kernel because the guest kernel's
502 	 * /proc/kallsyms sometimes has no _text.
503 	 */
504 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
505 						 machine);
506 	if (err < 0)
507 		pr_err("Couldn't record guest kernel [%d]'s reference"
508 		       " relocation symbol.\n", machine->pid);
509 }
510 
511 static struct perf_event_header finished_round_event = {
512 	.size = sizeof(struct perf_event_header),
513 	.type = PERF_RECORD_FINISHED_ROUND,
514 };
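
/*
 * This is a header-only event (its size is just the header): writing it
 * after each pass over the mmaps (see record__mmap_read_evlist() below)
 * marks a point up to which the report side may safely sort and flush
 * the events queued so far.
 */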
515 
516 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
517 				    bool overwrite)
518 {
519 	u64 bytes_written = rec->bytes_written;
520 	int i;
521 	int rc = 0;
522 	struct perf_mmap *maps;
523 
524 	if (!evlist)
525 		return 0;
526 
527 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
528 	if (!maps)
529 		return 0;
530 
531 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
532 		return 0;
533 
534 	for (i = 0; i < evlist->nr_mmaps; i++) {
535 		struct perf_mmap *map = &maps[i];
536 
537 		if (map->base) {
538 			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
539 				rc = -1;
540 				goto out;
541 			}
542 		}
543 
544 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
545 		    record__auxtrace_mmap_read(rec, map) != 0) {
546 			rc = -1;
547 			goto out;
548 		}
549 	}
550 
551 	/*
552 	 * Mark the round finished if we wrote
553 	 * at least one event.
554 	 */
555 	if (bytes_written != rec->bytes_written)
556 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
557 
558 	if (overwrite)
559 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
560 out:
561 	return rc;
562 }
563 
564 static int record__mmap_read_all(struct record *rec)
565 {
566 	int err;
567 
568 	err = record__mmap_read_evlist(rec, rec->evlist, false);
569 	if (err)
570 		return err;
571 
572 	return record__mmap_read_evlist(rec, rec->evlist, true);
573 }
574 
575 static void record__init_features(struct record *rec)
576 {
577 	struct perf_session *session = rec->session;
578 	int feat;
579 
580 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
581 		perf_header__set_feat(&session->header, feat);
582 
583 	if (rec->no_buildid)
584 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
585 
586 	if (!have_tracepoints(&rec->evlist->entries))
587 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
588 
589 	if (!rec->opts.branch_stack)
590 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
591 
592 	if (!rec->opts.full_auxtrace)
593 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
594 
595 	perf_header__clear_feat(&session->header, HEADER_STAT);
596 }
597 
598 static void
599 record__finish_output(struct record *rec)
600 {
601 	struct perf_data *data = &rec->data;
602 	int fd = perf_data__fd(data);
603 
604 	if (data->is_pipe)
605 		return;
606 
607 	rec->session->header.data_size += rec->bytes_written;
608 	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
609 
610 	if (!rec->no_buildid) {
611 		process_buildids(rec);
612 
613 		if (rec->buildid_all)
614 			dsos__hit_all(rec->session);
615 	}
616 	perf_session__write_header(rec->session, rec->evlist, fd, true);
617 
618 	return;
619 }
620 
621 static int record__synthesize_workload(struct record *rec, bool tail)
622 {
623 	int err;
624 	struct thread_map *thread_map;
625 
626 	if (rec->opts.tail_synthesize != tail)
627 		return 0;
628 
629 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
630 	if (thread_map == NULL)
631 		return -1;
632 
633 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
634 						 process_synthesized_event,
635 						 &rec->session->machines.host,
636 						 rec->opts.sample_address,
637 						 rec->opts.proc_map_timeout);
638 	thread_map__put(thread_map);
639 	return err;
640 }
641 
642 static int record__synthesize(struct record *rec, bool tail);
643 
644 static int
645 record__switch_output(struct record *rec, bool at_exit)
646 {
647 	struct perf_data *data = &rec->data;
648 	int fd, err;
649 
650 	/* Same size as a real timestamp: "2015122520103046" */
651 	char timestamp[] = "InvalidTimestamp";
652 
653 	record__synthesize(rec, true);
654 	if (target__none(&rec->opts.target))
655 		record__synthesize_workload(rec, true);
656 
657 	rec->samples = 0;
658 	record__finish_output(rec);
659 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
660 	if (err) {
661 		pr_err("Failed to get current timestamp\n");
662 		return -EINVAL;
663 	}
664 
665 	fd = perf_data__switch(data, timestamp,
666 				    rec->session->header.data_offset,
667 				    at_exit);
668 	if (fd >= 0 && !at_exit) {
669 		rec->bytes_written = 0;
670 		rec->session->header.data_size = 0;
671 	}
672 
673 	if (!quiet)
674 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
675 			data->file.path, timestamp);
676 
677 	/* Output tracking events */
678 	if (!at_exit) {
679 		record__synthesize(rec, false);
680 
681 		/*
682 		 * In 'perf record --switch-output' without -a,
683 		 * record__synthesize() in record__switch_output() won't
684 		 * generate tracking events, because there's no thread_map
685 		 * in the evlist, so the newly created perf.data would lack
686 		 * the map and comm information.
687 		 * Create a fake thread_map and call
688 		 * perf_event__synthesize_thread_map() directly for those events.
689 		 */
690 		if (target__none(&rec->opts.target))
691 			record__synthesize_workload(rec, false);
692 	}
693 	return fd;
694 }
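
/*
 * A rotation produces files named <output>.<timestamp>, e.g.
 * perf.data.2015122520103046 (the timestamp layout matches the
 * placeholder above); only in the !at_exit case are the byte counters
 * reset, so that size accounting restarts for the new file.
 */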
695 
696 static volatile int workload_exec_errno;
697 
698 /*
699  * perf_evlist__prepare_workload will send a SIGUSR1
700  * if the fork fails, since we asked for that by setting
701  * its want_signal argument to true.
702  */
703 static void workload_exec_failed_signal(int signo __maybe_unused,
704 					siginfo_t *info,
705 					void *ucontext __maybe_unused)
706 {
707 	workload_exec_errno = info->si_value.sival_int;
708 	done = 1;
709 	child_finished = 1;
710 }
711 
712 static void snapshot_sig_handler(int sig);
713 static void alarm_sig_handler(int sig);
714 
715 int __weak
716 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
717 			    struct perf_tool *tool __maybe_unused,
718 			    perf_event__handler_t process __maybe_unused,
719 			    struct machine *machine __maybe_unused)
720 {
721 	return 0;
722 }
723 
724 static const struct perf_event_mmap_page *
725 perf_evlist__pick_pc(struct perf_evlist *evlist)
726 {
727 	if (evlist) {
728 		if (evlist->mmap && evlist->mmap[0].base)
729 			return evlist->mmap[0].base;
730 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
731 			return evlist->overwrite_mmap[0].base;
732 	}
733 	return NULL;
734 }
735 
736 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
737 {
738 	const struct perf_event_mmap_page *pc;
739 
740 	pc = perf_evlist__pick_pc(rec->evlist);
741 	if (pc)
742 		return pc;
743 	return NULL;
744 }
745 
746 static int record__synthesize(struct record *rec, bool tail)
747 {
748 	struct perf_session *session = rec->session;
749 	struct machine *machine = &session->machines.host;
750 	struct perf_data *data = &rec->data;
751 	struct record_opts *opts = &rec->opts;
752 	struct perf_tool *tool = &rec->tool;
753 	int fd = perf_data__fd(data);
754 	int err = 0;
755 
756 	if (rec->opts.tail_synthesize != tail)
757 		return 0;
758 
759 	if (data->is_pipe) {
760 		/*
761 		 * We need to synthesize events first, because some
762 		 * features work on top of them (on the report side).
763 		 */
764 		err = perf_event__synthesize_attrs(tool, rec->evlist,
765 						   process_synthesized_event);
766 		if (err < 0) {
767 			pr_err("Couldn't synthesize attrs.\n");
768 			goto out;
769 		}
770 
771 		err = perf_event__synthesize_features(tool, session, rec->evlist,
772 						      process_synthesized_event);
773 		if (err < 0) {
774 			pr_err("Couldn't synthesize features.\n");
775 			return err;
776 		}
777 
778 		if (have_tracepoints(&rec->evlist->entries)) {
779 			/*
780 			 * FIXME: err <= 0 here actually means that
781 			 * there were no tracepoints, so it's not really
782 			 * an error, just that we don't need to
783 			 * synthesize anything.  We really have to
784 			 * return this more properly and also
785 			 * propagate the errors that currently call die().
786 			 */
787 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
788 								  process_synthesized_event);
789 			if (err <= 0) {
790 				pr_err("Couldn't record tracing data.\n");
791 				goto out;
792 			}
793 			rec->bytes_written += err;
794 		}
795 	}
796 
797 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
798 					  process_synthesized_event, machine);
799 	if (err)
800 		goto out;
801 
802 	if (rec->opts.full_auxtrace) {
803 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
804 					session, process_synthesized_event);
805 		if (err)
806 			goto out;
807 	}
808 
809 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
810 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
811 							 machine);
812 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
813 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
814 				   "Check /proc/kallsyms permission or run as root.\n");
815 
816 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
817 						     machine);
818 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
819 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
820 				   "Check /proc/modules permission or run as root.\n");
821 	}
822 
823 	if (perf_guest) {
824 		machines__process_guests(&session->machines,
825 					 perf_event__synthesize_guest_os, tool);
826 	}
827 
828 	err = perf_event__synthesize_extra_attr(&rec->tool,
829 						rec->evlist,
830 						process_synthesized_event,
831 						data->is_pipe);
832 	if (err)
833 		goto out;
834 
835 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
836 						 process_synthesized_event,
837 						NULL);
838 	if (err < 0) {
839 		pr_err("Couldn't synthesize thread map.\n");
840 		return err;
841 	}
842 
843 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
844 					     process_synthesized_event, NULL);
845 	if (err < 0) {
846 		pr_err("Couldn't synthesize cpu map.\n");
847 		return err;
848 	}
849 
850 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
851 					    process_synthesized_event, opts->sample_address,
852 					    opts->proc_map_timeout, 1);
853 out:
854 	return err;
855 }
856 
857 static int __cmd_record(struct record *rec, int argc, const char **argv)
858 {
859 	int err;
860 	int status = 0;
861 	unsigned long waking = 0;
862 	const bool forks = argc > 0;
863 	struct perf_tool *tool = &rec->tool;
864 	struct record_opts *opts = &rec->opts;
865 	struct perf_data *data = &rec->data;
866 	struct perf_session *session;
867 	bool disabled = false, draining = false;
868 	int fd;
869 
870 	atexit(record__sig_exit);
871 	signal(SIGCHLD, sig_handler);
872 	signal(SIGINT, sig_handler);
873 	signal(SIGTERM, sig_handler);
874 	signal(SIGSEGV, sigsegv_handler);
875 
876 	if (rec->opts.record_namespaces)
877 		tool->namespace_events = true;
878 
879 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
880 		signal(SIGUSR2, snapshot_sig_handler);
881 		if (rec->opts.auxtrace_snapshot_mode)
882 			trigger_on(&auxtrace_snapshot_trigger);
883 		if (rec->switch_output.enabled)
884 			trigger_on(&switch_output_trigger);
885 	} else {
886 		signal(SIGUSR2, SIG_IGN);
887 	}
888 
889 	session = perf_session__new(data, false, tool);
890 	if (session == NULL) {
891 		pr_err("Perf session creation failed.\n");
892 		return -1;
893 	}
894 
895 	fd = perf_data__fd(data);
896 	rec->session = session;
897 
898 	record__init_features(rec);
899 
900 	if (forks) {
901 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
902 						    argv, data->is_pipe,
903 						    workload_exec_failed_signal);
904 		if (err < 0) {
905 			pr_err("Couldn't run the workload!\n");
906 			status = err;
907 			goto out_delete_session;
908 		}
909 	}
910 
911 	/*
912 	 * If we have just a single event and are sending data
913 	 * through a pipe, we need to force ID allocation,
914 	 * because we synthesize the event name through the pipe
915 	 * and need the ID for that.
916 	 */
917 	if (data->is_pipe && rec->evlist->nr_entries == 1)
918 		rec->opts.sample_id = true;
919 
920 	if (record__open(rec) != 0) {
921 		err = -1;
922 		goto out_child;
923 	}
924 
925 	err = bpf__apply_obj_config();
926 	if (err) {
927 		char errbuf[BUFSIZ];
928 
929 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
930 		pr_err("ERROR: Applying config to BPF failed: %s\n",
931 			 errbuf);
932 		goto out_child;
933 	}
934 
935 	/*
936 	 * Normally perf_session__new would do this, but it doesn't have the
937 	 * evlist.
938 	 */
939 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
940 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
941 		rec->tool.ordered_events = false;
942 	}
943 
944 	if (!rec->evlist->nr_groups)
945 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
946 
947 	if (data->is_pipe) {
948 		err = perf_header__write_pipe(fd);
949 		if (err < 0)
950 			goto out_child;
951 	} else {
952 		err = perf_session__write_header(session, rec->evlist, fd, false);
953 		if (err < 0)
954 			goto out_child;
955 	}
956 
957 	if (!rec->no_buildid
958 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
959 		pr_err("Couldn't generate buildids. "
960 		       "Use --no-buildid to profile anyway.\n");
961 		err = -1;
962 		goto out_child;
963 	}
964 
965 	err = record__synthesize(rec, false);
966 	if (err < 0)
967 		goto out_child;
968 
969 	if (rec->realtime_prio) {
970 		struct sched_param param;
971 
972 		param.sched_priority = rec->realtime_prio;
973 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
974 			pr_err("Could not set realtime priority.\n");
975 			err = -1;
976 			goto out_child;
977 		}
978 	}
979 
980 	/*
981 	 * When perf is starting the traced process, all the events
982 	 * (apart from group members) have enable_on_exec=1 set,
983 	 * so don't spoil it by prematurely enabling them.
984 	 */
985 	if (!target__none(&opts->target) && !opts->initial_delay)
986 		perf_evlist__enable(rec->evlist);
987 
988 	/*
989 	 * Let the child rip
990 	 */
991 	if (forks) {
992 		struct machine *machine = &session->machines.host;
993 		union perf_event *event;
994 		pid_t tgid;
995 
996 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
997 		if (event == NULL) {
998 			err = -ENOMEM;
999 			goto out_child;
1000 		}
1001 
1002 		/*
1003 		 * Some H/W events are generated before the COMM event,
1004 		 * which is emitted during exec(), so perf script
1005 		 * cannot see a correct process name for those events.
1006 		 * Synthesize a COMM event here to prevent that.
1007 		 */
1008 		tgid = perf_event__synthesize_comm(tool, event,
1009 						   rec->evlist->workload.pid,
1010 						   process_synthesized_event,
1011 						   machine);
1012 		free(event);
1013 
1014 		if (tgid == -1)
1015 			goto out_child;
1016 
1017 		event = malloc(sizeof(event->namespaces) +
1018 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1019 			       machine->id_hdr_size);
1020 		if (event == NULL) {
1021 			err = -ENOMEM;
1022 			goto out_child;
1023 		}
1024 
1025 		/*
1026 		 * Synthesize NAMESPACES event for the command specified.
1027 		 */
1028 		perf_event__synthesize_namespaces(tool, event,
1029 						  rec->evlist->workload.pid,
1030 						  tgid, process_synthesized_event,
1031 						  machine);
1032 		free(event);
1033 
1034 		perf_evlist__start_workload(rec->evlist);
1035 	}
1036 
1037 	if (opts->initial_delay) {
1038 		usleep(opts->initial_delay * USEC_PER_MSEC);
1039 		perf_evlist__enable(rec->evlist);
1040 	}
1041 
1042 	trigger_ready(&auxtrace_snapshot_trigger);
1043 	trigger_ready(&switch_output_trigger);
1044 	perf_hooks__invoke_record_start();
1045 	for (;;) {
1046 		unsigned long long hits = rec->samples;
1047 
1048 		/*
1049 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1050 		 * here: when done == true and hits != rec->samples
1051 		 * in the previous round.
1052 		 *
1053 		 * perf_evlist__toggle_bkw_mmap ensures we never
1054 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1055 		 */
1056 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1057 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1058 
1059 		if (record__mmap_read_all(rec) < 0) {
1060 			trigger_error(&auxtrace_snapshot_trigger);
1061 			trigger_error(&switch_output_trigger);
1062 			err = -1;
1063 			goto out_child;
1064 		}
1065 
1066 		if (auxtrace_record__snapshot_started) {
1067 			auxtrace_record__snapshot_started = 0;
1068 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1069 				record__read_auxtrace_snapshot(rec);
1070 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1071 				pr_err("AUX area tracing snapshot failed\n");
1072 				err = -1;
1073 				goto out_child;
1074 			}
1075 		}
1076 
1077 		if (trigger_is_hit(&switch_output_trigger)) {
1078 			/*
1079 			 * If switch_output_trigger is hit, the data in the
1080 			 * overwritable ring buffer should have been collected,
1081 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1082 			 *
1083 			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
1084 			 * record__mmap_read_all() didn't collect data from the
1085 			 * overwritable ring buffer. Read again.
1086 			 */
1087 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1088 				continue;
1089 			trigger_ready(&switch_output_trigger);
1090 
1091 			/*
1092 			 * Reenable events in overwrite ring buffer after
1093 			 * record__mmap_read_all(): we should have collected
1094 			 * data from it.
1095 			 */
1096 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1097 
1098 			if (!quiet)
1099 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1100 					waking);
1101 			waking = 0;
1102 			fd = record__switch_output(rec, false);
1103 			if (fd < 0) {
1104 				pr_err("Failed to switch to new file\n");
1105 				trigger_error(&switch_output_trigger);
1106 				err = fd;
1107 				goto out_child;
1108 			}
1109 
1110 			/* re-arm the alarm */
1111 			if (rec->switch_output.time)
1112 				alarm(rec->switch_output.time);
1113 		}
1114 
1115 		if (hits == rec->samples) {
1116 			if (done || draining)
1117 				break;
1118 			err = perf_evlist__poll(rec->evlist, -1);
1119 			/*
1120 			 * Propagate the error only if there is one. Ignore a
1121 			 * positive number of returned events and interrupt errors.
1122 			 */
1123 			if (err > 0 || (err < 0 && errno == EINTR))
1124 				err = 0;
1125 			waking++;
1126 
1127 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1128 				draining = true;
1129 		}
1130 
1131 		/*
1132 		 * When perf is starting the traced process, at the end events
1133 		 * die with the process and we wait for that. Thus no need to
1134 		 * disable events in this case.
1135 		 */
1136 		if (done && !disabled && !target__none(&opts->target)) {
1137 			trigger_off(&auxtrace_snapshot_trigger);
1138 			perf_evlist__disable(rec->evlist);
1139 			disabled = true;
1140 		}
1141 	}
1142 	trigger_off(&auxtrace_snapshot_trigger);
1143 	trigger_off(&switch_output_trigger);
1144 
1145 	if (forks && workload_exec_errno) {
1146 		char msg[STRERR_BUFSIZE];
1147 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1148 		pr_err("Workload failed: %s\n", emsg);
1149 		err = -1;
1150 		goto out_child;
1151 	}
1152 
1153 	if (!quiet)
1154 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1155 
1156 	if (target__none(&rec->opts.target))
1157 		record__synthesize_workload(rec, true);
1158 
1159 out_child:
1160 	if (forks) {
1161 		int exit_status;
1162 
1163 		if (!child_finished)
1164 			kill(rec->evlist->workload.pid, SIGTERM);
1165 
1166 		wait(&exit_status);
1167 
1168 		if (err < 0)
1169 			status = err;
1170 		else if (WIFEXITED(exit_status))
1171 			status = WEXITSTATUS(exit_status);
1172 		else if (WIFSIGNALED(exit_status))
1173 			signr = WTERMSIG(exit_status);
1174 	} else
1175 		status = err;
1176 
1177 	record__synthesize(rec, true);
1178 	/* this will be recalculated during process_buildids() */
1179 	rec->samples = 0;
1180 
1181 	if (!err) {
1182 		if (!rec->timestamp_filename) {
1183 			record__finish_output(rec);
1184 		} else {
1185 			fd = record__switch_output(rec, true);
1186 			if (fd < 0) {
1187 				status = fd;
1188 				goto out_delete_session;
1189 			}
1190 		}
1191 	}
1192 
1193 	perf_hooks__invoke_record_end();
1194 
1195 	if (!err && !quiet) {
1196 		char samples[128];
1197 		const char *postfix = rec->timestamp_filename ?
1198 					".<timestamp>" : "";
1199 
1200 		if (rec->samples && !rec->opts.full_auxtrace)
1201 			scnprintf(samples, sizeof(samples),
1202 				  " (%" PRIu64 " samples)", rec->samples);
1203 		else
1204 			samples[0] = '\0';
1205 
1206 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1207 			perf_data__size(data) / 1024.0 / 1024.0,
1208 			data->file.path, postfix, samples);
1209 	}
1210 
1211 out_delete_session:
1212 	perf_session__delete(session);
1213 	return status;
1214 }
1215 
1216 static void callchain_debug(struct callchain_param *callchain)
1217 {
1218 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1219 
1220 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1221 
1222 	if (callchain->record_mode == CALLCHAIN_DWARF)
1223 		pr_debug("callchain: stack dump size %d\n",
1224 			 callchain->dump_size);
1225 }
1226 
1227 int record_opts__parse_callchain(struct record_opts *record,
1228 				 struct callchain_param *callchain,
1229 				 const char *arg, bool unset)
1230 {
1231 	int ret;
1232 	callchain->enabled = !unset;
1233 
1234 	/* --no-call-graph */
1235 	if (unset) {
1236 		callchain->record_mode = CALLCHAIN_NONE;
1237 		pr_debug("callchain: disabled\n");
1238 		return 0;
1239 	}
1240 
1241 	ret = parse_callchain_record_opt(arg, callchain);
1242 	if (!ret) {
1243 		/* Enable data address sampling for DWARF unwind. */
1244 		if (callchain->record_mode == CALLCHAIN_DWARF)
1245 			record->sample_address = true;
1246 		callchain_debug(callchain);
1247 	}
1248 
1249 	return ret;
1250 }
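
/*
 * Example: '--call-graph dwarf,8192' selects CALLCHAIN_DWARF with an
 * 8192-byte stack dump and, per the code above, also enables
 * sample_address; '--no-call-graph' disables callchain recording
 * entirely.
 */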
1251 
1252 int record_parse_callchain_opt(const struct option *opt,
1253 			       const char *arg,
1254 			       int unset)
1255 {
1256 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1257 }
1258 
1259 int record_callchain_opt(const struct option *opt,
1260 			 const char *arg __maybe_unused,
1261 			 int unset __maybe_unused)
1262 {
1263 	struct callchain_param *callchain = opt->value;
1264 
1265 	callchain->enabled = true;
1266 
1267 	if (callchain->record_mode == CALLCHAIN_NONE)
1268 		callchain->record_mode = CALLCHAIN_FP;
1269 
1270 	callchain_debug(callchain);
1271 	return 0;
1272 }
1273 
1274 static int perf_record_config(const char *var, const char *value, void *cb)
1275 {
1276 	struct record *rec = cb;
1277 
1278 	if (!strcmp(var, "record.build-id")) {
1279 		if (!strcmp(value, "cache"))
1280 			rec->no_buildid_cache = false;
1281 		else if (!strcmp(value, "no-cache"))
1282 			rec->no_buildid_cache = true;
1283 		else if (!strcmp(value, "skip"))
1284 			rec->no_buildid = true;
1285 		else
1286 			return -1;
1287 		return 0;
1288 	}
1289 	if (!strcmp(var, "record.call-graph")) {
1290 		var = "call-graph.record-mode";
1291 		return perf_default_config(var, value, cb);
1292 	}
1293 
1294 	return 0;
1295 }
1296 
1297 struct clockid_map {
1298 	const char *name;
1299 	int clockid;
1300 };
1301 
1302 #define CLOCKID_MAP(n, c)	\
1303 	{ .name = n, .clockid = (c), }
1304 
1305 #define CLOCKID_END	{ .name = NULL, }
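
/*
 * e.g. CLOCKID_MAP("monotonic", CLOCK_MONOTONIC) expands to
 *	{ .name = "monotonic", .clockid = (CLOCK_MONOTONIC), }
 * and CLOCKID_END provides the NULL-name sentinel terminating the
 * clockids[] table below.
 */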
1306 
1307 
1308 /*
1309  * Add the missing ones, we need to build on many distros...
1310  */
1311 #ifndef CLOCK_MONOTONIC_RAW
1312 #define CLOCK_MONOTONIC_RAW 4
1313 #endif
1314 #ifndef CLOCK_BOOTTIME
1315 #define CLOCK_BOOTTIME 7
1316 #endif
1317 #ifndef CLOCK_TAI
1318 #define CLOCK_TAI 11
1319 #endif
1320 
1321 static const struct clockid_map clockids[] = {
1322 	/* available for all events, NMI safe */
1323 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1324 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1325 
1326 	/* available for some events */
1327 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1328 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1329 	CLOCKID_MAP("tai", CLOCK_TAI),
1330 
1331 	/* available for the lazy */
1332 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1333 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1334 	CLOCKID_MAP("real", CLOCK_REALTIME),
1335 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1336 
1337 	CLOCKID_END,
1338 };
1339 
1340 static int parse_clockid(const struct option *opt, const char *str, int unset)
1341 {
1342 	struct record_opts *opts = (struct record_opts *)opt->value;
1343 	const struct clockid_map *cm;
1344 	const char *ostr = str;
1345 
1346 	if (unset) {
1347 		opts->use_clockid = 0;
1348 		return 0;
1349 	}
1350 
1351 	/* no arg passed */
1352 	if (!str)
1353 		return 0;
1354 
1355 	/* no setting it twice */
1356 	if (opts->use_clockid)
1357 		return -1;
1358 
1359 	opts->use_clockid = true;
1360 
1361 	/* if it's a number, we're done */
1362 	if (sscanf(str, "%d", &opts->clockid) == 1)
1363 		return 0;
1364 
1365 	/* allow a "CLOCK_" prefix to the name */
1366 	if (!strncasecmp(str, "CLOCK_", 6))
1367 		str += 6;
1368 
1369 	for (cm = clockids; cm->name; cm++) {
1370 		if (!strcasecmp(str, cm->name)) {
1371 			opts->clockid = cm->clockid;
1372 			return 0;
1373 		}
1374 	}
1375 
1376 	opts->use_clockid = false;
1377 	ui__warning("unknown clockid %s, check man page\n", ostr);
1378 	return -1;
1379 }
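
/*
 * Example: '-k raw', '-k CLOCK_MONOTONIC_RAW' and '-k 4' are equivalent
 * here: a bare number is taken as the clockid itself, the 'CLOCK_' prefix
 * is stripped, and names are matched case-insensitively
 * (CLOCK_MONOTONIC_RAW is 4, per the fallback define above).
 */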
1380 
1381 static int record__parse_mmap_pages(const struct option *opt,
1382 				    const char *str,
1383 				    int unset __maybe_unused)
1384 {
1385 	struct record_opts *opts = opt->value;
1386 	char *s, *p;
1387 	unsigned int mmap_pages;
1388 	int ret;
1389 
1390 	if (!str)
1391 		return -EINVAL;
1392 
1393 	s = strdup(str);
1394 	if (!s)
1395 		return -ENOMEM;
1396 
1397 	p = strchr(s, ',');
1398 	if (p)
1399 		*p = '\0';
1400 
1401 	if (*s) {
1402 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1403 		if (ret)
1404 			goto out_free;
1405 		opts->mmap_pages = mmap_pages;
1406 	}
1407 
1408 	if (!p) {
1409 		ret = 0;
1410 		goto out_free;
1411 	}
1412 
1413 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1414 	if (ret)
1415 		goto out_free;
1416 
1417 	opts->auxtrace_mmap_pages = mmap_pages;
1418 
1419 out_free:
1420 	free(s);
1421 	return ret;
1422 }
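
/*
 * Example: '-m 512,128' (assuming both values pass
 * __perf_evlist__parse_mmap_pages()) sets opts->mmap_pages to 512 and
 * opts->auxtrace_mmap_pages to 128; a plain '-m 512' leaves the AUX area
 * mmap size untouched.
 */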
1423 
1424 static void switch_output_size_warn(struct record *rec)
1425 {
1426 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1427 	struct switch_output *s = &rec->switch_output;
1428 
1429 	wakeup_size /= 2;
1430 
1431 	if (s->size < wakeup_size) {
1432 		char buf[100];
1433 
1434 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1435 		pr_warning("WARNING: switch-output data size is lower than the "
1436 			   "wakeup kernel buffer size (%s); "
1437 			   "expect bigger perf.data sizes\n", buf);
1438 	}
1439 }
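
/*
 * e.g. if the mmap buffer size works out to 256kB, wakeup_size is 128kB
 * and any smaller --switch-output size draws this warning: the kernel
 * may wake perf up less often than the requested rotation granularity.
 */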
1440 
1441 static int switch_output_setup(struct record *rec)
1442 {
1443 	struct switch_output *s = &rec->switch_output;
1444 	static struct parse_tag tags_size[] = {
1445 		{ .tag  = 'B', .mult = 1       },
1446 		{ .tag  = 'K', .mult = 1 << 10 },
1447 		{ .tag  = 'M', .mult = 1 << 20 },
1448 		{ .tag  = 'G', .mult = 1 << 30 },
1449 		{ .tag  = 0 },
1450 	};
1451 	static struct parse_tag tags_time[] = {
1452 		{ .tag  = 's', .mult = 1        },
1453 		{ .tag  = 'm', .mult = 60       },
1454 		{ .tag  = 'h', .mult = 60*60    },
1455 		{ .tag  = 'd', .mult = 60*60*24 },
1456 		{ .tag  = 0 },
1457 	};
1458 	unsigned long val;
1459 
1460 	if (!s->set)
1461 		return 0;
1462 
1463 	if (!strcmp(s->str, "signal")) {
1464 		s->signal = true;
1465 		pr_debug("switch-output with SIGUSR2 signal\n");
1466 		goto enabled;
1467 	}
1468 
1469 	val = parse_tag_value(s->str, tags_size);
1470 	if (val != (unsigned long) -1) {
1471 		s->size = val;
1472 		pr_debug("switch-output with %s size threshold\n", s->str);
1473 		goto enabled;
1474 	}
1475 
1476 	val = parse_tag_value(s->str, tags_time);
1477 	if (val != (unsigned long) -1) {
1478 		s->time = val;
1479 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1480 			 s->str, s->time);
1481 		goto enabled;
1482 	}
1483 
1484 	return -1;
1485 
1486 enabled:
1487 	rec->timestamp_filename = true;
1488 	s->enabled              = true;
1489 
1490 	if (s->size && !rec->opts.no_buffering)
1491 		switch_output_size_warn(rec);
1492 
1493 	return 0;
1494 }
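
/*
 * Examples: '--switch-output=100M' sets s->size to 100 << 20 bytes,
 * '--switch-output=10m' sets s->time to 600 seconds, and a bare
 * '--switch-output' falls back to the "signal" default, rotating only
 * on SIGUSR2.
 */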
1495 
1496 static const char * const __record_usage[] = {
1497 	"perf record [<options>] [<command>]",
1498 	"perf record [<options>] -- <command> [<options>]",
1499 	NULL
1500 };
1501 const char * const *record_usage = __record_usage;
1502 
1503 /*
1504  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1505  * because we need to have access to it in record__exit, which is called
1506  * after cmd_record() exits, but since record_options needs to be accessible to
1507  * builtin-script, leave it here.
1508  *
1509  * At least we don't touch it in all the other functions here directly.
1510  *
1511  * Just say no to tons of global variables, sigh.
1512  */
1513 static struct record record = {
1514 	.opts = {
1515 		.sample_time	     = true,
1516 		.mmap_pages	     = UINT_MAX,
1517 		.user_freq	     = UINT_MAX,
1518 		.user_interval	     = ULLONG_MAX,
1519 		.freq		     = 4000,
1520 		.target		     = {
1521 			.uses_mmap   = true,
1522 			.default_per_cpu = true,
1523 		},
1524 		.proc_map_timeout     = 500,
1525 	},
1526 	.tool = {
1527 		.sample		= process_sample_event,
1528 		.fork		= perf_event__process_fork,
1529 		.exit		= perf_event__process_exit,
1530 		.comm		= perf_event__process_comm,
1531 		.namespaces	= perf_event__process_namespaces,
1532 		.mmap		= perf_event__process_mmap,
1533 		.mmap2		= perf_event__process_mmap2,
1534 		.ordered_events	= true,
1535 	},
1536 };
1537 
1538 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1539 	"\n\t\t\t\tDefault: fp";
1540 
1541 static bool dry_run;
1542 
1543 /*
1544  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1545  * with it and switch to use the library functions in perf_evlist that came
1546  * from builtin-record.c, i.e. use record_opts,
1547  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1548  * using pipes, etc.
1549  */
1550 static struct option __record_options[] = {
1551 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1552 		     "event selector. use 'perf list' to list available events",
1553 		     parse_events_option),
1554 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1555 		     "event filter", parse_filter),
1556 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1557 			   NULL, "don't record events from perf itself",
1558 			   exclude_perf),
1559 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1560 		    "record events on existing process id"),
1561 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1562 		    "record events on existing thread id"),
1563 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1564 		    "collect data with this RT SCHED_FIFO priority"),
1565 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1566 		    "collect data without buffering"),
1567 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1568 		    "collect raw sample records from all opened counters"),
1569 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1570 			    "system-wide collection from all CPUs"),
1571 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1572 		    "list of cpus to monitor"),
1573 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1574 	OPT_STRING('o', "output", &record.data.file.path, "file",
1575 		    "output file name"),
1576 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1577 			&record.opts.no_inherit_set,
1578 			"child tasks do not inherit counters"),
1579 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1580 		    "synthesize non-sample events at the end of output"),
1581 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1582 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1583 		    "Fail if the specified frequency can't be used"),
1584 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1585 		     "profile at this frequency",
1586 		      record__parse_freq),
1587 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1588 		     "number of mmap data pages and AUX area tracing mmap pages",
1589 		     record__parse_mmap_pages),
1590 	OPT_BOOLEAN(0, "group", &record.opts.group,
1591 		    "put the counters into a counter group"),
1592 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1593 			   NULL, "enables call-graph recording" ,
1594 			   &record_callchain_opt),
1595 	OPT_CALLBACK(0, "call-graph", &record.opts,
1596 		     "record_mode[,record_size]", record_callchain_help,
1597 		     &record_parse_callchain_opt),
1598 	OPT_INCR('v', "verbose", &verbose,
1599 		    "be more verbose (show counter open errors, etc)"),
1600 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1601 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1602 		    "per thread counts"),
1603 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1604 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1605 		    "Record the sample physical addresses"),
1606 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1607 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1608 			&record.opts.sample_time_set,
1609 			"Record the sample timestamps"),
1610 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1611 			"Record the sample period"),
1612 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1613 		    "don't sample"),
1614 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1615 			&record.no_buildid_cache_set,
1616 			"do not update the buildid cache"),
1617 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1618 			&record.no_buildid_set,
1619 			"do not collect buildids in perf.data"),
1620 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1621 		     "monitor event in cgroup name only",
1622 		     parse_cgroups),
1623 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1624 		  "ms to wait before starting measurement after program start"),
1625 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1626 		   "user to profile"),
1627 
1628 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1629 		     "branch any", "sample any taken branches",
1630 		     parse_branch_stack),
1631 
1632 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1633 		     "branch filter mask", "branch stack filter modes",
1634 		     parse_branch_stack),
1635 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1636 		    "sample by weight (on special events only)"),
1637 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1638 		    "sample transaction flags (special events only)"),
1639 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1640 		    "use per-thread mmaps"),
1641 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1642 		    "sample selected machine registers on interrupt,"
1643 		    " use -I ? to list register names", parse_regs),
1644 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1645 		    "sample selected machine registers on interrupt,"
1646 		    " use -I ? to list register names", parse_regs),
1647 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1648 		    "Record running/enabled time of read (:S) events"),
1649 	OPT_CALLBACK('k', "clockid", &record.opts,
1650 	"clockid", "clockid to use for events, see clock_gettime()",
1651 	parse_clockid),
1652 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1653 			  "opts", "AUX area tracing Snapshot Mode", ""),
1654 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1655 			"per thread proc mmap processing timeout in ms"),
1656 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1657 		    "Record namespaces events"),
1658 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1659 		    "Record context switch events"),
1660 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1661 			 "Configure all used events to run in kernel space.",
1662 			 PARSE_OPT_EXCLUSIVE),
1663 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1664 			 "Configure all used events to run in user space.",
1665 			 PARSE_OPT_EXCLUSIVE),
1666 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1667 		   "clang binary to use for compiling BPF scriptlets"),
1668 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1669 		   "options passed to clang when compiling BPF scriptlets"),
1670 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1671 		   "file", "vmlinux pathname"),
1672 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1673 		    "Record build-id of all DSOs regardless of hits"),
1674 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1675 		    "append timestamp to output filename"),
1676 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1677 		    "Record timestamp boundary (time of first/last samples)"),
1678 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1679 			  &record.switch_output.set, "signal,size,time",
1680 			  "Switch output when receive SIGUSR2 or cross size,time threshold",
1681 			  "signal"),
1682 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1683 		    "Parse options then exit"),
1684 	OPT_END()
1685 };
1686 
1687 struct option *record_options = __record_options;
1688 
1689 int cmd_record(int argc, const char **argv)
1690 {
1691 	int err;
1692 	struct record *rec = &record;
1693 	char errbuf[BUFSIZ];
1694 
1695 	setlocale(LC_ALL, "");
1696 
1697 #ifndef HAVE_LIBBPF_SUPPORT
1698 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1699 	set_nobuild('\0', "clang-path", true);
1700 	set_nobuild('\0', "clang-opt", true);
1701 # undef set_nobuild
1702 #endif
1703 
1704 #ifndef HAVE_BPF_PROLOGUE
1705 # if !defined (HAVE_DWARF_SUPPORT)
1706 #  define REASON  "NO_DWARF=1"
1707 # elif !defined (HAVE_LIBBPF_SUPPORT)
1708 #  define REASON  "NO_LIBBPF=1"
1709 # else
1710 #  define REASON  "this architecture doesn't support BPF prologue"
1711 # endif
1712 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1713 	set_nobuild('\0', "vmlinux", true);
1714 # undef set_nobuild
1715 # undef REASON
1716 #endif
1717 
1718 	rec->evlist = perf_evlist__new();
1719 	if (rec->evlist == NULL)
1720 		return -ENOMEM;
1721 
1722 	err = perf_config(perf_record_config, rec);
1723 	if (err)
1724 		return err;
1725 
1726 	argc = parse_options(argc, argv, record_options, record_usage,
1727 			    PARSE_OPT_STOP_AT_NON_OPTION);
1728 	if (quiet)
1729 		perf_quiet_option();
1730 
1731 	/* Make system wide (-a) the default target. */
1732 	if (!argc && target__none(&rec->opts.target))
1733 		rec->opts.target.system_wide = true;
1734 
1735 	if (nr_cgroups && !rec->opts.target.system_wide) {
1736 		usage_with_options_msg(record_usage, record_options,
1737 			"cgroup monitoring only available in system-wide mode");
1738 
1739 	}
1740 	if (rec->opts.record_switch_events &&
1741 	    !perf_can_record_switch_events()) {
1742 		ui__error("kernel does not support recording context switch events\n");
1743 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1744 		return -EINVAL;
1745 	}
1746 
1747 	if (switch_output_setup(rec)) {
1748 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1749 		return -EINVAL;
1750 	}
1751 
1752 	if (rec->switch_output.time) {
1753 		signal(SIGALRM, alarm_sig_handler);
1754 		alarm(rec->switch_output.time);
1755 	}
1756 
1757 	/*
1758 	 * Allow aliases to facilitate the lookup of symbols for address
1759 	 * filters. Refer to auxtrace_parse_filters().
1760 	 */
1761 	symbol_conf.allow_aliases = true;
1762 
1763 	symbol__init(NULL);
1764 
1765 	err = record__auxtrace_init(rec);
1766 	if (err)
1767 		goto out;
1768 
1769 	if (dry_run)
1770 		goto out;
1771 
1772 	err = bpf__setup_stdout(rec->evlist);
1773 	if (err) {
1774 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1775 		pr_err("ERROR: Setting up BPF stdout failed: %s\n",
1776 			 errbuf);
1777 		goto out;
1778 	}
1779 
1780 	err = -ENOMEM;
1781 
1782 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
1783 		pr_warning(
1784 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1785 "check /proc/sys/kernel/kptr_restrict.\n\n"
1786 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1787 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1788 "Samples in kernel modules won't be resolved at all.\n\n"
1789 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1790 "even with a suitable vmlinux or kallsyms file.\n\n");
1791 
1792 	if (rec->no_buildid_cache || rec->no_buildid) {
1793 		disable_buildid_cache();
1794 	} else if (rec->switch_output.enabled) {
1795 		/*
1796 		 * In 'perf record --switch-output', disable buildid
1797 		 * generation by default to reduce data file switching
1798 		 * overhead. Still generate build-ids if they are explicitly
1799 		 * required, using
1800 		 *
1801 		 *  perf record --switch-output --no-no-buildid \
1802 		 *              --no-no-buildid-cache
1803 		 *
1804 		 * The following code is equivalent to:
1805 		 *
1806 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1807 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1808 		 *         disable_buildid_cache();
1809 		 */
1810 		bool disable = true;
1811 
1812 		if (rec->no_buildid_set && !rec->no_buildid)
1813 			disable = false;
1814 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1815 			disable = false;
1816 		if (disable) {
1817 			rec->no_buildid = true;
1818 			rec->no_buildid_cache = true;
1819 			disable_buildid_cache();
1820 		}
1821 	}
1822 
1823 	if (record.opts.overwrite)
1824 		record.opts.tail_synthesize = true;
1825 
1826 	if (rec->evlist->nr_entries == 0 &&
1827 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1828 		pr_err("Not enough memory for event selector list\n");
1829 		goto out;
1830 	}
1831 
1832 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1833 		rec->opts.no_inherit = true;
1834 
1835 	err = target__validate(&rec->opts.target);
1836 	if (err) {
1837 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1838 		ui__warning("%s\n", errbuf);
1839 	}
1840 
1841 	err = target__parse_uid(&rec->opts.target);
1842 	if (err) {
1843 		int saved_errno = errno;
1844 
1845 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1846 		ui__error("%s", errbuf);
1847 
1848 		err = -saved_errno;
1849 		goto out;
1850 	}
1851 
1852 	/* Enable ignoring missing threads when -u/-p option is defined. */
1853 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
1854 
1855 	err = -ENOMEM;
1856 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1857 		usage_with_options(record_usage, record_options);
1858 
1859 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1860 	if (err)
1861 		goto out;
1862 
1863 	/*
1864 	 * We take all buildids when the file contains
1865 	 * AUX area tracing data, because we do not decode
1866 	 * the trace - that would take too long.
1867 	 */
1868 	if (rec->opts.full_auxtrace)
1869 		rec->buildid_all = true;
1870 
1871 	if (record_opts__config(&rec->opts)) {
1872 		err = -EINVAL;
1873 		goto out;
1874 	}
1875 
1876 	err = __cmd_record(&record, argc, argv);
1877 out:
1878 	perf_evlist__delete(rec->evlist);
1879 	symbol__exit();
1880 	auxtrace_record__free(rec->itr);
1881 	return err;
1882 }
1883 
1884 static void snapshot_sig_handler(int sig __maybe_unused)
1885 {
1886 	struct record *rec = &record;
1887 
1888 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1889 		trigger_hit(&auxtrace_snapshot_trigger);
1890 		auxtrace_record__snapshot_started = 1;
1891 		if (auxtrace_record__snapshot_start(record.itr))
1892 			trigger_error(&auxtrace_snapshot_trigger);
1893 	}
1894 
1895 	if (switch_output_signal(rec))
1896 		trigger_hit(&switch_output_trigger);
1897 }
1898 
1899 static void alarm_sig_handler(int sig __maybe_unused)
1900 {
1901 	struct record *rec = &record;
1902 
1903 	if (switch_output_time(rec))
1904 		trigger_hit(&switch_output_trigger);
1905 }
1906