xref: /openbmc/linux/tools/perf/builtin-record.c (revision 584eab291c67894cb17cc87544b9d086228ea70f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "perf.h"
12 
13 #include "util/build-id.h"
14 #include "util/util.h"
15 #include <subcmd/parse-options.h>
16 #include "util/parse-events.h"
17 #include "util/config.h"
18 
19 #include "util/callchain.h"
20 #include "util/cgroup.h"
21 #include "util/header.h"
22 #include "util/event.h"
23 #include "util/evlist.h"
24 #include "util/evsel.h"
25 #include "util/debug.h"
26 #include "util/drv_configs.h"
27 #include "util/session.h"
28 #include "util/tool.h"
29 #include "util/symbol.h"
30 #include "util/cpumap.h"
31 #include "util/thread_map.h"
32 #include "util/data.h"
33 #include "util/perf_regs.h"
34 #include "util/auxtrace.h"
35 #include "util/tsc.h"
36 #include "util/parse-branch-options.h"
37 #include "util/parse-regs-options.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/time-utils.h"
43 #include "util/units.h"
44 #include "asm/bug.h"
45 
46 #include <errno.h>
47 #include <inttypes.h>
48 #include <locale.h>
49 #include <poll.h>
50 #include <unistd.h>
51 #include <sched.h>
52 #include <signal.h>
53 #include <sys/mman.h>
54 #include <sys/wait.h>
55 #include <linux/time64.h>
56 
/*
 * Settings controlling when the output file is switched while recording.
 * NOTE(review): where 'str' and 'set' are filled in is not visible in this
 * part of the file - presumably the --switch-output option parser; confirm.
 */
struct switch_output {
	bool		 enabled;	/* __cmd_record() arms SIGUSR2 and the trigger when set */
	bool		 signal;	/* see switch_output_signal() */
	unsigned long	 size;		/* compared against bytes_written; 0 disables */
	unsigned long	 time;		/* passed to alarm() in __cmd_record(); 0 disables */
	const char	*str;
	bool		 set;
};
65 
/*
 * State of one 'perf record' run. The embedded 'tool' member lets callbacks
 * recover the enclosing struct record via container_of().
 */
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;	/* advanced by record__write() */
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;	/* non-zero: SCHED_FIFO priority to request */
	bool			no_buildid;	/* clears HEADER_BUILD_ID and skips build-id processing */
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;	/* mark every DSO as hit instead of per sample */
	bool			timestamp_filename;
	bool			timestamp_boundary;	/* keep walking samples to get first/last time */
	struct switch_output	switch_output;
	unsigned long long	samples;	/* bumped per pushed chunk / AUX read; see record__pushfn() */
};
85 
/* Polled (and cleared) by the main loop in __cmd_record(). */
static volatile int auxtrace_record__snapshot_started;
/* Triggers for AUX area snapshots and for output file switching. */
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
89 
90 static bool switch_output_signal(struct record *rec)
91 {
92 	return rec->switch_output.signal &&
93 	       trigger_is_ready(&switch_output_trigger);
94 }
95 
96 static bool switch_output_size(struct record *rec)
97 {
98 	return rec->switch_output.size &&
99 	       trigger_is_ready(&switch_output_trigger) &&
100 	       (rec->bytes_written >= rec->switch_output.size);
101 }
102 
103 static bool switch_output_time(struct record *rec)
104 {
105 	return rec->switch_output.time &&
106 	       trigger_is_ready(&switch_output_trigger);
107 }
108 
109 static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
110 			 void *bf, size_t size)
111 {
112 	struct perf_data_file *file = &rec->session->data->file;
113 
114 	if (perf_data_file__write(file, bf, size) < 0) {
115 		pr_err("failed to write perf data, error: %m\n");
116 		return -1;
117 	}
118 
119 	rec->bytes_written += size;
120 
121 	if (switch_output_size(rec))
122 		trigger_hit(&switch_output_trigger);
123 
124 	return 0;
125 }
126 
127 static int process_synthesized_event(struct perf_tool *tool,
128 				     union perf_event *event,
129 				     struct perf_sample *sample __maybe_unused,
130 				     struct machine *machine __maybe_unused)
131 {
132 	struct record *rec = container_of(tool, struct record, tool);
133 	return record__write(rec, NULL, event, event->header.size);
134 }
135 
136 static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
137 {
138 	struct record *rec = to;
139 
140 	rec->samples++;
141 	return record__write(rec, map, bf, size);
142 }
143 
/* Flags set from the signal handlers below and polled by __cmd_record(). */
static volatile int done;
/* Terminating signal to re-raise at exit; -1 means none was recorded. */
static volatile int signr = -1;
static volatile int child_finished;
147 
148 static void sig_handler(int sig)
149 {
150 	if (sig == SIGCHLD)
151 		child_finished = 1;
152 	else
153 		signr = sig;
154 
155 	done = 1;
156 }
157 
/*
 * SIGSEGV handler (installed in __cmd_record()): calls
 * perf_hooks__recover() first, then sighandler_dump_stack() with the
 * signal number.
 */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}
163 
164 static void record__sig_exit(void)
165 {
166 	if (signr == -1)
167 		return;
168 
169 	signal(signr, SIG_DFL);
170 	raise(signr);
171 }
172 
173 #ifdef HAVE_AUXTRACE_SUPPORT
174 
175 static int record__process_auxtrace(struct perf_tool *tool,
176 				    struct perf_mmap *map,
177 				    union perf_event *event, void *data1,
178 				    size_t len1, void *data2, size_t len2)
179 {
180 	struct record *rec = container_of(tool, struct record, tool);
181 	struct perf_data *data = &rec->data;
182 	size_t padding;
183 	u8 pad[8] = {0};
184 
185 	if (!perf_data__is_pipe(data)) {
186 		off_t file_offset;
187 		int fd = perf_data__fd(data);
188 		int err;
189 
190 		file_offset = lseek(fd, 0, SEEK_CUR);
191 		if (file_offset == -1)
192 			return -1;
193 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
194 						     event, file_offset);
195 		if (err)
196 			return err;
197 	}
198 
199 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
200 	padding = (len1 + len2) & 7;
201 	if (padding)
202 		padding = 8 - padding;
203 
204 	record__write(rec, map, event, event->header.size);
205 	record__write(rec, map, data1, len1);
206 	if (len2)
207 		record__write(rec, map, data2, len2);
208 	record__write(rec, map, &pad, padding);
209 
210 	return 0;
211 }
212 
213 static int record__auxtrace_mmap_read(struct record *rec,
214 				      struct perf_mmap *map)
215 {
216 	int ret;
217 
218 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
219 				  record__process_auxtrace);
220 	if (ret < 0)
221 		return ret;
222 
223 	if (ret)
224 		rec->samples++;
225 
226 	return 0;
227 }
228 
229 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
230 					       struct perf_mmap *map)
231 {
232 	int ret;
233 
234 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
235 					   record__process_auxtrace,
236 					   rec->opts.auxtrace_snapshot_size);
237 	if (ret < 0)
238 		return ret;
239 
240 	if (ret)
241 		rec->samples++;
242 
243 	return 0;
244 }
245 
246 static int record__auxtrace_read_snapshot_all(struct record *rec)
247 {
248 	int i;
249 	int rc = 0;
250 
251 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
252 		struct perf_mmap *map = &rec->evlist->mmap[i];
253 
254 		if (!map->auxtrace_mmap.base)
255 			continue;
256 
257 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
258 			rc = -1;
259 			goto out;
260 		}
261 	}
262 out:
263 	return rc;
264 }
265 
266 static void record__read_auxtrace_snapshot(struct record *rec)
267 {
268 	pr_debug("Recording AUX area tracing snapshot\n");
269 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
270 		trigger_error(&auxtrace_snapshot_trigger);
271 	} else {
272 		if (auxtrace_record__snapshot_finish(rec->itr))
273 			trigger_error(&auxtrace_snapshot_trigger);
274 		else
275 			trigger_ready(&auxtrace_snapshot_trigger);
276 	}
277 }
278 
279 static int record__auxtrace_init(struct record *rec)
280 {
281 	int err;
282 
283 	if (!rec->itr) {
284 		rec->itr = auxtrace_record__init(rec->evlist, &err);
285 		if (err)
286 			return err;
287 	}
288 
289 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
290 					      rec->opts.auxtrace_snapshot_opts);
291 	if (err)
292 		return err;
293 
294 	return auxtrace_parse_filters(rec->evlist);
295 }
296 
297 #else
298 
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: nothing to read, returns 0. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct perf_mmap *map __maybe_unused)
{
	return 0;
}
305 
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: does nothing. */
static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}
310 
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: always returns 0. */
static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}
316 
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: nothing to set up, returns 0. */
static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}
321 
322 #endif
323 
324 static int record__mmap_evlist(struct record *rec,
325 			       struct perf_evlist *evlist)
326 {
327 	struct record_opts *opts = &rec->opts;
328 	char msg[512];
329 
330 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
331 				 opts->auxtrace_mmap_pages,
332 				 opts->auxtrace_snapshot_mode) < 0) {
333 		if (errno == EPERM) {
334 			pr_err("Permission error mapping pages.\n"
335 			       "Consider increasing "
336 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
337 			       "or try again with a smaller value of -m/--mmap_pages.\n"
338 			       "(current value: %u,%u)\n",
339 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
340 			return -errno;
341 		} else {
342 			pr_err("failed to mmap with %d (%s)\n", errno,
343 				str_error_r(errno, msg, sizeof(msg)));
344 			if (errno)
345 				return -errno;
346 			else
347 				return -EINVAL;
348 		}
349 	}
350 	return 0;
351 }
352 
/* Map the buffers of rec->evlist; thin wrapper around record__mmap_evlist(). */
static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}
357 
/*
 * Configure and open every event in rec->evlist, apply filters and driver
 * configs, mmap the buffers and attach the evlist to the session.
 *
 * Returns 0 on success; -ENOMEM if the dummy event cannot be added,
 * -errno if an event cannot be opened, -1 if a filter or driver config
 * cannot be applied, or the error from record__mmap().
 */
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		/* Move tracking from the first event to the dummy just added. */
		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			/* Retry if a fallback could be applied to this event. */
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}
			/* A weak group member failed: reset the group and retry. */
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
			        pos = perf_evlist__reset_weak_group(evlist, pos);
				goto try_again;
			}
			/* Capture errno before the reporting calls below. */
			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	/* On failure 'pos' is set by the callee to the offending event. */
	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
435 
436 static int process_sample_event(struct perf_tool *tool,
437 				union perf_event *event,
438 				struct perf_sample *sample,
439 				struct perf_evsel *evsel,
440 				struct machine *machine)
441 {
442 	struct record *rec = container_of(tool, struct record, tool);
443 
444 	if (rec->evlist->first_sample_time == 0)
445 		rec->evlist->first_sample_time = sample->time;
446 
447 	rec->evlist->last_sample_time = sample->time;
448 
449 	if (rec->buildid_all)
450 		return 0;
451 
452 	rec->samples++;
453 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
454 }
455 
456 static int process_buildids(struct record *rec)
457 {
458 	struct perf_data *data = &rec->data;
459 	struct perf_session *session = rec->session;
460 
461 	if (data->size == 0)
462 		return 0;
463 
464 	/*
465 	 * During this process, it'll load kernel map and replace the
466 	 * dso->long_name to a real pathname it found.  In this case
467 	 * we prefer the vmlinux path like
468 	 *   /lib/modules/3.16.4/build/vmlinux
469 	 *
470 	 * rather than build-id path (in debug directory).
471 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
472 	 */
473 	symbol_conf.ignore_vmlinux_buildid = true;
474 
475 	/*
476 	 * If --buildid-all is given, it marks all DSO regardless of hits,
477 	 * so no need to process samples. But if timestamp_boundary is enabled,
478 	 * it still needs to walk on all samples to get the timestamps of
479 	 * first/last samples.
480 	 */
481 	if (rec->buildid_all && !rec->timestamp_boundary)
482 		rec->tool.sample = NULL;
483 
484 	return perf_session__process_events(session);
485 }
486 
487 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
488 {
489 	int err;
490 	struct perf_tool *tool = data;
491 	/*
492 	 *As for guest kernel when processing subcommand record&report,
493 	 *we arrange module mmap prior to guest kernel mmap and trigger
494 	 *a preload dso because default guest module symbols are loaded
495 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
496 	 *method is used to avoid symbol missing when the first addr is
497 	 *in module instead of in guest kernel.
498 	 */
499 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
500 					     machine);
501 	if (err < 0)
502 		pr_err("Couldn't record guest kernel [%d]'s reference"
503 		       " relocation symbol.\n", machine->pid);
504 
505 	/*
506 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
507 	 * have no _text sometimes.
508 	 */
509 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
510 						 machine);
511 	if (err < 0)
512 		pr_err("Couldn't record guest kernel [%d]'s reference"
513 		       " relocation symbol.\n", machine->pid);
514 }
515 
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event; record__mmap_read_evlist()
 * writes it after a pass over the mmaps that produced output.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
520 
521 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
522 				    bool overwrite)
523 {
524 	u64 bytes_written = rec->bytes_written;
525 	int i;
526 	int rc = 0;
527 	struct perf_mmap *maps;
528 
529 	if (!evlist)
530 		return 0;
531 
532 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
533 	if (!maps)
534 		return 0;
535 
536 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
537 		return 0;
538 
539 	for (i = 0; i < evlist->nr_mmaps; i++) {
540 		struct perf_mmap *map = &maps[i];
541 
542 		if (map->base) {
543 			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
544 				rc = -1;
545 				goto out;
546 			}
547 		}
548 
549 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
550 		    record__auxtrace_mmap_read(rec, map) != 0) {
551 			rc = -1;
552 			goto out;
553 		}
554 	}
555 
556 	/*
557 	 * Mark the round finished in case we wrote
558 	 * at least one event.
559 	 */
560 	if (bytes_written != rec->bytes_written)
561 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
562 
563 	if (overwrite)
564 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
565 out:
566 	return rc;
567 }
568 
569 static int record__mmap_read_all(struct record *rec)
570 {
571 	int err;
572 
573 	err = record__mmap_read_evlist(rec, rec->evlist, false);
574 	if (err)
575 		return err;
576 
577 	return record__mmap_read_evlist(rec, rec->evlist, true);
578 }
579 
580 static void record__init_features(struct record *rec)
581 {
582 	struct perf_session *session = rec->session;
583 	int feat;
584 
585 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
586 		perf_header__set_feat(&session->header, feat);
587 
588 	if (rec->no_buildid)
589 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
590 
591 	if (!have_tracepoints(&rec->evlist->entries))
592 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
593 
594 	if (!rec->opts.branch_stack)
595 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
596 
597 	if (!rec->opts.full_auxtrace)
598 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
599 
600 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
601 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
602 
603 	perf_header__clear_feat(&session->header, HEADER_STAT);
604 }
605 
606 static void
607 record__finish_output(struct record *rec)
608 {
609 	struct perf_data *data = &rec->data;
610 	int fd = perf_data__fd(data);
611 
612 	if (data->is_pipe)
613 		return;
614 
615 	rec->session->header.data_size += rec->bytes_written;
616 	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
617 
618 	if (!rec->no_buildid) {
619 		process_buildids(rec);
620 
621 		if (rec->buildid_all)
622 			dsos__hit_all(rec->session);
623 	}
624 	perf_session__write_header(rec->session, rec->evlist, fd, true);
625 
626 	return;
627 }
628 
629 static int record__synthesize_workload(struct record *rec, bool tail)
630 {
631 	int err;
632 	struct thread_map *thread_map;
633 
634 	if (rec->opts.tail_synthesize != tail)
635 		return 0;
636 
637 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
638 	if (thread_map == NULL)
639 		return -1;
640 
641 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
642 						 process_synthesized_event,
643 						 &rec->session->machines.host,
644 						 rec->opts.sample_address,
645 						 rec->opts.proc_map_timeout);
646 	thread_map__put(thread_map);
647 	return err;
648 }
649 
/* Defined further down; needed by record__switch_output(). */
static int record__synthesize(struct record *rec, bool tail);
651 
/*
 * Finish the current output file and switch to a new, timestamped one.
 *
 * @at_exit: true when called for the final dump; in that case the byte
 *           counters are not reset and no tracking events are emitted
 *           afterwards.
 *
 * Returns the value returned by perf_data__switch() (negative on failure),
 * or -EINVAL if the current timestamp cannot be obtained.
 */
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same Size:      "2015122520103046"*/
	char timestamp[] = "InvalidTimestamp";

	/* Emit tail-synthesized events (if configured) before closing out. */
	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	/* Start counting from zero for the new file. */
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist. Which causes newly created perf.data doesn't
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
703 
/* Error code delivered with the workload-failure signal; 0 if none arrived. */
static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 *
 * The handler stores the integer carried in the signal's si_value and
 * marks both the recording and the child as finished.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}
719 
/* Signal handlers defined later in the file. */
static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);
722 
/*
 * Weak default: emits nothing and returns 0.
 * NOTE(review): presumably overridden by an architecture-specific
 * implementation where time conversion data is available - confirm.
 */
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
731 
732 static const struct perf_event_mmap_page *
733 perf_evlist__pick_pc(struct perf_evlist *evlist)
734 {
735 	if (evlist) {
736 		if (evlist->mmap && evlist->mmap[0].base)
737 			return evlist->mmap[0].base;
738 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
739 			return evlist->overwrite_mmap[0].base;
740 	}
741 	return NULL;
742 }
743 
744 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
745 {
746 	const struct perf_event_mmap_page *pc;
747 
748 	pc = perf_evlist__pick_pc(rec->evlist);
749 	if (pc)
750 		return pc;
751 	return NULL;
752 }
753 
/*
 * Emit the synthesized (non-sampled) events describing the recording
 * environment: for pipe output the attrs, features and tracing data; then
 * time conversion, auxtrace info, kernel and module mmaps, guest machines,
 * extra attrs, thread and cpu maps, and finally the existing threads.
 *
 * Does nothing (returns 0) unless @tail matches the tail_synthesize option.
 * Returns 0 on success or the error of the first step that is treated as
 * fatal; kernel mmap and module failures only produce a one-time warning.
 */
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features works on top of them (on report side).
		 */
		err = perf_event__synthesize_attrs(tool, rec->evlist,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			/* A positive result is added to the byte count. */
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		/* Failures here are warned about once but are not fatal. */
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}
864 
865 static int __cmd_record(struct record *rec, int argc, const char **argv)
866 {
867 	int err;
868 	int status = 0;
869 	unsigned long waking = 0;
870 	const bool forks = argc > 0;
871 	struct perf_tool *tool = &rec->tool;
872 	struct record_opts *opts = &rec->opts;
873 	struct perf_data *data = &rec->data;
874 	struct perf_session *session;
875 	bool disabled = false, draining = false;
876 	int fd;
877 
878 	atexit(record__sig_exit);
879 	signal(SIGCHLD, sig_handler);
880 	signal(SIGINT, sig_handler);
881 	signal(SIGTERM, sig_handler);
882 	signal(SIGSEGV, sigsegv_handler);
883 
884 	if (rec->opts.record_namespaces)
885 		tool->namespace_events = true;
886 
887 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
888 		signal(SIGUSR2, snapshot_sig_handler);
889 		if (rec->opts.auxtrace_snapshot_mode)
890 			trigger_on(&auxtrace_snapshot_trigger);
891 		if (rec->switch_output.enabled)
892 			trigger_on(&switch_output_trigger);
893 	} else {
894 		signal(SIGUSR2, SIG_IGN);
895 	}
896 
897 	session = perf_session__new(data, false, tool);
898 	if (session == NULL) {
899 		pr_err("Perf session creation failed.\n");
900 		return -1;
901 	}
902 
903 	fd = perf_data__fd(data);
904 	rec->session = session;
905 
906 	record__init_features(rec);
907 
908 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
909 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
910 
911 	if (forks) {
912 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
913 						    argv, data->is_pipe,
914 						    workload_exec_failed_signal);
915 		if (err < 0) {
916 			pr_err("Couldn't run the workload!\n");
917 			status = err;
918 			goto out_delete_session;
919 		}
920 	}
921 
922 	/*
923 	 * If we have just single event and are sending data
924 	 * through pipe, we need to force the ids allocation,
925 	 * because we synthesize event name through the pipe
926 	 * and need the id for that.
927 	 */
928 	if (data->is_pipe && rec->evlist->nr_entries == 1)
929 		rec->opts.sample_id = true;
930 
931 	if (record__open(rec) != 0) {
932 		err = -1;
933 		goto out_child;
934 	}
935 
936 	err = bpf__apply_obj_config();
937 	if (err) {
938 		char errbuf[BUFSIZ];
939 
940 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
941 		pr_err("ERROR: Apply config to BPF failed: %s\n",
942 			 errbuf);
943 		goto out_child;
944 	}
945 
946 	/*
947 	 * Normally perf_session__new would do this, but it doesn't have the
948 	 * evlist.
949 	 */
950 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
951 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
952 		rec->tool.ordered_events = false;
953 	}
954 
955 	if (!rec->evlist->nr_groups)
956 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
957 
958 	if (data->is_pipe) {
959 		err = perf_header__write_pipe(fd);
960 		if (err < 0)
961 			goto out_child;
962 	} else {
963 		err = perf_session__write_header(session, rec->evlist, fd, false);
964 		if (err < 0)
965 			goto out_child;
966 	}
967 
968 	if (!rec->no_buildid
969 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
970 		pr_err("Couldn't generate buildids. "
971 		       "Use --no-buildid to profile anyway.\n");
972 		err = -1;
973 		goto out_child;
974 	}
975 
976 	err = record__synthesize(rec, false);
977 	if (err < 0)
978 		goto out_child;
979 
980 	if (rec->realtime_prio) {
981 		struct sched_param param;
982 
983 		param.sched_priority = rec->realtime_prio;
984 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
985 			pr_err("Could not set realtime priority.\n");
986 			err = -1;
987 			goto out_child;
988 		}
989 	}
990 
991 	/*
992 	 * When perf is starting the traced process, all the events
993 	 * (apart from group members) have enable_on_exec=1 set,
994 	 * so don't spoil it by prematurely enabling them.
995 	 */
996 	if (!target__none(&opts->target) && !opts->initial_delay)
997 		perf_evlist__enable(rec->evlist);
998 
999 	/*
1000 	 * Let the child rip
1001 	 */
1002 	if (forks) {
1003 		struct machine *machine = &session->machines.host;
1004 		union perf_event *event;
1005 		pid_t tgid;
1006 
1007 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1008 		if (event == NULL) {
1009 			err = -ENOMEM;
1010 			goto out_child;
1011 		}
1012 
1013 		/*
1014 		 * Some H/W events are generated before COMM event
1015 		 * which is emitted during exec(), so perf script
1016 		 * cannot see a correct process name for those events.
1017 		 * Synthesize COMM event to prevent it.
1018 		 */
1019 		tgid = perf_event__synthesize_comm(tool, event,
1020 						   rec->evlist->workload.pid,
1021 						   process_synthesized_event,
1022 						   machine);
1023 		free(event);
1024 
1025 		if (tgid == -1)
1026 			goto out_child;
1027 
1028 		event = malloc(sizeof(event->namespaces) +
1029 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1030 			       machine->id_hdr_size);
1031 		if (event == NULL) {
1032 			err = -ENOMEM;
1033 			goto out_child;
1034 		}
1035 
1036 		/*
1037 		 * Synthesize NAMESPACES event for the command specified.
1038 		 */
1039 		perf_event__synthesize_namespaces(tool, event,
1040 						  rec->evlist->workload.pid,
1041 						  tgid, process_synthesized_event,
1042 						  machine);
1043 		free(event);
1044 
1045 		perf_evlist__start_workload(rec->evlist);
1046 	}
1047 
1048 	if (opts->initial_delay) {
1049 		usleep(opts->initial_delay * USEC_PER_MSEC);
1050 		perf_evlist__enable(rec->evlist);
1051 	}
1052 
1053 	trigger_ready(&auxtrace_snapshot_trigger);
1054 	trigger_ready(&switch_output_trigger);
1055 	perf_hooks__invoke_record_start();
1056 	for (;;) {
1057 		unsigned long long hits = rec->samples;
1058 
1059 		/*
1060 		 * rec->evlist->bkw_mmap_state is possible to be
1061 		 * BKW_MMAP_EMPTY here: when done == true and
1062 		 * hits != rec->samples in previous round.
1063 		 *
1064 		 * perf_evlist__toggle_bkw_mmap ensure we never
1065 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1066 		 */
1067 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1068 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1069 
1070 		if (record__mmap_read_all(rec) < 0) {
1071 			trigger_error(&auxtrace_snapshot_trigger);
1072 			trigger_error(&switch_output_trigger);
1073 			err = -1;
1074 			goto out_child;
1075 		}
1076 
1077 		if (auxtrace_record__snapshot_started) {
1078 			auxtrace_record__snapshot_started = 0;
1079 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1080 				record__read_auxtrace_snapshot(rec);
1081 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1082 				pr_err("AUX area tracing snapshot failed\n");
1083 				err = -1;
1084 				goto out_child;
1085 			}
1086 		}
1087 
1088 		if (trigger_is_hit(&switch_output_trigger)) {
1089 			/*
1090 			 * If switch_output_trigger is hit, the data in
1091 			 * overwritable ring buffer should have been collected,
1092 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1093 			 *
1094 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
1095 			 * record__mmap_read_all() didn't collect data from
1096 			 * overwritable ring buffer. Read again.
1097 			 */
1098 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1099 				continue;
1100 			trigger_ready(&switch_output_trigger);
1101 
1102 			/*
1103 			 * Reenable events in overwrite ring buffer after
1104 			 * record__mmap_read_all(): we should have collected
1105 			 * data from it.
1106 			 */
1107 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1108 
1109 			if (!quiet)
1110 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1111 					waking);
1112 			waking = 0;
1113 			fd = record__switch_output(rec, false);
1114 			if (fd < 0) {
1115 				pr_err("Failed to switch to new file\n");
1116 				trigger_error(&switch_output_trigger);
1117 				err = fd;
1118 				goto out_child;
1119 			}
1120 
1121 			/* re-arm the alarm */
1122 			if (rec->switch_output.time)
1123 				alarm(rec->switch_output.time);
1124 		}
1125 
1126 		if (hits == rec->samples) {
1127 			if (done || draining)
1128 				break;
1129 			err = perf_evlist__poll(rec->evlist, -1);
1130 			/*
1131 			 * Propagate error, only if there's any. Ignore positive
1132 			 * number of returned events and interrupt error.
1133 			 */
1134 			if (err > 0 || (err < 0 && errno == EINTR))
1135 				err = 0;
1136 			waking++;
1137 
1138 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1139 				draining = true;
1140 		}
1141 
1142 		/*
1143 		 * When perf is starting the traced process, at the end events
1144 		 * die with the process and we wait for that. Thus no need to
1145 		 * disable events in this case.
1146 		 */
1147 		if (done && !disabled && !target__none(&opts->target)) {
1148 			trigger_off(&auxtrace_snapshot_trigger);
1149 			perf_evlist__disable(rec->evlist);
1150 			disabled = true;
1151 		}
1152 	}
1153 	trigger_off(&auxtrace_snapshot_trigger);
1154 	trigger_off(&switch_output_trigger);
1155 
1156 	if (forks && workload_exec_errno) {
1157 		char msg[STRERR_BUFSIZE];
1158 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1159 		pr_err("Workload failed: %s\n", emsg);
1160 		err = -1;
1161 		goto out_child;
1162 	}
1163 
1164 	if (!quiet)
1165 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1166 
1167 	if (target__none(&rec->opts.target))
1168 		record__synthesize_workload(rec, true);
1169 
1170 out_child:
1171 	if (forks) {
1172 		int exit_status;
1173 
1174 		if (!child_finished)
1175 			kill(rec->evlist->workload.pid, SIGTERM);
1176 
1177 		wait(&exit_status);
1178 
1179 		if (err < 0)
1180 			status = err;
1181 		else if (WIFEXITED(exit_status))
1182 			status = WEXITSTATUS(exit_status);
1183 		else if (WIFSIGNALED(exit_status))
1184 			signr = WTERMSIG(exit_status);
1185 	} else
1186 		status = err;
1187 
1188 	record__synthesize(rec, true);
1189 	/* this will be recalculated during process_buildids() */
1190 	rec->samples = 0;
1191 
1192 	if (!err) {
1193 		if (!rec->timestamp_filename) {
1194 			record__finish_output(rec);
1195 		} else {
1196 			fd = record__switch_output(rec, true);
1197 			if (fd < 0) {
1198 				status = fd;
1199 				goto out_delete_session;
1200 			}
1201 		}
1202 	}
1203 
1204 	perf_hooks__invoke_record_end();
1205 
1206 	if (!err && !quiet) {
1207 		char samples[128];
1208 		const char *postfix = rec->timestamp_filename ?
1209 					".<timestamp>" : "";
1210 
1211 		if (rec->samples && !rec->opts.full_auxtrace)
1212 			scnprintf(samples, sizeof(samples),
1213 				  " (%" PRIu64 " samples)", rec->samples);
1214 		else
1215 			samples[0] = '\0';
1216 
1217 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1218 			perf_data__size(data) / 1024.0 / 1024.0,
1219 			data->file.path, postfix, samples);
1220 	}
1221 
1222 out_delete_session:
1223 	perf_session__delete(session);
1224 	return status;
1225 }
1226 
1227 static void callchain_debug(struct callchain_param *callchain)
1228 {
1229 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1230 
1231 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1232 
1233 	if (callchain->record_mode == CALLCHAIN_DWARF)
1234 		pr_debug("callchain: stack dump size %d\n",
1235 			 callchain->dump_size);
1236 }
1237 
1238 int record_opts__parse_callchain(struct record_opts *record,
1239 				 struct callchain_param *callchain,
1240 				 const char *arg, bool unset)
1241 {
1242 	int ret;
1243 	callchain->enabled = !unset;
1244 
1245 	/* --no-call-graph */
1246 	if (unset) {
1247 		callchain->record_mode = CALLCHAIN_NONE;
1248 		pr_debug("callchain: disabled\n");
1249 		return 0;
1250 	}
1251 
1252 	ret = parse_callchain_record_opt(arg, callchain);
1253 	if (!ret) {
1254 		/* Enable data address sampling for DWARF unwind. */
1255 		if (callchain->record_mode == CALLCHAIN_DWARF)
1256 			record->sample_address = true;
1257 		callchain_debug(callchain);
1258 	}
1259 
1260 	return ret;
1261 }
1262 
1263 int record_parse_callchain_opt(const struct option *opt,
1264 			       const char *arg,
1265 			       int unset)
1266 {
1267 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1268 }
1269 
1270 int record_callchain_opt(const struct option *opt,
1271 			 const char *arg __maybe_unused,
1272 			 int unset __maybe_unused)
1273 {
1274 	struct callchain_param *callchain = opt->value;
1275 
1276 	callchain->enabled = true;
1277 
1278 	if (callchain->record_mode == CALLCHAIN_NONE)
1279 		callchain->record_mode = CALLCHAIN_FP;
1280 
1281 	callchain_debug(callchain);
1282 	return 0;
1283 }
1284 
1285 static int perf_record_config(const char *var, const char *value, void *cb)
1286 {
1287 	struct record *rec = cb;
1288 
1289 	if (!strcmp(var, "record.build-id")) {
1290 		if (!strcmp(value, "cache"))
1291 			rec->no_buildid_cache = false;
1292 		else if (!strcmp(value, "no-cache"))
1293 			rec->no_buildid_cache = true;
1294 		else if (!strcmp(value, "skip"))
1295 			rec->no_buildid = true;
1296 		else
1297 			return -1;
1298 		return 0;
1299 	}
1300 	if (!strcmp(var, "record.call-graph")) {
1301 		var = "call-graph.record-mode";
1302 		return perf_default_config(var, value, cb);
1303 	}
1304 
1305 	return 0;
1306 }
1307 
/* One entry of the clock name lookup table used by parse_clockid(). */
struct clockid_map {
	const char *name;	/* clock name, compared case-insensitively */
	int clockid;		/* clock id stored in the record options on a match */
};

/* Build a table entry pairing the name n with the clock id c. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: lookups stop at the first entry with a NULL name. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 *
 * NOTE(review): the fallback numbers are presumably the kernel's values for
 * these clocks - confirm against the UAPI headers.
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
1331 
/*
 * Clock names accepted by parse_clockid(), which also strips an optional
 * "CLOCK_" prefix before consulting this table.  Terminated by CLOCKID_END.
 */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};
1350 
1351 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1352 {
1353 	struct timespec res;
1354 
1355 	*res_ns = 0;
1356 	if (!clock_getres(clk_id, &res))
1357 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1358 	else
1359 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1360 
1361 	return 0;
1362 }
1363 
1364 static int parse_clockid(const struct option *opt, const char *str, int unset)
1365 {
1366 	struct record_opts *opts = (struct record_opts *)opt->value;
1367 	const struct clockid_map *cm;
1368 	const char *ostr = str;
1369 
1370 	if (unset) {
1371 		opts->use_clockid = 0;
1372 		return 0;
1373 	}
1374 
1375 	/* no arg passed */
1376 	if (!str)
1377 		return 0;
1378 
1379 	/* no setting it twice */
1380 	if (opts->use_clockid)
1381 		return -1;
1382 
1383 	opts->use_clockid = true;
1384 
1385 	/* if its a number, we're done */
1386 	if (sscanf(str, "%d", &opts->clockid) == 1)
1387 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1388 
1389 	/* allow a "CLOCK_" prefix to the name */
1390 	if (!strncasecmp(str, "CLOCK_", 6))
1391 		str += 6;
1392 
1393 	for (cm = clockids; cm->name; cm++) {
1394 		if (!strcasecmp(str, cm->name)) {
1395 			opts->clockid = cm->clockid;
1396 			return get_clockid_res(opts->clockid,
1397 					       &opts->clockid_res_ns);
1398 		}
1399 	}
1400 
1401 	opts->use_clockid = false;
1402 	ui__warning("unknown clockid %s, check man page\n", ostr);
1403 	return -1;
1404 }
1405 
1406 static int record__parse_mmap_pages(const struct option *opt,
1407 				    const char *str,
1408 				    int unset __maybe_unused)
1409 {
1410 	struct record_opts *opts = opt->value;
1411 	char *s, *p;
1412 	unsigned int mmap_pages;
1413 	int ret;
1414 
1415 	if (!str)
1416 		return -EINVAL;
1417 
1418 	s = strdup(str);
1419 	if (!s)
1420 		return -ENOMEM;
1421 
1422 	p = strchr(s, ',');
1423 	if (p)
1424 		*p = '\0';
1425 
1426 	if (*s) {
1427 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1428 		if (ret)
1429 			goto out_free;
1430 		opts->mmap_pages = mmap_pages;
1431 	}
1432 
1433 	if (!p) {
1434 		ret = 0;
1435 		goto out_free;
1436 	}
1437 
1438 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1439 	if (ret)
1440 		goto out_free;
1441 
1442 	opts->auxtrace_mmap_pages = mmap_pages;
1443 
1444 out_free:
1445 	free(s);
1446 	return ret;
1447 }
1448 
1449 static void switch_output_size_warn(struct record *rec)
1450 {
1451 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1452 	struct switch_output *s = &rec->switch_output;
1453 
1454 	wakeup_size /= 2;
1455 
1456 	if (s->size < wakeup_size) {
1457 		char buf[100];
1458 
1459 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1460 		pr_warning("WARNING: switch-output data size lower than "
1461 			   "wakeup kernel buffer size (%s) "
1462 			   "expect bigger perf.data sizes\n", buf);
1463 	}
1464 }
1465 
1466 static int switch_output_setup(struct record *rec)
1467 {
1468 	struct switch_output *s = &rec->switch_output;
1469 	static struct parse_tag tags_size[] = {
1470 		{ .tag  = 'B', .mult = 1       },
1471 		{ .tag  = 'K', .mult = 1 << 10 },
1472 		{ .tag  = 'M', .mult = 1 << 20 },
1473 		{ .tag  = 'G', .mult = 1 << 30 },
1474 		{ .tag  = 0 },
1475 	};
1476 	static struct parse_tag tags_time[] = {
1477 		{ .tag  = 's', .mult = 1        },
1478 		{ .tag  = 'm', .mult = 60       },
1479 		{ .tag  = 'h', .mult = 60*60    },
1480 		{ .tag  = 'd', .mult = 60*60*24 },
1481 		{ .tag  = 0 },
1482 	};
1483 	unsigned long val;
1484 
1485 	if (!s->set)
1486 		return 0;
1487 
1488 	if (!strcmp(s->str, "signal")) {
1489 		s->signal = true;
1490 		pr_debug("switch-output with SIGUSR2 signal\n");
1491 		goto enabled;
1492 	}
1493 
1494 	val = parse_tag_value(s->str, tags_size);
1495 	if (val != (unsigned long) -1) {
1496 		s->size = val;
1497 		pr_debug("switch-output with %s size threshold\n", s->str);
1498 		goto enabled;
1499 	}
1500 
1501 	val = parse_tag_value(s->str, tags_time);
1502 	if (val != (unsigned long) -1) {
1503 		s->time = val;
1504 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1505 			 s->str, s->time);
1506 		goto enabled;
1507 	}
1508 
1509 	return -1;
1510 
1511 enabled:
1512 	rec->timestamp_filename = true;
1513 	s->enabled              = true;
1514 
1515 	if (s->size && !rec->opts.no_buffering)
1516 		switch_output_size_warn(rec);
1517 
1518 	return 0;
1519 }
1520 
/* Usage lines shown for 'perf record'; the array is NULL-terminated. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
/* Non-static pointer to the usage lines, so it has external linkage. */
const char * const *record_usage = __record_usage;
1527 
1528 /*
1529  * XXX Ideally would be local to cmd_record() and passed to a record__new
1530  * because we need to have access to it in record__exit, that is called
1531  * after cmd_record() exits, but since record_options need to be accessible to
1532  * builtin-script, leave it here.
1533  *
 * At least we don't touch it in all the other functions here directly.
1535  *
1536  * Just say no to tons of global variables, sigh.
1537  */
/*
 * The single global 'perf record' state.  The values below are the built-in
 * defaults; the option table writes directly into this object.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		/* NOTE(review): UINT_MAX/ULLONG_MAX look like "not set by the user" markers - confirm */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		/* in ms, see the --proc-map-timeout option help */
		.proc_map_timeout     = 500,
	},
	/* Event handlers installed in the embedded perf_tool. */
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};
1562 
/* Help text of --call-graph: CALLCHAIN_RECORD_HELP followed by the default mode. */
const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

/* Set by --dry-run: cmd_record() then jumps to cleanup without calling __cmd_record(). */
static bool dry_run;
1567 
1568 /*
1569  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1570  * with it and switch to use the library functions in perf_evlist that came
1571  * from builtin-record.c, i.e. use record_opts,
1572  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1573  * using pipes, etc.
1574  */
1575 static struct option __record_options[] = {
1576 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1577 		     "event selector. use 'perf list' to list available events",
1578 		     parse_events_option),
1579 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1580 		     "event filter", parse_filter),
1581 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1582 			   NULL, "don't record events from perf itself",
1583 			   exclude_perf),
1584 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1585 		    "record events on existing process id"),
1586 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1587 		    "record events on existing thread id"),
1588 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1589 		    "collect data with this RT SCHED_FIFO priority"),
1590 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1591 		    "collect data without buffering"),
1592 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1593 		    "collect raw sample records from all opened counters"),
1594 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1595 			    "system-wide collection from all CPUs"),
1596 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1597 		    "list of cpus to monitor"),
1598 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1599 	OPT_STRING('o', "output", &record.data.file.path, "file",
1600 		    "output file name"),
1601 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1602 			&record.opts.no_inherit_set,
1603 			"child tasks do not inherit counters"),
1604 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1605 		    "synthesize non-sample events at the end of output"),
1606 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1607 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1608 		    "Fail if the specified frequency can't be used"),
1609 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1610 		     "profile at this frequency",
1611 		      record__parse_freq),
1612 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1613 		     "number of mmap data pages and AUX area tracing mmap pages",
1614 		     record__parse_mmap_pages),
1615 	OPT_BOOLEAN(0, "group", &record.opts.group,
1616 		    "put the counters into a counter group"),
1617 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1618 			   NULL, "enables call-graph recording" ,
1619 			   &record_callchain_opt),
1620 	OPT_CALLBACK(0, "call-graph", &record.opts,
1621 		     "record_mode[,record_size]", record_callchain_help,
1622 		     &record_parse_callchain_opt),
1623 	OPT_INCR('v', "verbose", &verbose,
1624 		    "be more verbose (show counter open errors, etc)"),
1625 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1626 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1627 		    "per thread counts"),
1628 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1629 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1630 		    "Record the sample physical addresses"),
1631 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1632 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1633 			&record.opts.sample_time_set,
1634 			"Record the sample timestamps"),
1635 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1636 			"Record the sample period"),
1637 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1638 		    "don't sample"),
1639 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1640 			&record.no_buildid_cache_set,
1641 			"do not update the buildid cache"),
1642 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1643 			&record.no_buildid_set,
1644 			"do not collect buildids in perf.data"),
1645 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1646 		     "monitor event in cgroup name only",
1647 		     parse_cgroups),
1648 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1649 		  "ms to wait before starting measurement after program start"),
1650 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1651 		   "user to profile"),
1652 
1653 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1654 		     "branch any", "sample any taken branches",
1655 		     parse_branch_stack),
1656 
1657 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1658 		     "branch filter mask", "branch stack filter modes",
1659 		     parse_branch_stack),
1660 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1661 		    "sample by weight (on special events only)"),
1662 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1663 		    "sample transaction flags (special events only)"),
1664 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1665 		    "use per-thread mmaps"),
1666 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1667 		    "sample selected machine registers on interrupt,"
1668 		    " use -I ? to list register names", parse_regs),
1669 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1670 		    "sample selected machine registers on interrupt,"
1671 		    " use -I ? to list register names", parse_regs),
1672 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1673 		    "Record running/enabled time of read (:S) events"),
1674 	OPT_CALLBACK('k', "clockid", &record.opts,
1675 	"clockid", "clockid to use for events, see clock_gettime()",
1676 	parse_clockid),
1677 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1678 			  "opts", "AUX area tracing Snapshot Mode", ""),
1679 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1680 			"per thread proc mmap processing timeout in ms"),
1681 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1682 		    "Record namespaces events"),
1683 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1684 		    "Record context switch events"),
1685 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1686 			 "Configure all used events to run in kernel space.",
1687 			 PARSE_OPT_EXCLUSIVE),
1688 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1689 			 "Configure all used events to run in user space.",
1690 			 PARSE_OPT_EXCLUSIVE),
1691 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1692 		   "clang binary to use for compiling BPF scriptlets"),
1693 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1694 		   "options passed to clang when compiling BPF scriptlets"),
1695 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1696 		   "file", "vmlinux pathname"),
1697 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1698 		    "Record build-id of all DSOs regardless of hits"),
1699 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1700 		    "append timestamp to output filename"),
1701 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1702 		    "Record timestamp boundary (time of first/last samples)"),
1703 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1704 			  &record.switch_output.set, "signal,size,time",
1705 			  "Switch output when receive SIGUSR2 or cross size,time threshold",
1706 			  "signal"),
1707 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1708 		    "Parse options then exit"),
1709 	OPT_END()
1710 };
1711 
1712 struct option *record_options = __record_options;
1713 
/*
 * cmd_record - entry point of 'perf record'.
 *
 * Allocates the event list, applies the perf configuration
 * (perf_record_config) and the command-line options, sets up switch-output,
 * symbols and auxtrace, adjusts build-id and target related options and
 * finally hands over to __cmd_record().
 *
 * Returns the result of __cmd_record(), or the error code of whichever setup
 * step failed.  With --dry-run the function stops after
 * record__auxtrace_init() and returns its (zero) result.
 *
 * NOTE(review): the plain 'return's taken before symbol__init() bypass the
 * 'out' label, so rec->evlist is not deleted on those paths.
 */
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

	/*
	 * Options that depend on features missing from this build are passed
	 * to set_option_nobuild() together with the build flag or reason.
	 */
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	/* Config values are applied first, then the command line. */
	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	/* NOTE(review): no return here, so usage_with_options_msg() presumably exits - confirm */
	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	/* Time based switch-output: arm the first alarm. */
	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	/* --dry-run: options are parsed, skip the actual recording. */
	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	/* Preset for the 'goto out' taken when adding the default event fails below. */
	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildid if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * Following code equals to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	/* Overwrite mode forces tail synthesis on. */
	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	/* No event given on the command line: add the default one. */
	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	/* With -t, no-inherit is turned on unless -i/--no-inherit was given explicitly. */
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	/* A validation problem is only reported as a warning; setup continues. */
	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		/* Keep errno from the failed call; the reporting below may change it. */
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	/*
	 * NOTE(review): err is overwritten right after this check, so
	 * usage_with_options() presumably does not return - confirm.
	 */
	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace because it would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	/* Common cleanup for every path taken after symbol__init(). */
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}
1908 
1909 static void snapshot_sig_handler(int sig __maybe_unused)
1910 {
1911 	struct record *rec = &record;
1912 
1913 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1914 		trigger_hit(&auxtrace_snapshot_trigger);
1915 		auxtrace_record__snapshot_started = 1;
1916 		if (auxtrace_record__snapshot_start(record.itr))
1917 			trigger_error(&auxtrace_snapshot_trigger);
1918 	}
1919 
1920 	if (switch_output_signal(rec))
1921 		trigger_hit(&switch_output_trigger);
1922 }
1923 
1924 static void alarm_sig_handler(int sig __maybe_unused)
1925 {
1926 	struct record *rec = &record;
1927 
1928 	if (switch_output_time(rec))
1929 		trigger_hit(&switch_output_trigger);
1930 }
1931