// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

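/*
 * Parsed form of --switch-output[=<signal|size|time>]; see
 * switch_output_setup() below for how .str is interpreted.
 */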
struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data	data;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			timestamp_boundary;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
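/*
 * The trigger state machine lives in util/trigger.h: the SIGUSR2 and alarm
 * handlers move a READY trigger to HIT, the main loop in __cmd_record()
 * reacts to the hit and then re-arms the trigger with trigger_ready().
 */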
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data__write(rec->session->data, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
{
	struct record *rec = to;

	rec->samples++;
	return record__write(rec, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data *data = &rec->data;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data__is_pipe(data)) {
		off_t file_offset;
		int fd = perf_data__fd(data);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

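	/*
	 * Example: len1 + len2 == 13 gives padding == 3, so the payload
	 * written below always ends on an 8-byte boundary.
	 */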
	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

static int record__auxtrace_init(struct record *rec)
{
	int err;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	/*
	 * For initial_delay we need to add a dummy event so that we can track
	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
	 * real events, the ones asked for by the user.
	 */
	if (opts->initial_delay) {
		if (perf_evlist__add_dummy(evlist))
			return -ENOMEM;

		pos = perf_evlist__first(evlist);
		pos->tracking = 0;
		pos = perf_evlist__last(evlist);
		pos->tracking = 1;
		pos->attr.enable_on_exec = 1;
	}

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}

		pos->supported = true;
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	if (rec->evlist->first_sample_time == 0)
		rec->evlist->first_sample_time = sample->time;

	rec->evlist->last_sample_time = sample->time;

	if (rec->buildid_all)
		return 0;

	rec->samples++;
	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data *data = &rec->data;
	struct perf_session *session = rec->session;

	if (data->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory).
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
	if (rec->buildid_all && !rec->timestamp_boundary)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * For the guest kernel, when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because by default
	 * guest module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

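/*
 * PERF_RECORD_FINISHED_ROUND marks a flush point for the report side:
 * everything written before it may be sorted and processed, which bounds
 * the memory needed by the ordered-events queue.
 */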
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool overwrite)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished if we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (overwrite)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data *data = &rec->data;
	int fd = perf_data__fd(data);

	if (data->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data *data = &rec->data;
	int fd, err;

	/* Same size as a real timestamp, e.g. "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data__switch(data, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			data->file.path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would not
		 * contain map and comm information.
		 * Create a fake thread_map and call
		 * perf_event__synthesize_thread_map() directly for those
		 * events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
			return evlist->overwrite_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data *data = &rec->data;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data__fd(data);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (data->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	if (!perf_evlist__exclude_kernel(rec->evlist)) {
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine);
		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/kallsyms permission or run as root.\n");

		err = perf_event__synthesize_modules(tool, process_synthesized_event,
						     machine);
		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
				   "Check /proc/modules permission or run as root.\n");
	}

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = perf_event__synthesize_extra_attr(&rec->tool,
						rec->evlist,
						process_synthesized_event,
						data->is_pipe);
	if (err)
		goto out;

	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
						 process_synthesized_event,
						NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize thread map.\n");
		return err;
	}

	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
					     process_synthesized_event, NULL);
	if (err < 0) {
		pr_err("Couldn't synthesize cpu map.\n");
		return err;
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout, 1);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data *data = &rec->data;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(data, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data__fd(data);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, data->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	/*
	 * If we have just a single event and are sending data
	 * through a pipe, we need to force the ids allocation,
	 * because we synthesize the event name through the pipe
	 * and need the id for that.
	 */
	if (data->is_pipe && rec->evlist->nr_entries == 1)
		rec->opts.sample_id = true;

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (data->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		struct machine *machine = &session->machines.host;
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data__size(data) / 1024.0 / 1024.0,
			data->file.path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph")) {
		var = "call-graph.record-mode";
		return perf_default_config(var, value, cb);
	}

	return 0;
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
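/* The fallback values above mirror include/uapi/linux/time.h. */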

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

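/*
 * -k/--clockid accepts a raw number or a name with an optional "CLOCK_"
 * prefix, e.g. "-k 4", "-k monotonic_raw" and "-k CLOCK_MONOTONIC_RAW"
 * all select CLOCK_MONOTONIC_RAW.
 */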
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

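/*
 * Parse "-m pages[,aux_pages]": e.g. "-m 512,128" asks for 512 data mmap
 * pages plus 128 AUX area pages, while "-m 64" leaves the AUX size
 * untouched. Both values go through __perf_evlist__parse_mmap_pages(),
 * which also accepts a B/K/M/G size suffix.
 */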
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

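	/*
	 * The size check in record__write() only runs after a chunk has
	 * been flushed, so a threshold below the wakeup size is likely to
	 * overshoot and the resulting files come out larger than asked for.
	 */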
	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s), "
			   "expect bigger perf.data sizes\n", buf);
	}
}

static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

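	/*
	 * s->str is tried in order: the literal "signal", then a size tag
	 * (e.g. "100M" -> 100 << 20 bytes), then a time tag (e.g. "30s" ->
	 * 30 seconds); anything else fails the setup.
	 */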
	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.data.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
		    "Fail if the specified frequency can't be used"),
	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
		     "profile at this frequency",
		      record__parse_freq),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
		    "sample selected machine registers in user space,"
		    " use --user-regs=? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
		    "Record timestamp boundary (time of first/last samples)"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or crossing the size/time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains
	 * AUX area tracing data because we do not decode the
	 * trace, which would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}
1903