xref: /openbmc/linux/tools/perf/builtin-record.c (revision 5a244f48)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
17 
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/drv_configs.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/time-utils.h"
42 #include "util/units.h"
43 #include "asm/bug.h"
44 
45 #include <errno.h>
46 #include <inttypes.h>
47 #include <poll.h>
48 #include <unistd.h>
49 #include <sched.h>
50 #include <signal.h>
51 #include <sys/mman.h>
52 #include <sys/wait.h>
54 #include <linux/time64.h>
55 
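/*
 * Parsed form of the --switch-output option: rotate the output file on
 * SIGUSR2 ("signal"), after 'size' bytes have been written, or every
 * 'time' seconds.  Filled in by switch_output_setup().
 */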
56 struct switch_output {
57 	bool		 enabled;
58 	bool		 signal;
59 	unsigned long	 size;
60 	unsigned long	 time;
61 	const char	*str;
62 	bool		 set;
63 };
64 
65 struct record {
66 	struct perf_tool	tool;
67 	struct record_opts	opts;
68 	u64			bytes_written;
69 	struct perf_data_file	file;
70 	struct auxtrace_record	*itr;
71 	struct perf_evlist	*evlist;
72 	struct perf_session	*session;
73 	const char		*progname;
74 	int			realtime_prio;
75 	bool			no_buildid;
76 	bool			no_buildid_set;
77 	bool			no_buildid_cache;
78 	bool			no_buildid_cache_set;
79 	bool			buildid_all;
80 	bool			timestamp_filename;
81 	struct switch_output	switch_output;
82 	unsigned long long	samples;
83 };
84 
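/*
 * Both triggers are simple OFF -> READY -> HIT state machines (see
 * util/trigger.h): the signal/alarm handlers move a READY trigger to
 * HIT, and the main loop in __cmd_record() acts on the hit and re-arms
 * the trigger back to READY.
 */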
85 static volatile int auxtrace_record__snapshot_started;
86 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
87 static DEFINE_TRIGGER(switch_output_trigger);
88 
89 static bool switch_output_signal(struct record *rec)
90 {
91 	return rec->switch_output.signal &&
92 	       trigger_is_ready(&switch_output_trigger);
93 }
94 
95 static bool switch_output_size(struct record *rec)
96 {
97 	return rec->switch_output.size &&
98 	       trigger_is_ready(&switch_output_trigger) &&
99 	       (rec->bytes_written >= rec->switch_output.size);
100 }
101 
102 static bool switch_output_time(struct record *rec)
103 {
104 	return rec->switch_output.time &&
105 	       trigger_is_ready(&switch_output_trigger);
106 }
107 
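/*
 * All output ends up here: write the buffer to the perf.data file,
 * account the bytes, and fire the switch-output trigger once the
 * configured size threshold has been crossed.
 */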
108 static int record__write(struct record *rec, void *bf, size_t size)
109 {
110 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
111 		pr_err("failed to write perf data, error: %m\n");
112 		return -1;
113 	}
114 
115 	rec->bytes_written += size;
116 
117 	if (switch_output_size(rec))
118 		trigger_hit(&switch_output_trigger);
119 
120 	return 0;
121 }
122 
123 static int process_synthesized_event(struct perf_tool *tool,
124 				     union perf_event *event,
125 				     struct perf_sample *sample __maybe_unused,
126 				     struct machine *machine __maybe_unused)
127 {
128 	struct record *rec = container_of(tool, struct record, tool);
129 	return record__write(rec, event, event->header.size);
130 }
131 
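/*
 * Find the valid [*start, *end) byte range of a backward ring buffer by
 * walking the record headers from 'head': each perf_event_header carries
 * its record's size, so the walk either covers a whole buffer worth of
 * data (the buffer wrapped; step back one record if we overshot) or hits
 * a zero-sized header (the buffer was never filled past that point).
 */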
132 static int
133 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
134 {
135 	struct perf_event_header *pheader;
136 	u64 evt_head = head;
137 	int size = mask + 1;
138 
139 	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
140 	pheader = (struct perf_event_header *)(buf + (head & mask));
141 	*start = head;
142 	while (true) {
143 		if (evt_head - head >= (unsigned int)size) {
144 			pr_debug("Finished reading backward ring buffer: rewind\n");
145 			if (evt_head - head > (unsigned int)size)
146 				evt_head -= pheader->size;
147 			*end = evt_head;
148 			return 0;
149 		}
150 
151 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
152 
153 		if (pheader->size == 0) {
154 			pr_debug("Finished reading backward ring buffer: get start\n");
155 			*end = evt_head;
156 			return 0;
157 		}
158 
159 		evt_head += pheader->size;
160 		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
161 	}
162 	WARN_ONCE(1, "Shouldn't get here\n");
163 	return -1;
164 }
165 
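/*
 * For a forward ring buffer the valid data is simply [old, head); a
 * backward one has to be walked to find its limits.
 */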
166 static int
167 rb_find_range(void *data, int mask, u64 head, u64 old,
168 	      u64 *start, u64 *end, bool backward)
169 {
170 	if (!backward) {
171 		*start = old;
172 		*end = head;
173 		return 0;
174 	}
175 
176 	return backward_rb_find_range(data, mask, head, start, end);
177 }
178 
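/*
 * Copy the new data out of one ring buffer into the perf.data file.  A
 * range that wraps past the end of the mmap is written as two chunks:
 * e.g. with a 64KiB buffer, start = 0xf000 and end = 0x11000 are written
 * as 0x1000 bytes from the tail plus 0x1000 bytes from the head.
 */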
179 static int
180 record__mmap_read(struct record *rec, struct perf_mmap *md,
181 		  bool overwrite, bool backward)
182 {
183 	u64 head = perf_mmap__read_head(md);
184 	u64 old = md->prev;
185 	u64 end = head, start = old;
186 	unsigned char *data = md->base + page_size;
187 	unsigned long size;
188 	void *buf;
189 	int rc = 0;
190 
191 	if (rb_find_range(data, md->mask, head,
192 			  old, &start, &end, backward))
193 		return -1;
194 
195 	if (start == end)
196 		return 0;
197 
198 	rec->samples++;
199 
200 	size = end - start;
201 	if (size > (unsigned long)(md->mask) + 1) {
202 		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
203 
204 		md->prev = head;
205 		perf_mmap__consume(md, overwrite || backward);
206 		return 0;
207 	}
208 
209 	if ((start & md->mask) + size != (end & md->mask)) {
210 		buf = &data[start & md->mask];
211 		size = md->mask + 1 - (start & md->mask);
212 		start += size;
213 
214 		if (record__write(rec, buf, size) < 0) {
215 			rc = -1;
216 			goto out;
217 		}
218 	}
219 
220 	buf = &data[start & md->mask];
221 	size = end - start;
222 	start += size;
223 
224 	if (record__write(rec, buf, size) < 0) {
225 		rc = -1;
226 		goto out;
227 	}
228 
229 	md->prev = head;
230 	perf_mmap__consume(md, overwrite || backward);
231 out:
232 	return rc;
233 }
234 
235 static volatile int done;
236 static volatile int signr = -1;
237 static volatile int child_finished;
238 
239 static void sig_handler(int sig)
240 {
241 	if (sig == SIGCHLD)
242 		child_finished = 1;
243 	else
244 		signr = sig;
245 
246 	done = 1;
247 }
248 
249 static void sigsegv_handler(int sig)
250 {
251 	perf_hooks__recover();
252 	sighandler_dump_stack(sig);
253 }
254 
255 static void record__sig_exit(void)
256 {
257 	if (signr == -1)
258 		return;
259 
260 	signal(signr, SIG_DFL);
261 	raise(signr);
262 }
263 
264 #ifdef HAVE_AUXTRACE_SUPPORT
265 
266 static int record__process_auxtrace(struct perf_tool *tool,
267 				    union perf_event *event, void *data1,
268 				    size_t len1, void *data2, size_t len2)
269 {
270 	struct record *rec = container_of(tool, struct record, tool);
271 	struct perf_data_file *file = &rec->file;
272 	size_t padding;
273 	u8 pad[8] = {0};
274 
275 	if (!perf_data_file__is_pipe(file)) {
276 		off_t file_offset;
277 		int fd = perf_data_file__fd(file);
278 		int err;
279 
280 		file_offset = lseek(fd, 0, SEEK_CUR);
281 		if (file_offset == -1)
282 			return -1;
283 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
284 						     event, file_offset);
285 		if (err)
286 			return err;
287 	}
288 
289 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
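	/* e.g. len1 + len2 == 13 -> padding == 3, keeping the total 8-byte aligned */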
290 	padding = (len1 + len2) & 7;
291 	if (padding)
292 		padding = 8 - padding;
293 
294 	record__write(rec, event, event->header.size);
295 	record__write(rec, data1, len1);
296 	if (len2)
297 		record__write(rec, data2, len2);
298 	record__write(rec, &pad, padding);
299 
300 	return 0;
301 }
302 
303 static int record__auxtrace_mmap_read(struct record *rec,
304 				      struct auxtrace_mmap *mm)
305 {
306 	int ret;
307 
308 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
309 				  record__process_auxtrace);
310 	if (ret < 0)
311 		return ret;
312 
313 	if (ret)
314 		rec->samples++;
315 
316 	return 0;
317 }
318 
319 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
320 					       struct auxtrace_mmap *mm)
321 {
322 	int ret;
323 
324 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
325 					   record__process_auxtrace,
326 					   rec->opts.auxtrace_snapshot_size);
327 	if (ret < 0)
328 		return ret;
329 
330 	if (ret)
331 		rec->samples++;
332 
333 	return 0;
334 }
335 
336 static int record__auxtrace_read_snapshot_all(struct record *rec)
337 {
338 	int i;
339 	int rc = 0;
340 
341 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
342 		struct auxtrace_mmap *mm =
343 				&rec->evlist->mmap[i].auxtrace_mmap;
344 
345 		if (!mm->base)
346 			continue;
347 
348 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
349 			rc = -1;
350 			goto out;
351 		}
352 	}
353 out:
354 	return rc;
355 }
356 
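/*
 * Drain the AUX area data of every mmap, then finish the snapshot and
 * re-arm the trigger so another SIGUSR2 can take the next one.
 */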
357 static void record__read_auxtrace_snapshot(struct record *rec)
358 {
359 	pr_debug("Recording AUX area tracing snapshot\n");
360 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
361 		trigger_error(&auxtrace_snapshot_trigger);
362 	} else {
363 		if (auxtrace_record__snapshot_finish(rec->itr))
364 			trigger_error(&auxtrace_snapshot_trigger);
365 		else
366 			trigger_ready(&auxtrace_snapshot_trigger);
367 	}
368 }
369 
370 #else
371 
372 static inline
373 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
374 			       struct auxtrace_mmap *mm __maybe_unused)
375 {
376 	return 0;
377 }
378 
379 static inline
380 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
381 {
382 }
383 
384 static inline
385 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
386 {
387 	return 0;
388 }
389 
390 #endif
391 
392 static int record__mmap_evlist(struct record *rec,
393 			       struct perf_evlist *evlist)
394 {
395 	struct record_opts *opts = &rec->opts;
396 	char msg[512];
397 
398 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
399 				 opts->auxtrace_mmap_pages,
400 				 opts->auxtrace_snapshot_mode) < 0) {
401 		if (errno == EPERM) {
402 			pr_err("Permission error mapping pages.\n"
403 			       "Consider increasing "
404 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
405 			       "or try again with a smaller value of -m/--mmap_pages.\n"
406 			       "(current value: %u,%u)\n",
407 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
408 			return -errno;
409 		} else {
410 			pr_err("failed to mmap with %d (%s)\n", errno,
411 				str_error_r(errno, msg, sizeof(msg)));
412 			if (errno)
413 				return -errno;
414 			else
415 				return -EINVAL;
416 		}
417 	}
418 	return 0;
419 }
420 
421 static int record__mmap(struct record *rec)
422 {
423 	return record__mmap_evlist(rec, rec->evlist);
424 }
425 
426 static int record__open(struct record *rec)
427 {
428 	char msg[BUFSIZ];
429 	struct perf_evsel *pos;
430 	struct perf_evlist *evlist = rec->evlist;
431 	struct perf_session *session = rec->session;
432 	struct record_opts *opts = &rec->opts;
433 	struct perf_evsel_config_term *err_term;
434 	int rc = 0;
435 
436 	perf_evlist__config(evlist, opts, &callchain_param);
437 
438 	evlist__for_each_entry(evlist, pos) {
439 try_again:
440 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
441 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
442 				if (verbose > 0)
443 					ui__warning("%s\n", msg);
444 				goto try_again;
445 			}
446 
447 			rc = -errno;
448 			perf_evsel__open_strerror(pos, &opts->target,
449 						  errno, msg, sizeof(msg));
450 			ui__error("%s\n", msg);
451 			goto out;
452 		}
453 	}
454 
455 	if (perf_evlist__apply_filters(evlist, &pos)) {
456 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
457 			pos->filter, perf_evsel__name(pos), errno,
458 			str_error_r(errno, msg, sizeof(msg)));
459 		rc = -1;
460 		goto out;
461 	}
462 
463 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
464 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
465 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
466 		      str_error_r(errno, msg, sizeof(msg)));
467 		rc = -1;
468 		goto out;
469 	}
470 
471 	rc = record__mmap(rec);
472 	if (rc)
473 		goto out;
474 
475 	session->evlist = evlist;
476 	perf_session__set_id_hdr_size(session);
477 out:
478 	return rc;
479 }
480 
481 static int process_sample_event(struct perf_tool *tool,
482 				union perf_event *event,
483 				struct perf_sample *sample,
484 				struct perf_evsel *evsel,
485 				struct machine *machine)
486 {
487 	struct record *rec = container_of(tool, struct record, tool);
488 
489 	rec->samples++;
490 
491 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
492 }
493 
494 static int process_buildids(struct record *rec)
495 {
496 	struct perf_data_file *file  = &rec->file;
497 	struct perf_session *session = rec->session;
498 
499 	if (file->size == 0)
500 		return 0;
501 
502 	/*
503 	 * During this process, it'll load the kernel map and replace
504 	 * dso->long_name with the real pathname it found.  In this case
505 	 * we prefer a vmlinux path like
506 	 *   /lib/modules/3.16.4/build/vmlinux
507 	 *
508 	 * rather than a build-id path (in the debug directory):
509 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
510 	 */
511 	symbol_conf.ignore_vmlinux_buildid = true;
512 
513 	/*
514 	 * If --buildid-all is given, it marks all DSOs regardless of
515 	 * hits, so there is no need to process samples.
516 	 */
517 	if (rec->buildid_all)
518 		rec->tool.sample = NULL;
519 
520 	return perf_session__process_events(session);
521 }
522 
523 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
524 {
525 	int err;
526 	struct perf_tool *tool = data;
527 	/*
528 	 * For a guest kernel, when processing the record & report
529 	 * subcommands, we arrange the module mmaps prior to the guest
530 	 * kernel mmap and trigger a DSO preload, because default guest
531 	 * module symbols are loaded from guest kallsyms instead of
532 	 * /lib/modules/XXX/XXX. This avoids missing symbols when the
533 	 * first address is in a module rather than in the guest kernel.
534 	 */
535 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
536 					     machine);
537 	if (err < 0)
538 		pr_err("Couldn't record guest kernel [%d]'s reference"
539 		       " relocation symbol.\n", machine->pid);
540 
541 	/*
542 	 * We use _stext for the guest kernel because the guest kernel's
543 	 * /proc/kallsyms sometimes has no _text.
544 	 */
545 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
546 						 machine);
547 	if (err < 0)
548 		pr_err("Couldn't record guest kernel [%d]'s reference"
549 		       " relocation symbol.\n", machine->pid);
550 }
551 
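/*
 * PERF_RECORD_FINISHED_ROUND tells the session layer that everything
 * written before it is complete, so ordered-event processing can sort
 * and flush events up to that point.
 */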
552 static struct perf_event_header finished_round_event = {
553 	.size = sizeof(struct perf_event_header),
554 	.type = PERF_RECORD_FINISHED_ROUND,
555 };
556 
557 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
558 				    bool backward)
559 {
560 	u64 bytes_written = rec->bytes_written;
561 	int i;
562 	int rc = 0;
563 	struct perf_mmap *maps;
564 
565 	if (!evlist)
566 		return 0;
567 
568 	maps = backward ? evlist->backward_mmap : evlist->mmap;
569 	if (!maps)
570 		return 0;
571 
572 	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
573 		return 0;
574 
575 	for (i = 0; i < evlist->nr_mmaps; i++) {
576 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
577 
578 		if (maps[i].base) {
579 			if (record__mmap_read(rec, &maps[i],
580 					      evlist->overwrite, backward) != 0) {
581 				rc = -1;
582 				goto out;
583 			}
584 		}
585 
586 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
587 		    record__auxtrace_mmap_read(rec, mm) != 0) {
588 			rc = -1;
589 			goto out;
590 		}
591 	}
592 
593 	/*
594 	 * Mark the round finished in case we wrote
595 	 * at least one event.
596 	 */
597 	if (bytes_written != rec->bytes_written)
598 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
599 
600 	if (backward)
601 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
602 out:
603 	return rc;
604 }
605 
606 static int record__mmap_read_all(struct record *rec)
607 {
608 	int err;
609 
610 	err = record__mmap_read_evlist(rec, rec->evlist, false);
611 	if (err)
612 		return err;
613 
614 	return record__mmap_read_evlist(rec, rec->evlist, true);
615 }
616 
617 static void record__init_features(struct record *rec)
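/*
 * Start from every header feature enabled, then clear the ones this
 * session cannot (no tracepoints, no auxtrace, ...) or should not
 * (--no-buildid) provide.
 */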
618 {
619 	struct perf_session *session = rec->session;
620 	int feat;
621 
622 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
623 		perf_header__set_feat(&session->header, feat);
624 
625 	if (rec->no_buildid)
626 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
627 
628 	if (!have_tracepoints(&rec->evlist->entries))
629 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
630 
631 	if (!rec->opts.branch_stack)
632 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
633 
634 	if (!rec->opts.full_auxtrace)
635 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
636 
637 	perf_header__clear_feat(&session->header, HEADER_STAT);
638 }
639 
640 static void
641 record__finish_output(struct record *rec)
642 {
643 	struct perf_data_file *file = &rec->file;
644 	int fd = perf_data_file__fd(file);
645 
646 	if (file->is_pipe)
647 		return;
648 
649 	rec->session->header.data_size += rec->bytes_written;
650 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
651 
652 	if (!rec->no_buildid) {
653 		process_buildids(rec);
654 
655 		if (rec->buildid_all)
656 			dsos__hit_all(rec->session);
657 	}
658 	perf_session__write_header(rec->session, rec->evlist, fd, true);
659 
660 	return;
661 }
662 
663 static int record__synthesize_workload(struct record *rec, bool tail)
664 {
665 	int err;
666 	struct thread_map *thread_map;
667 
668 	if (rec->opts.tail_synthesize != tail)
669 		return 0;
670 
671 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
672 	if (thread_map == NULL)
673 		return -1;
674 
675 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
676 						 process_synthesized_event,
677 						 &rec->session->machines.host,
678 						 rec->opts.sample_address,
679 						 rec->opts.proc_map_timeout);
680 	thread_map__put(thread_map);
681 	return err;
682 }
683 
684 static int record__synthesize(struct record *rec, bool tail);
685 
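/*
 * Rotate the output: synthesize the tail events, finalize the current
 * file and move it to <path>.<timestamp>, then, unless we are exiting,
 * reset the counters and resynthesize the tracking events into the new
 * file.  Returns the new output fd, or a negative error code.
 */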
686 static int
687 record__switch_output(struct record *rec, bool at_exit)
688 {
689 	struct perf_data_file *file = &rec->file;
690 	int fd, err;
691 
692 	/* Same size:      "2015122520103046" */
693 	char timestamp[] = "InvalidTimestamp";
694 
695 	record__synthesize(rec, true);
696 	if (target__none(&rec->opts.target))
697 		record__synthesize_workload(rec, true);
698 
699 	rec->samples = 0;
700 	record__finish_output(rec);
701 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
702 	if (err) {
703 		pr_err("Failed to get current timestamp\n");
704 		return -EINVAL;
705 	}
706 
707 	fd = perf_data_file__switch(file, timestamp,
708 				    rec->session->header.data_offset,
709 				    at_exit);
710 	if (fd >= 0 && !at_exit) {
711 		rec->bytes_written = 0;
712 		rec->session->header.data_size = 0;
713 	}
714 
715 	if (!quiet)
716 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
717 			file->path, timestamp);
718 
719 	/* Output tracking events */
720 	if (!at_exit) {
721 		record__synthesize(rec, false);
722 
723 		/*
724 		 * In 'perf record --switch-output' without -a,
725 		 * record__synthesize() in record__switch_output() won't
726 		 * generate tracking events because there's no thread_map
727 		 * in the evlist, so the newly created perf.data would
728 		 * contain no map or comm information.  Create a fake
729 		 * thread_map and call perf_event__synthesize_thread_map()
730 		 * directly for those events.
731 		 */
732 		if (target__none(&rec->opts.target))
733 			record__synthesize_workload(rec, false);
734 	}
735 	return fd;
736 }
737 
738 static volatile int workload_exec_errno;
739 
740 /*
741  * perf_evlist__prepare_workload will send a SIGUSR1
742  * if the fork fails, since we asked for it by setting
743  * its want_signal to true.
744  */
745 static void workload_exec_failed_signal(int signo __maybe_unused,
746 					siginfo_t *info,
747 					void *ucontext __maybe_unused)
748 {
749 	workload_exec_errno = info->si_value.sival_int;
750 	done = 1;
751 	child_finished = 1;
752 }
753 
754 static void snapshot_sig_handler(int sig);
755 static void alarm_sig_handler(int sig);
756 
757 int __weak
758 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
759 			    struct perf_tool *tool __maybe_unused,
760 			    perf_event__handler_t process __maybe_unused,
761 			    struct machine *machine __maybe_unused)
762 {
763 	return 0;
764 }
765 
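/*
 * Pick any mapped ring buffer control page; it carries the time
 * conversion (TSC) parameters used by perf_event__synth_time_conv().
 */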
766 static const struct perf_event_mmap_page *
767 perf_evlist__pick_pc(struct perf_evlist *evlist)
768 {
769 	if (evlist) {
770 		if (evlist->mmap && evlist->mmap[0].base)
771 			return evlist->mmap[0].base;
772 		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
773 			return evlist->backward_mmap[0].base;
774 	}
775 	return NULL;
776 }
777 
778 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
779 {
780 	const struct perf_event_mmap_page *pc;
781 
782 	pc = perf_evlist__pick_pc(rec->evlist);
783 	if (pc)
784 		return pc;
785 	return NULL;
786 }
787 
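/*
 * Synthesize the non-sample events describing the current system state:
 * for pipe output the features, attrs and tracing data; then the time
 * conversion and auxtrace info events, the kernel and module mmaps, any
 * guest machines, and the maps/comms of already running threads.
 */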
788 static int record__synthesize(struct record *rec, bool tail)
789 {
790 	struct perf_session *session = rec->session;
791 	struct machine *machine = &session->machines.host;
792 	struct perf_data_file *file = &rec->file;
793 	struct record_opts *opts = &rec->opts;
794 	struct perf_tool *tool = &rec->tool;
795 	int fd = perf_data_file__fd(file);
796 	int err = 0;
797 
798 	if (rec->opts.tail_synthesize != tail)
799 		return 0;
800 
801 	if (file->is_pipe) {
802 		err = perf_event__synthesize_features(
803 			tool, session, rec->evlist, process_synthesized_event);
804 		if (err < 0) {
805 			pr_err("Couldn't synthesize features.\n");
806 			return err;
807 		}
808 
809 		err = perf_event__synthesize_attrs(tool, session,
810 						   process_synthesized_event);
811 		if (err < 0) {
812 			pr_err("Couldn't synthesize attrs.\n");
813 			goto out;
814 		}
815 
816 		if (have_tracepoints(&rec->evlist->entries)) {
817 			/*
818 			 * FIXME: err <= 0 here actually means that
819 			 * there were no tracepoints, so it's not really
820 			 * an error, just that we don't need to
821 			 * synthesize anything.  We really should
822 			 * return this more properly and also
823 			 * propagate the errors that now call die()
824 			 */
825 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
826 								  process_synthesized_event);
827 			if (err <= 0) {
828 				pr_err("Couldn't record tracing data.\n");
829 				goto out;
830 			}
831 			rec->bytes_written += err;
832 		}
833 	}
834 
835 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
836 					  process_synthesized_event, machine);
837 	if (err)
838 		goto out;
839 
840 	if (rec->opts.full_auxtrace) {
841 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
842 					session, process_synthesized_event);
843 		if (err)
844 			goto out;
845 	}
846 
847 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
848 						 machine);
849 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
850 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
851 			   "Check /proc/kallsyms permission or run as root.\n");
852 
853 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
854 					     machine);
855 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
856 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
857 			   "Check /proc/modules permission or run as root.\n");
858 
859 	if (perf_guest) {
860 		machines__process_guests(&session->machines,
861 					 perf_event__synthesize_guest_os, tool);
862 	}
863 
864 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
865 					    process_synthesized_event, opts->sample_address,
866 					    opts->proc_map_timeout);
867 out:
868 	return err;
869 }
870 
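/*
 * The main recording loop: set up the signal handlers and the session,
 * open and mmap the events, synthesize the initial metadata, kick off
 * the workload, then read the ring buffers until told to stop, and
 * finally synthesize the tail events and finalize the output file.
 */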
871 static int __cmd_record(struct record *rec, int argc, const char **argv)
872 {
873 	int err;
874 	int status = 0;
875 	unsigned long waking = 0;
876 	const bool forks = argc > 0;
877 	struct machine *machine;
878 	struct perf_tool *tool = &rec->tool;
879 	struct record_opts *opts = &rec->opts;
880 	struct perf_data_file *file = &rec->file;
881 	struct perf_session *session;
882 	bool disabled = false, draining = false;
883 	int fd;
884 
885 	rec->progname = argv[0];
886 
887 	atexit(record__sig_exit);
888 	signal(SIGCHLD, sig_handler);
889 	signal(SIGINT, sig_handler);
890 	signal(SIGTERM, sig_handler);
891 	signal(SIGSEGV, sigsegv_handler);
892 
893 	if (rec->opts.record_namespaces)
894 		tool->namespace_events = true;
895 
896 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
897 		signal(SIGUSR2, snapshot_sig_handler);
898 		if (rec->opts.auxtrace_snapshot_mode)
899 			trigger_on(&auxtrace_snapshot_trigger);
900 		if (rec->switch_output.enabled)
901 			trigger_on(&switch_output_trigger);
902 	} else {
903 		signal(SIGUSR2, SIG_IGN);
904 	}
905 
906 	session = perf_session__new(file, false, tool);
907 	if (session == NULL) {
908 		pr_err("Perf session creation failed.\n");
909 		return -1;
910 	}
911 
912 	fd = perf_data_file__fd(file);
913 	rec->session = session;
914 
915 	record__init_features(rec);
916 
917 	if (forks) {
918 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
919 						    argv, file->is_pipe,
920 						    workload_exec_failed_signal);
921 		if (err < 0) {
922 			pr_err("Couldn't run the workload!\n");
923 			status = err;
924 			goto out_delete_session;
925 		}
926 	}
927 
928 	if (record__open(rec) != 0) {
929 		err = -1;
930 		goto out_child;
931 	}
932 
933 	err = bpf__apply_obj_config();
934 	if (err) {
935 		char errbuf[BUFSIZ];
936 
937 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
938 		pr_err("ERROR: Apply config to BPF failed: %s\n",
939 			 errbuf);
940 		goto out_child;
941 	}
942 
943 	/*
944 	 * Normally perf_session__new would do this, but it doesn't have the
945 	 * evlist.
946 	 */
947 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
948 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
949 		rec->tool.ordered_events = false;
950 	}
951 
952 	if (!rec->evlist->nr_groups)
953 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
954 
955 	if (file->is_pipe) {
956 		err = perf_header__write_pipe(fd);
957 		if (err < 0)
958 			goto out_child;
959 	} else {
960 		err = perf_session__write_header(session, rec->evlist, fd, false);
961 		if (err < 0)
962 			goto out_child;
963 	}
964 
965 	if (!rec->no_buildid
966 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
967 		pr_err("Couldn't generate buildids. "
968 		       "Use --no-buildid to profile anyway.\n");
969 		err = -1;
970 		goto out_child;
971 	}
972 
973 	machine = &session->machines.host;
974 
975 	err = record__synthesize(rec, false);
976 	if (err < 0)
977 		goto out_child;
978 
979 	if (rec->realtime_prio) {
980 		struct sched_param param;
981 
982 		param.sched_priority = rec->realtime_prio;
983 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
984 			pr_err("Could not set realtime priority.\n");
985 			err = -1;
986 			goto out_child;
987 		}
988 	}
989 
990 	/*
991 	 * When perf is starting the traced process, all the events
992 	 * (apart from group members) have enable_on_exec=1 set,
993 	 * so don't spoil it by prematurely enabling them.
994 	 */
995 	if (!target__none(&opts->target) && !opts->initial_delay)
996 		perf_evlist__enable(rec->evlist);
997 
998 	/*
999 	 * Let the child rip
1000 	 */
1001 	if (forks) {
1002 		union perf_event *event;
1003 		pid_t tgid;
1004 
1005 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1006 		if (event == NULL) {
1007 			err = -ENOMEM;
1008 			goto out_child;
1009 		}
1010 
1011 		/*
1012 		 * Some H/W events are generated before the COMM event,
1013 		 * which is emitted during exec(), so perf script
1014 		 * cannot see the correct process name for those events.
1015 		 * Synthesize a COMM event to prevent that.
1016 		 */
1017 		tgid = perf_event__synthesize_comm(tool, event,
1018 						   rec->evlist->workload.pid,
1019 						   process_synthesized_event,
1020 						   machine);
1021 		free(event);
1022 
1023 		if (tgid == -1)
1024 			goto out_child;
1025 
1026 		event = malloc(sizeof(event->namespaces) +
1027 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1028 			       machine->id_hdr_size);
1029 		if (event == NULL) {
1030 			err = -ENOMEM;
1031 			goto out_child;
1032 		}
1033 
1034 		/*
1035 		 * Synthesize NAMESPACES event for the command specified.
1036 		 */
1037 		perf_event__synthesize_namespaces(tool, event,
1038 						  rec->evlist->workload.pid,
1039 						  tgid, process_synthesized_event,
1040 						  machine);
1041 		free(event);
1042 
1043 		perf_evlist__start_workload(rec->evlist);
1044 	}
1045 
1046 	if (opts->initial_delay) {
1047 		usleep(opts->initial_delay * USEC_PER_MSEC);
1048 		perf_evlist__enable(rec->evlist);
1049 	}
1050 
1051 	trigger_ready(&auxtrace_snapshot_trigger);
1052 	trigger_ready(&switch_output_trigger);
1053 	perf_hooks__invoke_record_start();
1054 	for (;;) {
1055 		unsigned long long hits = rec->samples;
1056 
1057 		/*
1058 		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1059 		 * here: when done == true and hits != rec->samples
1060 		 * in the previous round.
1061 		 *
1062 		 * perf_evlist__toggle_bkw_mmap() ensures we never
1063 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1064 		 */
1065 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1066 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1067 
1068 		if (record__mmap_read_all(rec) < 0) {
1069 			trigger_error(&auxtrace_snapshot_trigger);
1070 			trigger_error(&switch_output_trigger);
1071 			err = -1;
1072 			goto out_child;
1073 		}
1074 
1075 		if (auxtrace_record__snapshot_started) {
1076 			auxtrace_record__snapshot_started = 0;
1077 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1078 				record__read_auxtrace_snapshot(rec);
1079 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1080 				pr_err("AUX area tracing snapshot failed\n");
1081 				err = -1;
1082 				goto out_child;
1083 			}
1084 		}
1085 
1086 		if (trigger_is_hit(&switch_output_trigger)) {
1087 			/*
1088 			 * If switch_output_trigger is hit, the data in the
1089 			 * overwritable ring buffer should have been collected,
1090 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1091 			 *
1092 			 * If SIGUSR2 was raised after or during
1093 			 * record__mmap_read_all(), it didn't collect data from
1094 			 * the overwritable ring buffer. Read again.
1095 			 */
1096 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1097 				continue;
1098 			trigger_ready(&switch_output_trigger);
1099 
1100 			/*
1101 			 * Re-enable events in the overwrite ring buffer after
1102 			 * record__mmap_read_all(): we should have collected
1103 			 * all of its data by now.
1104 			 */
1105 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1106 
1107 			if (!quiet)
1108 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1109 					waking);
1110 			waking = 0;
1111 			fd = record__switch_output(rec, false);
1112 			if (fd < 0) {
1113 				pr_err("Failed to switch to new file\n");
1114 				trigger_error(&switch_output_trigger);
1115 				err = fd;
1116 				goto out_child;
1117 			}
1118 
1119 			/* re-arm the alarm */
1120 			if (rec->switch_output.time)
1121 				alarm(rec->switch_output.time);
1122 		}
1123 
1124 		if (hits == rec->samples) {
1125 			if (done || draining)
1126 				break;
1127 			err = perf_evlist__poll(rec->evlist, -1);
1128 			/*
1129 			 * Propagate the error only if there is one. Ignore a
1130 			 * positive number of returned events and interrupt errors.
1131 			 */
1132 			if (err > 0 || (err < 0 && errno == EINTR))
1133 				err = 0;
1134 			waking++;
1135 
1136 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1137 				draining = true;
1138 		}
1139 
1140 		/*
1141 		 * When perf starts the traced process, the events die with
1142 		 * the process at the end and we wait for that, so there is
1143 		 * no need to disable the events in this case.
1144 		 */
1145 		if (done && !disabled && !target__none(&opts->target)) {
1146 			trigger_off(&auxtrace_snapshot_trigger);
1147 			perf_evlist__disable(rec->evlist);
1148 			disabled = true;
1149 		}
1150 	}
1151 	trigger_off(&auxtrace_snapshot_trigger);
1152 	trigger_off(&switch_output_trigger);
1153 
1154 	if (forks && workload_exec_errno) {
1155 		char msg[STRERR_BUFSIZE];
1156 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1157 		pr_err("Workload failed: %s\n", emsg);
1158 		err = -1;
1159 		goto out_child;
1160 	}
1161 
1162 	if (!quiet)
1163 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1164 
1165 	if (target__none(&rec->opts.target))
1166 		record__synthesize_workload(rec, true);
1167 
1168 out_child:
1169 	if (forks) {
1170 		int exit_status;
1171 
1172 		if (!child_finished)
1173 			kill(rec->evlist->workload.pid, SIGTERM);
1174 
1175 		wait(&exit_status);
1176 
1177 		if (err < 0)
1178 			status = err;
1179 		else if (WIFEXITED(exit_status))
1180 			status = WEXITSTATUS(exit_status);
1181 		else if (WIFSIGNALED(exit_status))
1182 			signr = WTERMSIG(exit_status);
1183 	} else
1184 		status = err;
1185 
1186 	record__synthesize(rec, true);
1187 	/* this will be recalculated during process_buildids() */
1188 	rec->samples = 0;
1189 
1190 	if (!err) {
1191 		if (!rec->timestamp_filename) {
1192 			record__finish_output(rec);
1193 		} else {
1194 			fd = record__switch_output(rec, true);
1195 			if (fd < 0) {
1196 				status = fd;
1197 				goto out_delete_session;
1198 			}
1199 		}
1200 	}
1201 
1202 	perf_hooks__invoke_record_end();
1203 
1204 	if (!err && !quiet) {
1205 		char samples[128];
1206 		const char *postfix = rec->timestamp_filename ?
1207 					".<timestamp>" : "";
1208 
1209 		if (rec->samples && !rec->opts.full_auxtrace)
1210 			scnprintf(samples, sizeof(samples),
1211 				  " (%" PRIu64 " samples)", rec->samples);
1212 		else
1213 			samples[0] = '\0';
1214 
1215 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1216 			perf_data_file__size(file) / 1024.0 / 1024.0,
1217 			file->path, postfix, samples);
1218 	}
1219 
1220 out_delete_session:
1221 	perf_session__delete(session);
1222 	return status;
1223 }
1224 
1225 static void callchain_debug(struct callchain_param *callchain)
1226 {
1227 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1228 
1229 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1230 
1231 	if (callchain->record_mode == CALLCHAIN_DWARF)
1232 		pr_debug("callchain: stack dump size %d\n",
1233 			 callchain->dump_size);
1234 }
1235 
1236 int record_opts__parse_callchain(struct record_opts *record,
1237 				 struct callchain_param *callchain,
1238 				 const char *arg, bool unset)
1239 {
1240 	int ret;
1241 	callchain->enabled = !unset;
1242 
1243 	/* --no-call-graph */
1244 	if (unset) {
1245 		callchain->record_mode = CALLCHAIN_NONE;
1246 		pr_debug("callchain: disabled\n");
1247 		return 0;
1248 	}
1249 
1250 	ret = parse_callchain_record_opt(arg, callchain);
1251 	if (!ret) {
1252 		/* Enable data address sampling for DWARF unwind. */
1253 		if (callchain->record_mode == CALLCHAIN_DWARF)
1254 			record->sample_address = true;
1255 		callchain_debug(callchain);
1256 	}
1257 
1258 	return ret;
1259 }
1260 
1261 int record_parse_callchain_opt(const struct option *opt,
1262 			       const char *arg,
1263 			       int unset)
1264 {
1265 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1266 }
1267 
1268 int record_callchain_opt(const struct option *opt,
1269 			 const char *arg __maybe_unused,
1270 			 int unset __maybe_unused)
1271 {
1272 	struct callchain_param *callchain = opt->value;
1273 
1274 	callchain->enabled = true;
1275 
1276 	if (callchain->record_mode == CALLCHAIN_NONE)
1277 		callchain->record_mode = CALLCHAIN_FP;
1278 
1279 	callchain_debug(callchain);
1280 	return 0;
1281 }
1282 
1283 static int perf_record_config(const char *var, const char *value, void *cb)
1284 {
1285 	struct record *rec = cb;
1286 
1287 	if (!strcmp(var, "record.build-id")) {
1288 		if (!strcmp(value, "cache"))
1289 			rec->no_buildid_cache = false;
1290 		else if (!strcmp(value, "no-cache"))
1291 			rec->no_buildid_cache = true;
1292 		else if (!strcmp(value, "skip"))
1293 			rec->no_buildid = true;
1294 		else
1295 			return -1;
1296 		return 0;
1297 	}
1298 	if (!strcmp(var, "record.call-graph"))
1299 		var = "call-graph.record-mode"; /* fall-through */
1300 
1301 	return perf_default_config(var, value, cb);
1302 }
1303 
1304 struct clockid_map {
1305 	const char *name;
1306 	int clockid;
1307 };
1308 
1309 #define CLOCKID_MAP(n, c)	\
1310 	{ .name = n, .clockid = (c), }
1311 
1312 #define CLOCKID_END	{ .name = NULL, }
1313 
1314 
1315 /*
1316  * Add the missing clockids; we need to build on many distros...
1317  */
1318 #ifndef CLOCK_MONOTONIC_RAW
1319 #define CLOCK_MONOTONIC_RAW 4
1320 #endif
1321 #ifndef CLOCK_BOOTTIME
1322 #define CLOCK_BOOTTIME 7
1323 #endif
1324 #ifndef CLOCK_TAI
1325 #define CLOCK_TAI 11
1326 #endif
1327 
1328 static const struct clockid_map clockids[] = {
1329 	/* available for all events, NMI safe */
1330 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1331 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1332 
1333 	/* available for some events */
1334 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1335 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1336 	CLOCKID_MAP("tai", CLOCK_TAI),
1337 
1338 	/* available for the lazy */
1339 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1340 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1341 	CLOCKID_MAP("real", CLOCK_REALTIME),
1342 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1343 
1344 	CLOCKID_END,
1345 };
1346 
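/*
 * Parse -k/--clockid: accepts a raw clockid number, or one of the names
 * above with an optional (case-insensitive) CLOCK_ prefix.
 */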
1347 static int parse_clockid(const struct option *opt, const char *str, int unset)
1348 {
1349 	struct record_opts *opts = (struct record_opts *)opt->value;
1350 	const struct clockid_map *cm;
1351 	const char *ostr = str;
1352 
1353 	if (unset) {
1354 		opts->use_clockid = 0;
1355 		return 0;
1356 	}
1357 
1358 	/* no arg passed */
1359 	if (!str)
1360 		return 0;
1361 
1362 	/* no setting it twice */
1363 	if (opts->use_clockid)
1364 		return -1;
1365 
1366 	opts->use_clockid = true;
1367 
1368 	/* if it's a number, we're done */
1369 	if (sscanf(str, "%d", &opts->clockid) == 1)
1370 		return 0;
1371 
1372 	/* allow a "CLOCK_" prefix to the name */
1373 	if (!strncasecmp(str, "CLOCK_", 6))
1374 		str += 6;
1375 
1376 	for (cm = clockids; cm->name; cm++) {
1377 		if (!strcasecmp(str, cm->name)) {
1378 			opts->clockid = cm->clockid;
1379 			return 0;
1380 		}
1381 	}
1382 
1383 	opts->use_clockid = false;
1384 	ui__warning("unknown clockid %s, check man page\n", ostr);
1385 	return -1;
1386 }
1387 
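/*
 * Parse -m/--mmap-pages as "pages[,pages]": the first value sizes the
 * data mmaps, the optional second one the AUX area tracing mmaps.
 */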
1388 static int record__parse_mmap_pages(const struct option *opt,
1389 				    const char *str,
1390 				    int unset __maybe_unused)
1391 {
1392 	struct record_opts *opts = opt->value;
1393 	char *s, *p;
1394 	unsigned int mmap_pages;
1395 	int ret;
1396 
1397 	if (!str)
1398 		return -EINVAL;
1399 
1400 	s = strdup(str);
1401 	if (!s)
1402 		return -ENOMEM;
1403 
1404 	p = strchr(s, ',');
1405 	if (p)
1406 		*p = '\0';
1407 
1408 	if (*s) {
1409 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1410 		if (ret)
1411 			goto out_free;
1412 		opts->mmap_pages = mmap_pages;
1413 	}
1414 
1415 	if (!p) {
1416 		ret = 0;
1417 		goto out_free;
1418 	}
1419 
1420 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1421 	if (ret)
1422 		goto out_free;
1423 
1424 	opts->auxtrace_mmap_pages = mmap_pages;
1425 
1426 out_free:
1427 	free(s);
1428 	return ret;
1429 }
1430 
1431 static void switch_output_size_warn(struct record *rec)
1432 {
1433 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1434 	struct switch_output *s = &rec->switch_output;
1435 
1436 	wakeup_size /= 2;
1437 
1438 	if (s->size < wakeup_size) {
1439 		char buf[100];
1440 
1441 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1442 		pr_warning("WARNING: switch-output data size is lower than the "
1443 			   "wakeup kernel buffer size (%s), "
1444 			   "expect bigger perf.data sizes\n", buf);
1445 	}
1446 }
1447 
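/*
 * Parse the --switch-output argument: "signal" rotates on SIGUSR2, a
 * size such as "100M" (B/K/M/G suffixes) rotates on output size, and a
 * time such as "30s" (s/m/h/d suffixes) rotates periodically, e.g.:
 *
 *   perf record --switch-output=100M -a
 *
 * Any of these implies --timestamp-filename for the rotated files.
 */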
1448 static int switch_output_setup(struct record *rec)
1449 {
1450 	struct switch_output *s = &rec->switch_output;
1451 	static struct parse_tag tags_size[] = {
1452 		{ .tag  = 'B', .mult = 1       },
1453 		{ .tag  = 'K', .mult = 1 << 10 },
1454 		{ .tag  = 'M', .mult = 1 << 20 },
1455 		{ .tag  = 'G', .mult = 1 << 30 },
1456 		{ .tag  = 0 },
1457 	};
1458 	static struct parse_tag tags_time[] = {
1459 		{ .tag  = 's', .mult = 1        },
1460 		{ .tag  = 'm', .mult = 60       },
1461 		{ .tag  = 'h', .mult = 60*60    },
1462 		{ .tag  = 'd', .mult = 60*60*24 },
1463 		{ .tag  = 0 },
1464 	};
1465 	unsigned long val;
1466 
1467 	if (!s->set)
1468 		return 0;
1469 
1470 	if (!strcmp(s->str, "signal")) {
1471 		s->signal = true;
1472 		pr_debug("switch-output with SIGUSR2 signal\n");
1473 		goto enabled;
1474 	}
1475 
1476 	val = parse_tag_value(s->str, tags_size);
1477 	if (val != (unsigned long) -1) {
1478 		s->size = val;
1479 		pr_debug("switch-output with %s size threshold\n", s->str);
1480 		goto enabled;
1481 	}
1482 
1483 	val = parse_tag_value(s->str, tags_time);
1484 	if (val != (unsigned long) -1) {
1485 		s->time = val;
1486 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1487 			 s->str, s->time);
1488 		goto enabled;
1489 	}
1490 
1491 	return -1;
1492 
1493 enabled:
1494 	rec->timestamp_filename = true;
1495 	s->enabled              = true;
1496 
1497 	if (s->size && !rec->opts.no_buffering)
1498 		switch_output_size_warn(rec);
1499 
1500 	return 0;
1501 }
1502 
1503 static const char * const __record_usage[] = {
1504 	"perf record [<options>] [<command>]",
1505 	"perf record [<options>] -- <command> [<options>]",
1506 	NULL
1507 };
1508 const char * const *record_usage = __record_usage;
1509 
1510 /*
1511  * XXX Ideally this would be local to cmd_record() and passed to a
1512  * record__new, because we need access to it in record__exit(), which is
1513  * called after cmd_record() exits; but since record_options needs to be
1514  * accessible to builtin-script, leave it here.
1515  *
1516  * At least we don't touch it directly in all the other functions here.
1517  *
1518  * Just say no to tons of global variables, sigh.
1519  */
1520 static struct record record = {
1521 	.opts = {
1522 		.sample_time	     = true,
1523 		.mmap_pages	     = UINT_MAX,
1524 		.user_freq	     = UINT_MAX,
1525 		.user_interval	     = ULLONG_MAX,
1526 		.freq		     = 4000,
1527 		.target		     = {
1528 			.uses_mmap   = true,
1529 			.default_per_cpu = true,
1530 		},
1531 		.proc_map_timeout     = 500,
1532 	},
1533 	.tool = {
1534 		.sample		= process_sample_event,
1535 		.fork		= perf_event__process_fork,
1536 		.exit		= perf_event__process_exit,
1537 		.comm		= perf_event__process_comm,
1538 		.namespaces	= perf_event__process_namespaces,
1539 		.mmap		= perf_event__process_mmap,
1540 		.mmap2		= perf_event__process_mmap2,
1541 		.ordered_events	= true,
1542 	},
1543 };
1544 
1545 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1546 	"\n\t\t\t\tDefault: fp";
1547 
1548 static bool dry_run;
1549 
1550 /*
1551  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1552  * with it and switch to using the library functions in perf_evlist that
1553  * came from builtin-record.c, i.e. use record_opts,
1554  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
1555  * using pipes, etc.
1556  */
1557 static struct option __record_options[] = {
1558 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1559 		     "event selector. use 'perf list' to list available events",
1560 		     parse_events_option),
1561 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1562 		     "event filter", parse_filter),
1563 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1564 			   NULL, "don't record events from perf itself",
1565 			   exclude_perf),
1566 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1567 		    "record events on existing process id"),
1568 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1569 		    "record events on existing thread id"),
1570 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1571 		    "collect data with this RT SCHED_FIFO priority"),
1572 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1573 		    "collect data without buffering"),
1574 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1575 		    "collect raw sample records from all opened counters"),
1576 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1577 			    "system-wide collection from all CPUs"),
1578 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1579 		    "list of cpus to monitor"),
1580 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1581 	OPT_STRING('o', "output", &record.file.path, "file",
1582 		    "output file name"),
1583 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1584 			&record.opts.no_inherit_set,
1585 			"child tasks do not inherit counters"),
1586 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1587 		    "synthesize non-sample events at the end of output"),
1588 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1589 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1590 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1591 		     "number of mmap data pages and AUX area tracing mmap pages",
1592 		     record__parse_mmap_pages),
1593 	OPT_BOOLEAN(0, "group", &record.opts.group,
1594 		    "put the counters into a counter group"),
1595 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1596 			   NULL, "enables call-graph recording" ,
1597 			   &record_callchain_opt),
1598 	OPT_CALLBACK(0, "call-graph", &record.opts,
1599 		     "record_mode[,record_size]", record_callchain_help,
1600 		     &record_parse_callchain_opt),
1601 	OPT_INCR('v', "verbose", &verbose,
1602 		    "be more verbose (show counter open errors, etc)"),
1603 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1604 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1605 		    "per thread counts"),
1606 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1607 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1608 		    "Record the sample physical addresses"),
1609 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1610 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1611 			&record.opts.sample_time_set,
1612 			"Record the sample timestamps"),
1613 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1614 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1615 		    "don't sample"),
1616 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1617 			&record.no_buildid_cache_set,
1618 			"do not update the buildid cache"),
1619 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1620 			&record.no_buildid_set,
1621 			"do not collect buildids in perf.data"),
1622 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1623 		     "monitor event in cgroup name only",
1624 		     parse_cgroups),
1625 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1626 		  "ms to wait before starting measurement after program start"),
1627 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1628 		   "user to profile"),
1629 
1630 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1631 		     "branch any", "sample any taken branches",
1632 		     parse_branch_stack),
1633 
1634 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1635 		     "branch filter mask", "branch stack filter modes",
1636 		     parse_branch_stack),
1637 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1638 		    "sample by weight (on special events only)"),
1639 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1640 		    "sample transaction flags (special events only)"),
1641 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1642 		    "use per-thread mmaps"),
1643 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1644 		    "sample selected machine registers on interrupt,"
1645 		    " use -I ? to list register names", parse_regs),
1646 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1647 		    "Record running/enabled time of read (:S) events"),
1648 	OPT_CALLBACK('k', "clockid", &record.opts,
1649 	"clockid", "clockid to use for events, see clock_gettime()",
1650 	parse_clockid),
1651 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1652 			  "opts", "AUX area tracing Snapshot Mode", ""),
1653 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1654 			"per thread proc mmap processing timeout in ms"),
1655 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1656 		    "Record namespaces events"),
1657 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1658 		    "Record context switch events"),
1659 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1660 			 "Configure all used events to run in kernel space.",
1661 			 PARSE_OPT_EXCLUSIVE),
1662 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1663 			 "Configure all used events to run in user space.",
1664 			 PARSE_OPT_EXCLUSIVE),
1665 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1666 		   "clang binary to use for compiling BPF scriptlets"),
1667 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1668 		   "options passed to clang when compiling BPF scriptlets"),
1669 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1670 		   "file", "vmlinux pathname"),
1671 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1672 		    "Record build-id of all DSOs regardless of hits"),
1673 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1674 		    "append timestamp to output filename"),
1675 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1676 			  &record.switch_output.set, "signal,size,time",
1677 			  "Switch output when receiving SIGUSR2 or when crossing the size/time threshold",
1678 			  "signal"),
1679 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1680 		    "Parse options then exit"),
1681 	OPT_END()
1682 };
1683 
1684 struct option *record_options = __record_options;
1685 
1686 int cmd_record(int argc, const char **argv)
1687 {
1688 	int err;
1689 	struct record *rec = &record;
1690 	char errbuf[BUFSIZ];
1691 
1692 #ifndef HAVE_LIBBPF_SUPPORT
1693 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1694 	set_nobuild('\0', "clang-path", true);
1695 	set_nobuild('\0', "clang-opt", true);
1696 # undef set_nobuild
1697 #endif
1698 
1699 #ifndef HAVE_BPF_PROLOGUE
1700 # if !defined (HAVE_DWARF_SUPPORT)
1701 #  define REASON  "NO_DWARF=1"
1702 # elif !defined (HAVE_LIBBPF_SUPPORT)
1703 #  define REASON  "NO_LIBBPF=1"
1704 # else
1705 #  define REASON  "this architecture doesn't support BPF prologue"
1706 # endif
1707 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1708 	set_nobuild('\0', "vmlinux", true);
1709 # undef set_nobuild
1710 # undef REASON
1711 #endif
1712 
1713 	rec->evlist = perf_evlist__new();
1714 	if (rec->evlist == NULL)
1715 		return -ENOMEM;
1716 
1717 	err = perf_config(perf_record_config, rec);
1718 	if (err)
1719 		return err;
1720 
1721 	argc = parse_options(argc, argv, record_options, record_usage,
1722 			    PARSE_OPT_STOP_AT_NON_OPTION);
1723 	if (quiet)
1724 		perf_quiet_option();
1725 
1726 	/* Make system wide (-a) the default target. */
1727 	if (!argc && target__none(&rec->opts.target))
1728 		rec->opts.target.system_wide = true;
1729 
1730 	if (nr_cgroups && !rec->opts.target.system_wide) {
1731 		usage_with_options_msg(record_usage, record_options,
1732 			"cgroup monitoring only available in system-wide mode");
1733 
1734 	}
1735 	if (rec->opts.record_switch_events &&
1736 	    !perf_can_record_switch_events()) {
1737 		ui__error("kernel does not support recording context switch events\n");
1738 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1739 		return -EINVAL;
1740 	}
1741 
1742 	if (switch_output_setup(rec)) {
1743 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1744 		return -EINVAL;
1745 	}
1746 
1747 	if (rec->switch_output.time) {
1748 		signal(SIGALRM, alarm_sig_handler);
1749 		alarm(rec->switch_output.time);
1750 	}
1751 
1752 	if (!rec->itr) {
1753 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1754 		if (err)
1755 			goto out;
1756 	}
1757 
1758 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1759 					      rec->opts.auxtrace_snapshot_opts);
1760 	if (err)
1761 		goto out;
1762 
1763 	/*
1764 	 * Allow aliases to facilitate the lookup of symbols for address
1765 	 * filters. Refer to auxtrace_parse_filters().
1766 	 */
1767 	symbol_conf.allow_aliases = true;
1768 
1769 	symbol__init(NULL);
1770 
1771 	err = auxtrace_parse_filters(rec->evlist);
1772 	if (err)
1773 		goto out;
1774 
1775 	if (dry_run)
1776 		goto out;
1777 
1778 	err = bpf__setup_stdout(rec->evlist);
1779 	if (err) {
1780 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1781 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1782 			 errbuf);
1783 		goto out;
1784 	}
1785 
1786 	err = -ENOMEM;
1787 
1788 	if (symbol_conf.kptr_restrict)
1789 		pr_warning(
1790 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1791 "check /proc/sys/kernel/kptr_restrict.\n\n"
1792 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1793 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1794 "Samples in kernel modules won't be resolved at all.\n\n"
1795 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1796 "even with a suitable vmlinux or kallsyms file.\n\n");
1797 
1798 	if (rec->no_buildid_cache || rec->no_buildid) {
1799 		disable_buildid_cache();
1800 	} else if (rec->switch_output.enabled) {
1801 		/*
1802 		 * In 'perf record --switch-output', disable buildid
1803 		 * generation by default to reduce data file switching
1804 		 * overhead. Still generate buildids if they are
1805 		 * explicitly required, using:
1806 		 *
1807 		 *  perf record --switch-output --no-no-buildid \
1808 		 *              --no-no-buildid-cache
1809 		 *
1810 		 * The following code is equivalent to:
1811 		 *
1812 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1813 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1814 		 *         disable_buildid_cache();
1815 		 */
1816 		bool disable = true;
1817 
1818 		if (rec->no_buildid_set && !rec->no_buildid)
1819 			disable = false;
1820 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1821 			disable = false;
1822 		if (disable) {
1823 			rec->no_buildid = true;
1824 			rec->no_buildid_cache = true;
1825 			disable_buildid_cache();
1826 		}
1827 	}
1828 
1829 	if (record.opts.overwrite)
1830 		record.opts.tail_synthesize = true;
1831 
1832 	if (rec->evlist->nr_entries == 0 &&
1833 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1834 		pr_err("Not enough memory for event selector list\n");
1835 		goto out;
1836 	}
1837 
1838 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1839 		rec->opts.no_inherit = true;
1840 
1841 	err = target__validate(&rec->opts.target);
1842 	if (err) {
1843 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1844 		ui__warning("%s", errbuf);
1845 	}
1846 
1847 	err = target__parse_uid(&rec->opts.target);
1848 	if (err) {
1849 		int saved_errno = errno;
1850 
1851 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1852 		ui__error("%s", errbuf);
1853 
1854 		err = -saved_errno;
1855 		goto out;
1856 	}
1857 
1858 	/* Enable ignoring missing threads when -u option is defined. */
1859 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1860 
1861 	err = -ENOMEM;
1862 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1863 		usage_with_options(record_usage, record_options);
1864 
1865 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1866 	if (err)
1867 		goto out;
1868 
1869 	/*
1870 	 * We take all buildids when the file contains AUX area
1871 	 * tracing data, because decoding the trace to find which
1872 	 * DSOs were hit would take too long.
1873 	 */
1874 	if (rec->opts.full_auxtrace)
1875 		rec->buildid_all = true;
1876 
1877 	if (record_opts__config(&rec->opts)) {
1878 		err = -EINVAL;
1879 		goto out;
1880 	}
1881 
1882 	err = __cmd_record(&record, argc, argv);
1883 out:
1884 	perf_evlist__delete(rec->evlist);
1885 	symbol__exit();
1886 	auxtrace_record__free(rec->itr);
1887 	return err;
1888 }
1889 
1890 static void snapshot_sig_handler(int sig __maybe_unused)
1891 {
1892 	struct record *rec = &record;
1893 
1894 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1895 		trigger_hit(&auxtrace_snapshot_trigger);
1896 		auxtrace_record__snapshot_started = 1;
1897 		if (auxtrace_record__snapshot_start(record.itr))
1898 			trigger_error(&auxtrace_snapshot_trigger);
1899 	}
1900 
1901 	if (switch_output_signal(rec))
1902 		trigger_hit(&switch_output_trigger);
1903 }
1904 
1905 static void alarm_sig_handler(int sig __maybe_unused)
1906 {
1907 	struct record *rec = &record;
1908 
1909 	if (switch_output_time(rec))
1910 		trigger_hit(&switch_output_trigger);
1911 }
1912