xref: /openbmc/linux/tools/perf/builtin-record.c (revision e5c86679)
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
17 
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/drv_configs.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "asm/bug.h"
42 
43 #include <unistd.h>
44 #include <sched.h>
45 #include <sys/mman.h>
46 #include <asm/bug.h>
47 #include <linux/time64.h>
48 
49 struct switch_output {
50 	bool		 enabled;
51 	bool		 signal;
52 	unsigned long	 size;
53 	unsigned long	 time;
54 	const char	*str;
55 	bool		 set;
56 };
57 
58 struct record {
59 	struct perf_tool	tool;
60 	struct record_opts	opts;
61 	u64			bytes_written;
62 	struct perf_data_file	file;
63 	struct auxtrace_record	*itr;
64 	struct perf_evlist	*evlist;
65 	struct perf_session	*session;
66 	const char		*progname;
67 	int			realtime_prio;
68 	bool			no_buildid;
69 	bool			no_buildid_set;
70 	bool			no_buildid_cache;
71 	bool			no_buildid_cache_set;
72 	bool			buildid_all;
73 	bool			timestamp_filename;
74 	struct switch_output	switch_output;
75 	unsigned long long	samples;
76 };
77 
78 static volatile int auxtrace_record__snapshot_started;
79 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
80 static DEFINE_TRIGGER(switch_output_trigger);
81 
82 static bool switch_output_signal(struct record *rec)
83 {
84 	return rec->switch_output.signal &&
85 	       trigger_is_ready(&switch_output_trigger);
86 }
87 
88 static bool switch_output_size(struct record *rec)
89 {
90 	return rec->switch_output.size &&
91 	       trigger_is_ready(&switch_output_trigger) &&
92 	       (rec->bytes_written >= rec->switch_output.size);
93 }
94 
95 static bool switch_output_time(struct record *rec)
96 {
97 	return rec->switch_output.time &&
98 	       trigger_is_ready(&switch_output_trigger);
99 }
100 
101 static int record__write(struct record *rec, void *bf, size_t size)
102 {
103 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
104 		pr_err("failed to write perf data, error: %m\n");
105 		return -1;
106 	}
107 
108 	rec->bytes_written += size;
109 
110 	if (switch_output_size(rec))
111 		trigger_hit(&switch_output_trigger);
112 
113 	return 0;
114 }
115 
116 static int process_synthesized_event(struct perf_tool *tool,
117 				     union perf_event *event,
118 				     struct perf_sample *sample __maybe_unused,
119 				     struct machine *machine __maybe_unused)
120 {
121 	struct record *rec = container_of(tool, struct record, tool);
122 	return record__write(rec, event, event->header.size);
123 }
124 
125 static int
126 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
127 {
128 	struct perf_event_header *pheader;
129 	u64 evt_head = head;
130 	int size = mask + 1;
131 
132 	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
133 	pheader = (struct perf_event_header *)(buf + (head & mask));
134 	*start = head;
135 	while (true) {
136 		if (evt_head - head >= (unsigned int)size) {
137 			pr_debug("Finished reading backward ring buffer: rewind\n");
138 			if (evt_head - head > (unsigned int)size)
139 				evt_head -= pheader->size;
140 			*end = evt_head;
141 			return 0;
142 		}
143 
144 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
145 
146 		if (pheader->size == 0) {
147 			pr_debug("Finished reading backward ring buffer: get start\n");
148 			*end = evt_head;
149 			return 0;
150 		}
151 
152 		evt_head += pheader->size;
153 		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
154 	}
155 	WARN_ONCE(1, "Shouldn't get here\n");
156 	return -1;
157 }
158 
159 static int
160 rb_find_range(void *data, int mask, u64 head, u64 old,
161 	      u64 *start, u64 *end, bool backward)
162 {
163 	if (!backward) {
164 		*start = old;
165 		*end = head;
166 		return 0;
167 	}
168 
169 	return backward_rb_find_range(data, mask, head, start, end);
170 }
171 
172 static int
173 record__mmap_read(struct record *rec, struct perf_mmap *md,
174 		  bool overwrite, bool backward)
175 {
176 	u64 head = perf_mmap__read_head(md);
177 	u64 old = md->prev;
178 	u64 end = head, start = old;
179 	unsigned char *data = md->base + page_size;
180 	unsigned long size;
181 	void *buf;
182 	int rc = 0;
183 
184 	if (rb_find_range(data, md->mask, head,
185 			  old, &start, &end, backward))
186 		return -1;
187 
188 	if (start == end)
189 		return 0;
190 
191 	rec->samples++;
192 
193 	size = end - start;
194 	if (size > (unsigned long)(md->mask) + 1) {
195 		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
196 
197 		md->prev = head;
198 		perf_mmap__consume(md, overwrite || backward);
199 		return 0;
200 	}
201 
202 	if ((start & md->mask) + size != (end & md->mask)) {
203 		buf = &data[start & md->mask];
204 		size = md->mask + 1 - (start & md->mask);
205 		start += size;
206 
207 		if (record__write(rec, buf, size) < 0) {
208 			rc = -1;
209 			goto out;
210 		}
211 	}
212 
213 	buf = &data[start & md->mask];
214 	size = end - start;
215 	start += size;
216 
217 	if (record__write(rec, buf, size) < 0) {
218 		rc = -1;
219 		goto out;
220 	}
221 
222 	md->prev = head;
223 	perf_mmap__consume(md, overwrite || backward);
224 out:
225 	return rc;
226 }
227 
228 static volatile int done;
229 static volatile int signr = -1;
230 static volatile int child_finished;
231 
232 static void sig_handler(int sig)
233 {
234 	if (sig == SIGCHLD)
235 		child_finished = 1;
236 	else
237 		signr = sig;
238 
239 	done = 1;
240 }
241 
242 static void sigsegv_handler(int sig)
243 {
244 	perf_hooks__recover();
245 	sighandler_dump_stack(sig);
246 }
247 
248 static void record__sig_exit(void)
249 {
250 	if (signr == -1)
251 		return;
252 
253 	signal(signr, SIG_DFL);
254 	raise(signr);
255 }
256 
257 #ifdef HAVE_AUXTRACE_SUPPORT
258 
259 static int record__process_auxtrace(struct perf_tool *tool,
260 				    union perf_event *event, void *data1,
261 				    size_t len1, void *data2, size_t len2)
262 {
263 	struct record *rec = container_of(tool, struct record, tool);
264 	struct perf_data_file *file = &rec->file;
265 	size_t padding;
266 	u8 pad[8] = {0};
267 
268 	if (!perf_data_file__is_pipe(file)) {
269 		off_t file_offset;
270 		int fd = perf_data_file__fd(file);
271 		int err;
272 
273 		file_offset = lseek(fd, 0, SEEK_CUR);
274 		if (file_offset == -1)
275 			return -1;
276 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
277 						     event, file_offset);
278 		if (err)
279 			return err;
280 	}
281 
282 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
283 	padding = (len1 + len2) & 7;
284 	if (padding)
285 		padding = 8 - padding;
286 
287 	record__write(rec, event, event->header.size);
288 	record__write(rec, data1, len1);
289 	if (len2)
290 		record__write(rec, data2, len2);
291 	record__write(rec, &pad, padding);
292 
293 	return 0;
294 }
295 
296 static int record__auxtrace_mmap_read(struct record *rec,
297 				      struct auxtrace_mmap *mm)
298 {
299 	int ret;
300 
301 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
302 				  record__process_auxtrace);
303 	if (ret < 0)
304 		return ret;
305 
306 	if (ret)
307 		rec->samples++;
308 
309 	return 0;
310 }
311 
312 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
313 					       struct auxtrace_mmap *mm)
314 {
315 	int ret;
316 
317 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
318 					   record__process_auxtrace,
319 					   rec->opts.auxtrace_snapshot_size);
320 	if (ret < 0)
321 		return ret;
322 
323 	if (ret)
324 		rec->samples++;
325 
326 	return 0;
327 }
328 
329 static int record__auxtrace_read_snapshot_all(struct record *rec)
330 {
331 	int i;
332 	int rc = 0;
333 
334 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
335 		struct auxtrace_mmap *mm =
336 				&rec->evlist->mmap[i].auxtrace_mmap;
337 
338 		if (!mm->base)
339 			continue;
340 
341 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
342 			rc = -1;
343 			goto out;
344 		}
345 	}
346 out:
347 	return rc;
348 }
349 
350 static void record__read_auxtrace_snapshot(struct record *rec)
351 {
352 	pr_debug("Recording AUX area tracing snapshot\n");
353 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
354 		trigger_error(&auxtrace_snapshot_trigger);
355 	} else {
356 		if (auxtrace_record__snapshot_finish(rec->itr))
357 			trigger_error(&auxtrace_snapshot_trigger);
358 		else
359 			trigger_ready(&auxtrace_snapshot_trigger);
360 	}
361 }
362 
363 #else
364 
365 static inline
366 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
367 			       struct auxtrace_mmap *mm __maybe_unused)
368 {
369 	return 0;
370 }
371 
372 static inline
373 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
374 {
375 }
376 
377 static inline
378 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
379 {
380 	return 0;
381 }
382 
383 #endif
384 
385 static int record__mmap_evlist(struct record *rec,
386 			       struct perf_evlist *evlist)
387 {
388 	struct record_opts *opts = &rec->opts;
389 	char msg[512];
390 
391 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
392 				 opts->auxtrace_mmap_pages,
393 				 opts->auxtrace_snapshot_mode) < 0) {
394 		if (errno == EPERM) {
395 			pr_err("Permission error mapping pages.\n"
396 			       "Consider increasing "
397 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
398 			       "or try again with a smaller value of -m/--mmap_pages.\n"
399 			       "(current value: %u,%u)\n",
400 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
401 			return -errno;
402 		} else {
403 			pr_err("failed to mmap with %d (%s)\n", errno,
404 				str_error_r(errno, msg, sizeof(msg)));
405 			if (errno)
406 				return -errno;
407 			else
408 				return -EINVAL;
409 		}
410 	}
411 	return 0;
412 }
413 
414 static int record__mmap(struct record *rec)
415 {
416 	return record__mmap_evlist(rec, rec->evlist);
417 }
418 
419 static int record__open(struct record *rec)
420 {
421 	char msg[BUFSIZ];
422 	struct perf_evsel *pos;
423 	struct perf_evlist *evlist = rec->evlist;
424 	struct perf_session *session = rec->session;
425 	struct record_opts *opts = &rec->opts;
426 	struct perf_evsel_config_term *err_term;
427 	int rc = 0;
428 
429 	perf_evlist__config(evlist, opts, &callchain_param);
430 
431 	evlist__for_each_entry(evlist, pos) {
432 try_again:
433 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
434 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
435 				if (verbose > 0)
436 					ui__warning("%s\n", msg);
437 				goto try_again;
438 			}
439 
440 			rc = -errno;
441 			perf_evsel__open_strerror(pos, &opts->target,
442 						  errno, msg, sizeof(msg));
443 			ui__error("%s\n", msg);
444 			goto out;
445 		}
446 	}
447 
448 	if (perf_evlist__apply_filters(evlist, &pos)) {
449 		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
450 			pos->filter, perf_evsel__name(pos), errno,
451 			str_error_r(errno, msg, sizeof(msg)));
452 		rc = -1;
453 		goto out;
454 	}
455 
456 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
457 		error("failed to set config \"%s\" on event %s with %d (%s)\n",
458 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
459 		      str_error_r(errno, msg, sizeof(msg)));
460 		rc = -1;
461 		goto out;
462 	}
463 
464 	rc = record__mmap(rec);
465 	if (rc)
466 		goto out;
467 
468 	session->evlist = evlist;
469 	perf_session__set_id_hdr_size(session);
470 out:
471 	return rc;
472 }
473 
474 static int process_sample_event(struct perf_tool *tool,
475 				union perf_event *event,
476 				struct perf_sample *sample,
477 				struct perf_evsel *evsel,
478 				struct machine *machine)
479 {
480 	struct record *rec = container_of(tool, struct record, tool);
481 
482 	rec->samples++;
483 
484 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
485 }
486 
487 static int process_buildids(struct record *rec)
488 {
489 	struct perf_data_file *file  = &rec->file;
490 	struct perf_session *session = rec->session;
491 
492 	if (file->size == 0)
493 		return 0;
494 
495 	/*
496 	 * During this process, it'll load kernel map and replace the
497 	 * dso->long_name to a real pathname it found.  In this case
498 	 * we prefer the vmlinux path like
499 	 *   /lib/modules/3.16.4/build/vmlinux
500 	 *
501 	 * rather than build-id path (in debug directory).
502 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
503 	 */
504 	symbol_conf.ignore_vmlinux_buildid = true;
505 
506 	/*
507 	 * If --buildid-all is given, it marks all DSO regardless of hits,
508 	 * so no need to process samples.
509 	 */
510 	if (rec->buildid_all)
511 		rec->tool.sample = NULL;
512 
513 	return perf_session__process_events(session);
514 }
515 
516 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
517 {
518 	int err;
519 	struct perf_tool *tool = data;
520 	/*
521 	 *As for guest kernel when processing subcommand record&report,
522 	 *we arrange module mmap prior to guest kernel mmap and trigger
523 	 *a preload dso because default guest module symbols are loaded
524 	 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
525 	 *method is used to avoid symbol missing when the first addr is
526 	 *in module instead of in guest kernel.
527 	 */
528 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
529 					     machine);
530 	if (err < 0)
531 		pr_err("Couldn't record guest kernel [%d]'s reference"
532 		       " relocation symbol.\n", machine->pid);
533 
534 	/*
535 	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
536 	 * have no _text sometimes.
537 	 */
538 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
539 						 machine);
540 	if (err < 0)
541 		pr_err("Couldn't record guest kernel [%d]'s reference"
542 		       " relocation symbol.\n", machine->pid);
543 }
544 
545 static struct perf_event_header finished_round_event = {
546 	.size = sizeof(struct perf_event_header),
547 	.type = PERF_RECORD_FINISHED_ROUND,
548 };
549 
550 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
551 				    bool backward)
552 {
553 	u64 bytes_written = rec->bytes_written;
554 	int i;
555 	int rc = 0;
556 	struct perf_mmap *maps;
557 
558 	if (!evlist)
559 		return 0;
560 
561 	maps = backward ? evlist->backward_mmap : evlist->mmap;
562 	if (!maps)
563 		return 0;
564 
565 	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
566 		return 0;
567 
568 	for (i = 0; i < evlist->nr_mmaps; i++) {
569 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
570 
571 		if (maps[i].base) {
572 			if (record__mmap_read(rec, &maps[i],
573 					      evlist->overwrite, backward) != 0) {
574 				rc = -1;
575 				goto out;
576 			}
577 		}
578 
579 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
580 		    record__auxtrace_mmap_read(rec, mm) != 0) {
581 			rc = -1;
582 			goto out;
583 		}
584 	}
585 
586 	/*
587 	 * Mark the round finished in case we wrote
588 	 * at least one event.
589 	 */
590 	if (bytes_written != rec->bytes_written)
591 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
592 
593 	if (backward)
594 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
595 out:
596 	return rc;
597 }
598 
599 static int record__mmap_read_all(struct record *rec)
600 {
601 	int err;
602 
603 	err = record__mmap_read_evlist(rec, rec->evlist, false);
604 	if (err)
605 		return err;
606 
607 	return record__mmap_read_evlist(rec, rec->evlist, true);
608 }
609 
610 static void record__init_features(struct record *rec)
611 {
612 	struct perf_session *session = rec->session;
613 	int feat;
614 
615 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
616 		perf_header__set_feat(&session->header, feat);
617 
618 	if (rec->no_buildid)
619 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
620 
621 	if (!have_tracepoints(&rec->evlist->entries))
622 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
623 
624 	if (!rec->opts.branch_stack)
625 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
626 
627 	if (!rec->opts.full_auxtrace)
628 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
629 
630 	perf_header__clear_feat(&session->header, HEADER_STAT);
631 }
632 
633 static void
634 record__finish_output(struct record *rec)
635 {
636 	struct perf_data_file *file = &rec->file;
637 	int fd = perf_data_file__fd(file);
638 
639 	if (file->is_pipe)
640 		return;
641 
642 	rec->session->header.data_size += rec->bytes_written;
643 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
644 
645 	if (!rec->no_buildid) {
646 		process_buildids(rec);
647 
648 		if (rec->buildid_all)
649 			dsos__hit_all(rec->session);
650 	}
651 	perf_session__write_header(rec->session, rec->evlist, fd, true);
652 
653 	return;
654 }
655 
656 static int record__synthesize_workload(struct record *rec, bool tail)
657 {
658 	int err;
659 	struct thread_map *thread_map;
660 
661 	if (rec->opts.tail_synthesize != tail)
662 		return 0;
663 
664 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
665 	if (thread_map == NULL)
666 		return -1;
667 
668 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
669 						 process_synthesized_event,
670 						 &rec->session->machines.host,
671 						 rec->opts.sample_address,
672 						 rec->opts.proc_map_timeout);
673 	thread_map__put(thread_map);
674 	return err;
675 }
676 
677 static int record__synthesize(struct record *rec, bool tail);
678 
679 static int
680 record__switch_output(struct record *rec, bool at_exit)
681 {
682 	struct perf_data_file *file = &rec->file;
683 	int fd, err;
684 
685 	/* Same Size:      "2015122520103046"*/
686 	char timestamp[] = "InvalidTimestamp";
687 
688 	record__synthesize(rec, true);
689 	if (target__none(&rec->opts.target))
690 		record__synthesize_workload(rec, true);
691 
692 	rec->samples = 0;
693 	record__finish_output(rec);
694 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
695 	if (err) {
696 		pr_err("Failed to get current timestamp\n");
697 		return -EINVAL;
698 	}
699 
700 	fd = perf_data_file__switch(file, timestamp,
701 				    rec->session->header.data_offset,
702 				    at_exit);
703 	if (fd >= 0 && !at_exit) {
704 		rec->bytes_written = 0;
705 		rec->session->header.data_size = 0;
706 	}
707 
708 	if (!quiet)
709 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
710 			file->path, timestamp);
711 
712 	/* Output tracking events */
713 	if (!at_exit) {
714 		record__synthesize(rec, false);
715 
716 		/*
717 		 * In 'perf record --switch-output' without -a,
718 		 * record__synthesize() in record__switch_output() won't
719 		 * generate tracking events because there's no thread_map
720 		 * in evlist. Which causes newly created perf.data doesn't
721 		 * contain map and comm information.
722 		 * Create a fake thread_map and directly call
723 		 * perf_event__synthesize_thread_map() for those events.
724 		 */
725 		if (target__none(&rec->opts.target))
726 			record__synthesize_workload(rec, false);
727 	}
728 	return fd;
729 }
730 
731 static volatile int workload_exec_errno;
732 
733 /*
734  * perf_evlist__prepare_workload will send a SIGUSR1
735  * if the fork fails, since we asked by setting its
736  * want_signal to true.
737  */
738 static void workload_exec_failed_signal(int signo __maybe_unused,
739 					siginfo_t *info,
740 					void *ucontext __maybe_unused)
741 {
742 	workload_exec_errno = info->si_value.sival_int;
743 	done = 1;
744 	child_finished = 1;
745 }
746 
747 static void snapshot_sig_handler(int sig);
748 static void alarm_sig_handler(int sig);
749 
750 int __weak
751 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
752 			    struct perf_tool *tool __maybe_unused,
753 			    perf_event__handler_t process __maybe_unused,
754 			    struct machine *machine __maybe_unused)
755 {
756 	return 0;
757 }
758 
759 static const struct perf_event_mmap_page *
760 perf_evlist__pick_pc(struct perf_evlist *evlist)
761 {
762 	if (evlist) {
763 		if (evlist->mmap && evlist->mmap[0].base)
764 			return evlist->mmap[0].base;
765 		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
766 			return evlist->backward_mmap[0].base;
767 	}
768 	return NULL;
769 }
770 
771 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
772 {
773 	const struct perf_event_mmap_page *pc;
774 
775 	pc = perf_evlist__pick_pc(rec->evlist);
776 	if (pc)
777 		return pc;
778 	return NULL;
779 }
780 
781 static int record__synthesize(struct record *rec, bool tail)
782 {
783 	struct perf_session *session = rec->session;
784 	struct machine *machine = &session->machines.host;
785 	struct perf_data_file *file = &rec->file;
786 	struct record_opts *opts = &rec->opts;
787 	struct perf_tool *tool = &rec->tool;
788 	int fd = perf_data_file__fd(file);
789 	int err = 0;
790 
791 	if (rec->opts.tail_synthesize != tail)
792 		return 0;
793 
794 	if (file->is_pipe) {
795 		err = perf_event__synthesize_attrs(tool, session,
796 						   process_synthesized_event);
797 		if (err < 0) {
798 			pr_err("Couldn't synthesize attrs.\n");
799 			goto out;
800 		}
801 
802 		if (have_tracepoints(&rec->evlist->entries)) {
803 			/*
804 			 * FIXME err <= 0 here actually means that
805 			 * there were no tracepoints so its not really
806 			 * an error, just that we don't need to
807 			 * synthesize anything.  We really have to
808 			 * return this more properly and also
809 			 * propagate errors that now are calling die()
810 			 */
811 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
812 								  process_synthesized_event);
813 			if (err <= 0) {
814 				pr_err("Couldn't record tracing data.\n");
815 				goto out;
816 			}
817 			rec->bytes_written += err;
818 		}
819 	}
820 
821 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
822 					  process_synthesized_event, machine);
823 	if (err)
824 		goto out;
825 
826 	if (rec->opts.full_auxtrace) {
827 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
828 					session, process_synthesized_event);
829 		if (err)
830 			goto out;
831 	}
832 
833 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
834 						 machine);
835 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
836 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
837 			   "Check /proc/kallsyms permission or run as root.\n");
838 
839 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
840 					     machine);
841 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
842 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
843 			   "Check /proc/modules permission or run as root.\n");
844 
845 	if (perf_guest) {
846 		machines__process_guests(&session->machines,
847 					 perf_event__synthesize_guest_os, tool);
848 	}
849 
850 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
851 					    process_synthesized_event, opts->sample_address,
852 					    opts->proc_map_timeout);
853 out:
854 	return err;
855 }
856 
857 static int __cmd_record(struct record *rec, int argc, const char **argv)
858 {
859 	int err;
860 	int status = 0;
861 	unsigned long waking = 0;
862 	const bool forks = argc > 0;
863 	struct machine *machine;
864 	struct perf_tool *tool = &rec->tool;
865 	struct record_opts *opts = &rec->opts;
866 	struct perf_data_file *file = &rec->file;
867 	struct perf_session *session;
868 	bool disabled = false, draining = false;
869 	int fd;
870 
871 	rec->progname = argv[0];
872 
873 	atexit(record__sig_exit);
874 	signal(SIGCHLD, sig_handler);
875 	signal(SIGINT, sig_handler);
876 	signal(SIGTERM, sig_handler);
877 	signal(SIGSEGV, sigsegv_handler);
878 
879 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
880 		signal(SIGUSR2, snapshot_sig_handler);
881 		if (rec->opts.auxtrace_snapshot_mode)
882 			trigger_on(&auxtrace_snapshot_trigger);
883 		if (rec->switch_output.enabled)
884 			trigger_on(&switch_output_trigger);
885 	} else {
886 		signal(SIGUSR2, SIG_IGN);
887 	}
888 
889 	session = perf_session__new(file, false, tool);
890 	if (session == NULL) {
891 		pr_err("Perf session creation failed.\n");
892 		return -1;
893 	}
894 
895 	fd = perf_data_file__fd(file);
896 	rec->session = session;
897 
898 	record__init_features(rec);
899 
900 	if (forks) {
901 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
902 						    argv, file->is_pipe,
903 						    workload_exec_failed_signal);
904 		if (err < 0) {
905 			pr_err("Couldn't run the workload!\n");
906 			status = err;
907 			goto out_delete_session;
908 		}
909 	}
910 
911 	if (record__open(rec) != 0) {
912 		err = -1;
913 		goto out_child;
914 	}
915 
916 	err = bpf__apply_obj_config();
917 	if (err) {
918 		char errbuf[BUFSIZ];
919 
920 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
921 		pr_err("ERROR: Apply config to BPF failed: %s\n",
922 			 errbuf);
923 		goto out_child;
924 	}
925 
926 	/*
927 	 * Normally perf_session__new would do this, but it doesn't have the
928 	 * evlist.
929 	 */
930 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
931 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
932 		rec->tool.ordered_events = false;
933 	}
934 
935 	if (!rec->evlist->nr_groups)
936 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
937 
938 	if (file->is_pipe) {
939 		err = perf_header__write_pipe(fd);
940 		if (err < 0)
941 			goto out_child;
942 	} else {
943 		err = perf_session__write_header(session, rec->evlist, fd, false);
944 		if (err < 0)
945 			goto out_child;
946 	}
947 
948 	if (!rec->no_buildid
949 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
950 		pr_err("Couldn't generate buildids. "
951 		       "Use --no-buildid to profile anyway.\n");
952 		err = -1;
953 		goto out_child;
954 	}
955 
956 	machine = &session->machines.host;
957 
958 	err = record__synthesize(rec, false);
959 	if (err < 0)
960 		goto out_child;
961 
962 	if (rec->realtime_prio) {
963 		struct sched_param param;
964 
965 		param.sched_priority = rec->realtime_prio;
966 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
967 			pr_err("Could not set realtime priority.\n");
968 			err = -1;
969 			goto out_child;
970 		}
971 	}
972 
973 	/*
974 	 * When perf is starting the traced process, all the events
975 	 * (apart from group members) have enable_on_exec=1 set,
976 	 * so don't spoil it by prematurely enabling them.
977 	 */
978 	if (!target__none(&opts->target) && !opts->initial_delay)
979 		perf_evlist__enable(rec->evlist);
980 
981 	/*
982 	 * Let the child rip
983 	 */
984 	if (forks) {
985 		union perf_event *event;
986 
987 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
988 		if (event == NULL) {
989 			err = -ENOMEM;
990 			goto out_child;
991 		}
992 
993 		/*
994 		 * Some H/W events are generated before COMM event
995 		 * which is emitted during exec(), so perf script
996 		 * cannot see a correct process name for those events.
997 		 * Synthesize COMM event to prevent it.
998 		 */
999 		perf_event__synthesize_comm(tool, event,
1000 					    rec->evlist->workload.pid,
1001 					    process_synthesized_event,
1002 					    machine);
1003 		free(event);
1004 
1005 		perf_evlist__start_workload(rec->evlist);
1006 	}
1007 
1008 	if (opts->initial_delay) {
1009 		usleep(opts->initial_delay * USEC_PER_MSEC);
1010 		perf_evlist__enable(rec->evlist);
1011 	}
1012 
1013 	trigger_ready(&auxtrace_snapshot_trigger);
1014 	trigger_ready(&switch_output_trigger);
1015 	perf_hooks__invoke_record_start();
1016 	for (;;) {
1017 		unsigned long long hits = rec->samples;
1018 
1019 		/*
1020 		 * rec->evlist->bkw_mmap_state is possible to be
1021 		 * BKW_MMAP_EMPTY here: when done == true and
1022 		 * hits != rec->samples in previous round.
1023 		 *
1024 		 * perf_evlist__toggle_bkw_mmap ensure we never
1025 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1026 		 */
1027 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1028 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1029 
1030 		if (record__mmap_read_all(rec) < 0) {
1031 			trigger_error(&auxtrace_snapshot_trigger);
1032 			trigger_error(&switch_output_trigger);
1033 			err = -1;
1034 			goto out_child;
1035 		}
1036 
1037 		if (auxtrace_record__snapshot_started) {
1038 			auxtrace_record__snapshot_started = 0;
1039 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1040 				record__read_auxtrace_snapshot(rec);
1041 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1042 				pr_err("AUX area tracing snapshot failed\n");
1043 				err = -1;
1044 				goto out_child;
1045 			}
1046 		}
1047 
1048 		if (trigger_is_hit(&switch_output_trigger)) {
1049 			/*
1050 			 * If switch_output_trigger is hit, the data in
1051 			 * overwritable ring buffer should have been collected,
1052 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1053 			 *
1054 			 * If SIGUSR2 raise after or during record__mmap_read_all(),
1055 			 * record__mmap_read_all() didn't collect data from
1056 			 * overwritable ring buffer. Read again.
1057 			 */
1058 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1059 				continue;
1060 			trigger_ready(&switch_output_trigger);
1061 
1062 			/*
1063 			 * Reenable events in overwrite ring buffer after
1064 			 * record__mmap_read_all(): we should have collected
1065 			 * data from it.
1066 			 */
1067 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1068 
1069 			if (!quiet)
1070 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1071 					waking);
1072 			waking = 0;
1073 			fd = record__switch_output(rec, false);
1074 			if (fd < 0) {
1075 				pr_err("Failed to switch to new file\n");
1076 				trigger_error(&switch_output_trigger);
1077 				err = fd;
1078 				goto out_child;
1079 			}
1080 
1081 			/* re-arm the alarm */
1082 			if (rec->switch_output.time)
1083 				alarm(rec->switch_output.time);
1084 		}
1085 
1086 		if (hits == rec->samples) {
1087 			if (done || draining)
1088 				break;
1089 			err = perf_evlist__poll(rec->evlist, -1);
1090 			/*
1091 			 * Propagate error, only if there's any. Ignore positive
1092 			 * number of returned events and interrupt error.
1093 			 */
1094 			if (err > 0 || (err < 0 && errno == EINTR))
1095 				err = 0;
1096 			waking++;
1097 
1098 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1099 				draining = true;
1100 		}
1101 
1102 		/*
1103 		 * When perf is starting the traced process, at the end events
1104 		 * die with the process and we wait for that. Thus no need to
1105 		 * disable events in this case.
1106 		 */
1107 		if (done && !disabled && !target__none(&opts->target)) {
1108 			trigger_off(&auxtrace_snapshot_trigger);
1109 			perf_evlist__disable(rec->evlist);
1110 			disabled = true;
1111 		}
1112 	}
1113 	trigger_off(&auxtrace_snapshot_trigger);
1114 	trigger_off(&switch_output_trigger);
1115 
1116 	if (forks && workload_exec_errno) {
1117 		char msg[STRERR_BUFSIZE];
1118 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1119 		pr_err("Workload failed: %s\n", emsg);
1120 		err = -1;
1121 		goto out_child;
1122 	}
1123 
1124 	if (!quiet)
1125 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1126 
1127 	if (target__none(&rec->opts.target))
1128 		record__synthesize_workload(rec, true);
1129 
1130 out_child:
1131 	if (forks) {
1132 		int exit_status;
1133 
1134 		if (!child_finished)
1135 			kill(rec->evlist->workload.pid, SIGTERM);
1136 
1137 		wait(&exit_status);
1138 
1139 		if (err < 0)
1140 			status = err;
1141 		else if (WIFEXITED(exit_status))
1142 			status = WEXITSTATUS(exit_status);
1143 		else if (WIFSIGNALED(exit_status))
1144 			signr = WTERMSIG(exit_status);
1145 	} else
1146 		status = err;
1147 
1148 	record__synthesize(rec, true);
1149 	/* this will be recalculated during process_buildids() */
1150 	rec->samples = 0;
1151 
1152 	if (!err) {
1153 		if (!rec->timestamp_filename) {
1154 			record__finish_output(rec);
1155 		} else {
1156 			fd = record__switch_output(rec, true);
1157 			if (fd < 0) {
1158 				status = fd;
1159 				goto out_delete_session;
1160 			}
1161 		}
1162 	}
1163 
1164 	perf_hooks__invoke_record_end();
1165 
1166 	if (!err && !quiet) {
1167 		char samples[128];
1168 		const char *postfix = rec->timestamp_filename ?
1169 					".<timestamp>" : "";
1170 
1171 		if (rec->samples && !rec->opts.full_auxtrace)
1172 			scnprintf(samples, sizeof(samples),
1173 				  " (%" PRIu64 " samples)", rec->samples);
1174 		else
1175 			samples[0] = '\0';
1176 
1177 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1178 			perf_data_file__size(file) / 1024.0 / 1024.0,
1179 			file->path, postfix, samples);
1180 	}
1181 
1182 out_delete_session:
1183 	perf_session__delete(session);
1184 	return status;
1185 }
1186 
1187 static void callchain_debug(struct callchain_param *callchain)
1188 {
1189 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1190 
1191 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1192 
1193 	if (callchain->record_mode == CALLCHAIN_DWARF)
1194 		pr_debug("callchain: stack dump size %d\n",
1195 			 callchain->dump_size);
1196 }
1197 
1198 int record_opts__parse_callchain(struct record_opts *record,
1199 				 struct callchain_param *callchain,
1200 				 const char *arg, bool unset)
1201 {
1202 	int ret;
1203 	callchain->enabled = !unset;
1204 
1205 	/* --no-call-graph */
1206 	if (unset) {
1207 		callchain->record_mode = CALLCHAIN_NONE;
1208 		pr_debug("callchain: disabled\n");
1209 		return 0;
1210 	}
1211 
1212 	ret = parse_callchain_record_opt(arg, callchain);
1213 	if (!ret) {
1214 		/* Enable data address sampling for DWARF unwind. */
1215 		if (callchain->record_mode == CALLCHAIN_DWARF)
1216 			record->sample_address = true;
1217 		callchain_debug(callchain);
1218 	}
1219 
1220 	return ret;
1221 }
1222 
1223 int record_parse_callchain_opt(const struct option *opt,
1224 			       const char *arg,
1225 			       int unset)
1226 {
1227 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1228 }
1229 
1230 int record_callchain_opt(const struct option *opt,
1231 			 const char *arg __maybe_unused,
1232 			 int unset __maybe_unused)
1233 {
1234 	struct callchain_param *callchain = opt->value;
1235 
1236 	callchain->enabled = true;
1237 
1238 	if (callchain->record_mode == CALLCHAIN_NONE)
1239 		callchain->record_mode = CALLCHAIN_FP;
1240 
1241 	callchain_debug(callchain);
1242 	return 0;
1243 }
1244 
1245 static int perf_record_config(const char *var, const char *value, void *cb)
1246 {
1247 	struct record *rec = cb;
1248 
1249 	if (!strcmp(var, "record.build-id")) {
1250 		if (!strcmp(value, "cache"))
1251 			rec->no_buildid_cache = false;
1252 		else if (!strcmp(value, "no-cache"))
1253 			rec->no_buildid_cache = true;
1254 		else if (!strcmp(value, "skip"))
1255 			rec->no_buildid = true;
1256 		else
1257 			return -1;
1258 		return 0;
1259 	}
1260 	if (!strcmp(var, "record.call-graph"))
1261 		var = "call-graph.record-mode"; /* fall-through */
1262 
1263 	return perf_default_config(var, value, cb);
1264 }
1265 
1266 struct clockid_map {
1267 	const char *name;
1268 	int clockid;
1269 };
1270 
1271 #define CLOCKID_MAP(n, c)	\
1272 	{ .name = n, .clockid = (c), }
1273 
1274 #define CLOCKID_END	{ .name = NULL, }
1275 
1276 
1277 /*
1278  * Add the missing ones, we need to build on many distros...
1279  */
1280 #ifndef CLOCK_MONOTONIC_RAW
1281 #define CLOCK_MONOTONIC_RAW 4
1282 #endif
1283 #ifndef CLOCK_BOOTTIME
1284 #define CLOCK_BOOTTIME 7
1285 #endif
1286 #ifndef CLOCK_TAI
1287 #define CLOCK_TAI 11
1288 #endif
1289 
1290 static const struct clockid_map clockids[] = {
1291 	/* available for all events, NMI safe */
1292 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1293 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1294 
1295 	/* available for some events */
1296 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1297 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1298 	CLOCKID_MAP("tai", CLOCK_TAI),
1299 
1300 	/* available for the lazy */
1301 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1302 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1303 	CLOCKID_MAP("real", CLOCK_REALTIME),
1304 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1305 
1306 	CLOCKID_END,
1307 };
1308 
1309 static int parse_clockid(const struct option *opt, const char *str, int unset)
1310 {
1311 	struct record_opts *opts = (struct record_opts *)opt->value;
1312 	const struct clockid_map *cm;
1313 	const char *ostr = str;
1314 
1315 	if (unset) {
1316 		opts->use_clockid = 0;
1317 		return 0;
1318 	}
1319 
1320 	/* no arg passed */
1321 	if (!str)
1322 		return 0;
1323 
1324 	/* no setting it twice */
1325 	if (opts->use_clockid)
1326 		return -1;
1327 
1328 	opts->use_clockid = true;
1329 
1330 	/* if its a number, we're done */
1331 	if (sscanf(str, "%d", &opts->clockid) == 1)
1332 		return 0;
1333 
1334 	/* allow a "CLOCK_" prefix to the name */
1335 	if (!strncasecmp(str, "CLOCK_", 6))
1336 		str += 6;
1337 
1338 	for (cm = clockids; cm->name; cm++) {
1339 		if (!strcasecmp(str, cm->name)) {
1340 			opts->clockid = cm->clockid;
1341 			return 0;
1342 		}
1343 	}
1344 
1345 	opts->use_clockid = false;
1346 	ui__warning("unknown clockid %s, check man page\n", ostr);
1347 	return -1;
1348 }
1349 
1350 static int record__parse_mmap_pages(const struct option *opt,
1351 				    const char *str,
1352 				    int unset __maybe_unused)
1353 {
1354 	struct record_opts *opts = opt->value;
1355 	char *s, *p;
1356 	unsigned int mmap_pages;
1357 	int ret;
1358 
1359 	if (!str)
1360 		return -EINVAL;
1361 
1362 	s = strdup(str);
1363 	if (!s)
1364 		return -ENOMEM;
1365 
1366 	p = strchr(s, ',');
1367 	if (p)
1368 		*p = '\0';
1369 
1370 	if (*s) {
1371 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1372 		if (ret)
1373 			goto out_free;
1374 		opts->mmap_pages = mmap_pages;
1375 	}
1376 
1377 	if (!p) {
1378 		ret = 0;
1379 		goto out_free;
1380 	}
1381 
1382 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1383 	if (ret)
1384 		goto out_free;
1385 
1386 	opts->auxtrace_mmap_pages = mmap_pages;
1387 
1388 out_free:
1389 	free(s);
1390 	return ret;
1391 }
1392 
1393 static void switch_output_size_warn(struct record *rec)
1394 {
1395 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1396 	struct switch_output *s = &rec->switch_output;
1397 
1398 	wakeup_size /= 2;
1399 
1400 	if (s->size < wakeup_size) {
1401 		char buf[100];
1402 
1403 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1404 		pr_warning("WARNING: switch-output data size lower than "
1405 			   "wakeup kernel buffer size (%s) "
1406 			   "expect bigger perf.data sizes\n", buf);
1407 	}
1408 }
1409 
1410 static int switch_output_setup(struct record *rec)
1411 {
1412 	struct switch_output *s = &rec->switch_output;
1413 	static struct parse_tag tags_size[] = {
1414 		{ .tag  = 'B', .mult = 1       },
1415 		{ .tag  = 'K', .mult = 1 << 10 },
1416 		{ .tag  = 'M', .mult = 1 << 20 },
1417 		{ .tag  = 'G', .mult = 1 << 30 },
1418 		{ .tag  = 0 },
1419 	};
1420 	static struct parse_tag tags_time[] = {
1421 		{ .tag  = 's', .mult = 1        },
1422 		{ .tag  = 'm', .mult = 60       },
1423 		{ .tag  = 'h', .mult = 60*60    },
1424 		{ .tag  = 'd', .mult = 60*60*24 },
1425 		{ .tag  = 0 },
1426 	};
1427 	unsigned long val;
1428 
1429 	if (!s->set)
1430 		return 0;
1431 
1432 	if (!strcmp(s->str, "signal")) {
1433 		s->signal = true;
1434 		pr_debug("switch-output with SIGUSR2 signal\n");
1435 		goto enabled;
1436 	}
1437 
1438 	val = parse_tag_value(s->str, tags_size);
1439 	if (val != (unsigned long) -1) {
1440 		s->size = val;
1441 		pr_debug("switch-output with %s size threshold\n", s->str);
1442 		goto enabled;
1443 	}
1444 
1445 	val = parse_tag_value(s->str, tags_time);
1446 	if (val != (unsigned long) -1) {
1447 		s->time = val;
1448 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1449 			 s->str, s->time);
1450 		goto enabled;
1451 	}
1452 
1453 	return -1;
1454 
1455 enabled:
1456 	rec->timestamp_filename = true;
1457 	s->enabled              = true;
1458 
1459 	if (s->size && !rec->opts.no_buffering)
1460 		switch_output_size_warn(rec);
1461 
1462 	return 0;
1463 }
1464 
1465 static const char * const __record_usage[] = {
1466 	"perf record [<options>] [<command>]",
1467 	"perf record [<options>] -- <command> [<options>]",
1468 	NULL
1469 };
1470 const char * const *record_usage = __record_usage;
1471 
1472 /*
1473  * XXX Ideally would be local to cmd_record() and passed to a record__new
1474  * because we need to have access to it in record__exit, that is called
1475  * after cmd_record() exits, but since record_options need to be accessible to
1476  * builtin-script, leave it here.
1477  *
1478  * At least we don't ouch it in all the other functions here directly.
1479  *
1480  * Just say no to tons of global variables, sigh.
1481  */
1482 static struct record record = {
1483 	.opts = {
1484 		.sample_time	     = true,
1485 		.mmap_pages	     = UINT_MAX,
1486 		.user_freq	     = UINT_MAX,
1487 		.user_interval	     = ULLONG_MAX,
1488 		.freq		     = 4000,
1489 		.target		     = {
1490 			.uses_mmap   = true,
1491 			.default_per_cpu = true,
1492 		},
1493 		.proc_map_timeout     = 500,
1494 	},
1495 	.tool = {
1496 		.sample		= process_sample_event,
1497 		.fork		= perf_event__process_fork,
1498 		.exit		= perf_event__process_exit,
1499 		.comm		= perf_event__process_comm,
1500 		.mmap		= perf_event__process_mmap,
1501 		.mmap2		= perf_event__process_mmap2,
1502 		.ordered_events	= true,
1503 	},
1504 };
1505 
1506 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1507 	"\n\t\t\t\tDefault: fp";
1508 
1509 static bool dry_run;
1510 
1511 /*
1512  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1513  * with it and switch to use the library functions in perf_evlist that came
1514  * from builtin-record.c, i.e. use record_opts,
1515  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1516  * using pipes, etc.
1517  */
1518 static struct option __record_options[] = {
1519 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1520 		     "event selector. use 'perf list' to list available events",
1521 		     parse_events_option),
1522 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1523 		     "event filter", parse_filter),
1524 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1525 			   NULL, "don't record events from perf itself",
1526 			   exclude_perf),
1527 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1528 		    "record events on existing process id"),
1529 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1530 		    "record events on existing thread id"),
1531 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1532 		    "collect data with this RT SCHED_FIFO priority"),
1533 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1534 		    "collect data without buffering"),
1535 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1536 		    "collect raw sample records from all opened counters"),
1537 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1538 			    "system-wide collection from all CPUs"),
1539 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1540 		    "list of cpus to monitor"),
1541 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1542 	OPT_STRING('o', "output", &record.file.path, "file",
1543 		    "output file name"),
1544 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1545 			&record.opts.no_inherit_set,
1546 			"child tasks do not inherit counters"),
1547 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1548 		    "synthesize non-sample events at the end of output"),
1549 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1550 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1551 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1552 		     "number of mmap data pages and AUX area tracing mmap pages",
1553 		     record__parse_mmap_pages),
1554 	OPT_BOOLEAN(0, "group", &record.opts.group,
1555 		    "put the counters into a counter group"),
1556 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1557 			   NULL, "enables call-graph recording" ,
1558 			   &record_callchain_opt),
1559 	OPT_CALLBACK(0, "call-graph", &record.opts,
1560 		     "record_mode[,record_size]", record_callchain_help,
1561 		     &record_parse_callchain_opt),
1562 	OPT_INCR('v', "verbose", &verbose,
1563 		    "be more verbose (show counter open errors, etc)"),
1564 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1565 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1566 		    "per thread counts"),
1567 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1568 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1569 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1570 			&record.opts.sample_time_set,
1571 			"Record the sample timestamps"),
1572 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1573 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1574 		    "don't sample"),
1575 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1576 			&record.no_buildid_cache_set,
1577 			"do not update the buildid cache"),
1578 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1579 			&record.no_buildid_set,
1580 			"do not collect buildids in perf.data"),
1581 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1582 		     "monitor event in cgroup name only",
1583 		     parse_cgroups),
1584 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1585 		  "ms to wait before starting measurement after program start"),
1586 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1587 		   "user to profile"),
1588 
1589 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1590 		     "branch any", "sample any taken branches",
1591 		     parse_branch_stack),
1592 
1593 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1594 		     "branch filter mask", "branch stack filter modes",
1595 		     parse_branch_stack),
1596 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1597 		    "sample by weight (on special events only)"),
1598 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1599 		    "sample transaction flags (special events only)"),
1600 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1601 		    "use per-thread mmaps"),
1602 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1603 		    "sample selected machine registers on interrupt,"
1604 		    " use -I ? to list register names", parse_regs),
1605 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1606 		    "Record running/enabled time of read (:S) events"),
1607 	OPT_CALLBACK('k', "clockid", &record.opts,
1608 	"clockid", "clockid to use for events, see clock_gettime()",
1609 	parse_clockid),
1610 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1611 			  "opts", "AUX area tracing Snapshot Mode", ""),
1612 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1613 			"per thread proc mmap processing timeout in ms"),
1614 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1615 		    "Record context switch events"),
1616 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1617 			 "Configure all used events to run in kernel space.",
1618 			 PARSE_OPT_EXCLUSIVE),
1619 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1620 			 "Configure all used events to run in user space.",
1621 			 PARSE_OPT_EXCLUSIVE),
1622 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1623 		   "clang binary to use for compiling BPF scriptlets"),
1624 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1625 		   "options passed to clang when compiling BPF scriptlets"),
1626 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1627 		   "file", "vmlinux pathname"),
1628 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1629 		    "Record build-id of all DSOs regardless of hits"),
1630 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1631 		    "append timestamp to output filename"),
1632 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1633 			  &record.switch_output.set, "signal,size,time",
1634 			  "Switch output when receive SIGUSR2 or cross size,time threshold",
1635 			  "signal"),
1636 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1637 		    "Parse options then exit"),
1638 	OPT_END()
1639 };
1640 
1641 struct option *record_options = __record_options;
1642 
1643 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1644 {
1645 	int err;
1646 	struct record *rec = &record;
1647 	char errbuf[BUFSIZ];
1648 
1649 #ifndef HAVE_LIBBPF_SUPPORT
1650 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1651 	set_nobuild('\0', "clang-path", true);
1652 	set_nobuild('\0', "clang-opt", true);
1653 # undef set_nobuild
1654 #endif
1655 
1656 #ifndef HAVE_BPF_PROLOGUE
1657 # if !defined (HAVE_DWARF_SUPPORT)
1658 #  define REASON  "NO_DWARF=1"
1659 # elif !defined (HAVE_LIBBPF_SUPPORT)
1660 #  define REASON  "NO_LIBBPF=1"
1661 # else
1662 #  define REASON  "this architecture doesn't support BPF prologue"
1663 # endif
1664 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1665 	set_nobuild('\0', "vmlinux", true);
1666 # undef set_nobuild
1667 # undef REASON
1668 #endif
1669 
1670 	rec->evlist = perf_evlist__new();
1671 	if (rec->evlist == NULL)
1672 		return -ENOMEM;
1673 
1674 	err = perf_config(perf_record_config, rec);
1675 	if (err)
1676 		return err;
1677 
1678 	argc = parse_options(argc, argv, record_options, record_usage,
1679 			    PARSE_OPT_STOP_AT_NON_OPTION);
1680 	if (quiet)
1681 		perf_quiet_option();
1682 
1683 	/* Make system wide (-a) the default target. */
1684 	if (!argc && target__none(&rec->opts.target))
1685 		rec->opts.target.system_wide = true;
1686 
1687 	if (nr_cgroups && !rec->opts.target.system_wide) {
1688 		usage_with_options_msg(record_usage, record_options,
1689 			"cgroup monitoring only available in system-wide mode");
1690 
1691 	}
1692 	if (rec->opts.record_switch_events &&
1693 	    !perf_can_record_switch_events()) {
1694 		ui__error("kernel does not support recording context switch events\n");
1695 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1696 		return -EINVAL;
1697 	}
1698 
1699 	if (switch_output_setup(rec)) {
1700 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1701 		return -EINVAL;
1702 	}
1703 
1704 	if (rec->switch_output.time) {
1705 		signal(SIGALRM, alarm_sig_handler);
1706 		alarm(rec->switch_output.time);
1707 	}
1708 
1709 	if (!rec->itr) {
1710 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1711 		if (err)
1712 			goto out;
1713 	}
1714 
1715 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1716 					      rec->opts.auxtrace_snapshot_opts);
1717 	if (err)
1718 		goto out;
1719 
1720 	/*
1721 	 * Allow aliases to facilitate the lookup of symbols for address
1722 	 * filters. Refer to auxtrace_parse_filters().
1723 	 */
1724 	symbol_conf.allow_aliases = true;
1725 
1726 	symbol__init(NULL);
1727 
1728 	err = auxtrace_parse_filters(rec->evlist);
1729 	if (err)
1730 		goto out;
1731 
1732 	if (dry_run)
1733 		goto out;
1734 
1735 	err = bpf__setup_stdout(rec->evlist);
1736 	if (err) {
1737 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1738 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1739 			 errbuf);
1740 		goto out;
1741 	}
1742 
1743 	err = -ENOMEM;
1744 
1745 	if (symbol_conf.kptr_restrict)
1746 		pr_warning(
1747 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1748 "check /proc/sys/kernel/kptr_restrict.\n\n"
1749 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1750 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1751 "Samples in kernel modules won't be resolved at all.\n\n"
1752 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1753 "even with a suitable vmlinux or kallsyms file.\n\n");
1754 
1755 	if (rec->no_buildid_cache || rec->no_buildid) {
1756 		disable_buildid_cache();
1757 	} else if (rec->switch_output.enabled) {
1758 		/*
1759 		 * In 'perf record --switch-output', disable buildid
1760 		 * generation by default to reduce data file switching
1761 		 * overhead. Still generate buildid if they are required
1762 		 * explicitly using
1763 		 *
1764 		 *  perf record --switch-output --no-no-buildid \
1765 		 *              --no-no-buildid-cache
1766 		 *
1767 		 * Following code equals to:
1768 		 *
1769 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1770 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1771 		 *         disable_buildid_cache();
1772 		 */
1773 		bool disable = true;
1774 
1775 		if (rec->no_buildid_set && !rec->no_buildid)
1776 			disable = false;
1777 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1778 			disable = false;
1779 		if (disable) {
1780 			rec->no_buildid = true;
1781 			rec->no_buildid_cache = true;
1782 			disable_buildid_cache();
1783 		}
1784 	}
1785 
1786 	if (record.opts.overwrite)
1787 		record.opts.tail_synthesize = true;
1788 
1789 	if (rec->evlist->nr_entries == 0 &&
1790 	    perf_evlist__add_default(rec->evlist) < 0) {
1791 		pr_err("Not enough memory for event selector list\n");
1792 		goto out;
1793 	}
1794 
1795 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1796 		rec->opts.no_inherit = true;
1797 
1798 	err = target__validate(&rec->opts.target);
1799 	if (err) {
1800 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1801 		ui__warning("%s", errbuf);
1802 	}
1803 
1804 	err = target__parse_uid(&rec->opts.target);
1805 	if (err) {
1806 		int saved_errno = errno;
1807 
1808 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1809 		ui__error("%s", errbuf);
1810 
1811 		err = -saved_errno;
1812 		goto out;
1813 	}
1814 
1815 	/* Enable ignoring missing threads when -u option is defined. */
1816 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1817 
1818 	err = -ENOMEM;
1819 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1820 		usage_with_options(record_usage, record_options);
1821 
1822 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1823 	if (err)
1824 		goto out;
1825 
1826 	/*
1827 	 * We take all buildids when the file contains
1828 	 * AUX area tracing data because we do not decode the
1829 	 * trace because it would take too long.
1830 	 */
1831 	if (rec->opts.full_auxtrace)
1832 		rec->buildid_all = true;
1833 
1834 	if (record_opts__config(&rec->opts)) {
1835 		err = -EINVAL;
1836 		goto out;
1837 	}
1838 
1839 	err = __cmd_record(&record, argc, argv);
1840 out:
1841 	perf_evlist__delete(rec->evlist);
1842 	symbol__exit();
1843 	auxtrace_record__free(rec->itr);
1844 	return err;
1845 }
1846 
1847 static void snapshot_sig_handler(int sig __maybe_unused)
1848 {
1849 	struct record *rec = &record;
1850 
1851 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1852 		trigger_hit(&auxtrace_snapshot_trigger);
1853 		auxtrace_record__snapshot_started = 1;
1854 		if (auxtrace_record__snapshot_start(record.itr))
1855 			trigger_error(&auxtrace_snapshot_trigger);
1856 	}
1857 
1858 	if (switch_output_signal(rec))
1859 		trigger_hit(&switch_output_trigger);
1860 }
1861 
1862 static void alarm_sig_handler(int sig __maybe_unused)
1863 {
1864 	struct record *rec = &record;
1865 
1866 	if (switch_output_time(rec))
1867 		trigger_hit(&switch_output_trigger);
1868 }
1869