/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}
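
/*
 * Worked example (hypothetical numbers): with mask = 0xfff (a 4KiB
 * buffer) and head = 0x100, the walk starts at evt_head = 0x100 and
 * keeps adding pheader->size until either a zero-sized header is found
 * (the writer never wrapped, so *end marks the end of valid data) or
 * evt_head - head reaches the buffer size, meaning a full lap: in that
 * case the last step is undone so [*start, *end) never exceeds one
 * buffer's worth of data.
 */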

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}
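
/*
 * Wrap-around example (hypothetical numbers) for the split write
 * above: with mask = 0xfff, start = 0xf00 and end = 0x1100,
 * (start & mask) + size is 0xf00 + 0x200 = 0x1100, which differs from
 * (end & mask) = 0x100, so the data goes out in two chunks: 0x100
 * bytes from offset 0xf00 up to the end of the ring buffer, then the
 * remaining 0x100 bytes from offset 0.
 */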

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);
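
/*
 * Rough trigger lifecycle, as used in this file (see util/trigger.h
 * for the authoritative state machine): a trigger is armed with
 * trigger_on(), made READY with trigger_ready(), flipped READY -> HIT
 * from an async context (here: the SIGUSR2 handler), consumed and
 * re-armed by the main loop via trigger_ready(), and parked in a
 * terminal error state by trigger_error().
 */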

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}
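
/*
 * Padding example: event->auxtrace.size covers len1 + len2 rounded up
 * to 8 bytes, so with, say, len1 = 13 and len2 = 0, (13 + 0) & 7 == 5
 * and padding = 8 - 5 = 3; the 13 data bytes plus 3 bytes from pad[]
 * keep the next event 8-byte aligned in the output file.
 */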

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}
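
/*
 * Snapshot control flow: SIGUSR2 -> snapshot_sig_handler() hits
 * auxtrace_snapshot_trigger and sets auxtrace_record__snapshot_started;
 * the __cmd_record() main loop then calls
 * record__read_auxtrace_snapshot() to drain the AUX buffers and re-arm
 * the trigger.
 */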

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel: when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a DSO preload, because by default
	 * guest module symbols are loaded from guest kallsyms instead
	 * of /lib/modules/XXX/XXX.  This avoids missing symbols when
	 * the first address is in a module instead of in the guest
	 * kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
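
/*
 * PERF_RECORD_FINISHED_ROUND tells consumers (perf report/script) that
 * every event written before it is older than anything written after
 * it, so the ordered-events queue can be flushed and sorted up to this
 * point instead of buffering the whole file.
 */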

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}
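
/*
 * Note on the on-stack thread_map above: struct thread_map ends in a
 * flexible array of struct thread_map_data, so placing one
 * thread_map_data directly behind it gives map.map[0] valid storage
 * for the workload's pid without a heap allocation.
 */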

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as a real timestamp, e.g. "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would
		 * contain no map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}
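
/*
 * Example: with -o perf.data and --switch-output, each SIGUSR2 moves
 * the current output aside as perf.data.<timestamp> (e.g.
 * perf.data.2015122520103046) and recording continues into a fresh
 * perf.data; at exit the final output is renamed the same way.
 */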

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}
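
/*
 * The __weak default above does nothing; architectures that can
 * convert hardware timestamps override it (e.g. x86 synthesizes a
 * PERF_RECORD_TIME_CONV event from the TSC parameters in the mmap
 * control page, see arch/x86/util/tsc.c).
 */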

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script cannot
		 * see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap() ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect
			 * data from the overwritable ring buffer.
			 * Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one; ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }
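
/*
 * For example, CLOCKID_MAP("monotonic", CLOCK_MONOTONIC) expands to
 * { .name = "monotonic", .clockid = (CLOCK_MONOTONIC), }, one entry in
 * the clockids[] table consumed by parse_clockid() below; CLOCKID_END's
 * NULL name terminates the lookup loop.
 */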

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}
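
/*
 * Usage examples: -k mono, -k monotonic_raw and -k CLOCK_BOOTTIME all
 * resolve through the clockids[] table above, while a raw number such
 * as -k 4 (CLOCK_MONOTONIC_RAW on Linux) is accepted directly via
 * sscanf().
 */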

static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}
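
/*
 * Usage examples: -m 512 sets 512 data pages per mmap, -m 512,64 also
 * sets 64 AUX area pages, and -m ,64 keeps the default data size and
 * only sets the AUX size; size suffixes (e.g. -m 4M) are handled by
 * __perf_evlist__parse_mmap_pages().
 */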

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need access to it in record__exit, which is
 * called after cmd_record() exits; but since record_options needs to
 * be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
		    "Switch output when receiving SIGUSR2"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (rec->switch_output)
		rec->timestamp_filename = true;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			return err;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		return err;

	if (dry_run)
		return 0;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		return err;
	}

	err = -ENOMEM;

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out_symbol_exit;

	/*
	 * We take all buildids when the file contains AUX area
	 * tracing data, because we do not decode the trace: that
	 * would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (trigger_is_ready(&switch_output_trigger))
		trigger_hit(&switch_output_trigger);
}