1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #include "builtin.h"
9 
10 #include "perf.h"
11 
12 #include "util/build-id.h"
13 #include "util/util.h"
14 #include <subcmd/parse-options.h>
15 #include "util/parse-events.h"
16 #include "util/config.h"
17 
18 #include "util/callchain.h"
19 #include "util/cgroup.h"
20 #include "util/header.h"
21 #include "util/event.h"
22 #include "util/evlist.h"
23 #include "util/evsel.h"
24 #include "util/debug.h"
25 #include "util/drv_configs.h"
26 #include "util/session.h"
27 #include "util/tool.h"
28 #include "util/symbol.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/llvm-utils.h"
38 #include "util/bpf-loader.h"
39 #include "util/trigger.h"
40 #include "util/perf-hooks.h"
41 #include "util/time-utils.h"
42 #include "util/units.h"
43 #include "asm/bug.h"
44 
45 #include <errno.h>
46 #include <inttypes.h>
47 #include <poll.h>
48 #include <unistd.h>
49 #include <sched.h>
50 #include <signal.h>
51 #include <sys/mman.h>
52 #include <sys/wait.h>
54 #include <linux/time64.h>
55 
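/*
 * State of --switch-output: 'str' holds the raw option value and 'set'
 * records whether the option was given at all; switch_output_setup()
 * turns it into the 'signal', 'size' or 'time' mode that 'enabled'
 * summarizes.
 */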
56 struct switch_output {
57 	bool		 enabled;
58 	bool		 signal;
59 	unsigned long	 size;
60 	unsigned long	 time;
61 	const char	*str;
62 	bool		 set;
63 };
64 
65 struct record {
66 	struct perf_tool	tool;
67 	struct record_opts	opts;
68 	u64			bytes_written;
69 	struct perf_data_file	file;
70 	struct auxtrace_record	*itr;
71 	struct perf_evlist	*evlist;
72 	struct perf_session	*session;
73 	const char		*progname;
74 	int			realtime_prio;
75 	bool			no_buildid;
76 	bool			no_buildid_set;
77 	bool			no_buildid_cache;
78 	bool			no_buildid_cache_set;
79 	bool			buildid_all;
80 	bool			timestamp_filename;
81 	struct switch_output	switch_output;
82 	unsigned long long	samples;
83 };
84 
85 static volatile int auxtrace_record__snapshot_started;
86 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
87 static DEFINE_TRIGGER(switch_output_trigger);
88 
89 static bool switch_output_signal(struct record *rec)
90 {
91 	return rec->switch_output.signal &&
92 	       trigger_is_ready(&switch_output_trigger);
93 }
94 
95 static bool switch_output_size(struct record *rec)
96 {
97 	return rec->switch_output.size &&
98 	       trigger_is_ready(&switch_output_trigger) &&
99 	       (rec->bytes_written >= rec->switch_output.size);
100 }
101 
102 static bool switch_output_time(struct record *rec)
103 {
104 	return rec->switch_output.time &&
105 	       trigger_is_ready(&switch_output_trigger);
106 }
107 
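/*
 * All data written to the perf.data file funnels through record__write():
 * besides doing the actual write it accounts the bytes in
 * rec->bytes_written, so that switch_output_size() can fire the
 * switch-output trigger once the configured size threshold is crossed.
 */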
108 static int record__write(struct record *rec, void *bf, size_t size)
109 {
110 	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
111 		pr_err("failed to write perf data, error: %m\n");
112 		return -1;
113 	}
114 
115 	rec->bytes_written += size;
116 
117 	if (switch_output_size(rec))
118 		trigger_hit(&switch_output_trigger);
119 
120 	return 0;
121 }
122 
123 static int process_synthesized_event(struct perf_tool *tool,
124 				     union perf_event *event,
125 				     struct perf_sample *sample __maybe_unused,
126 				     struct machine *machine __maybe_unused)
127 {
128 	struct record *rec = container_of(tool, struct record, tool);
129 	return record__write(rec, event, event->header.size);
130 }
131 
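/*
 * Find the valid data range in a backward (overwrite) ring buffer, where
 * the kernel moves 'head' backwards as it writes.  Starting at 'head' we
 * step forward over record headers until we either meet a zero-sized
 * header (memory the kernel has not written) or have covered a full
 * buffer length (rewinding the last record if we overshot), so that
 * [*start, *end) is safe to copy out.
 */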
132 static int
133 backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
134 {
135 	struct perf_event_header *pheader;
136 	u64 evt_head = head;
137 	int size = mask + 1;
138 
139 	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
140 	pheader = (struct perf_event_header *)(buf + (head & mask));
141 	*start = head;
142 	while (true) {
143 		if (evt_head - head >= (unsigned int)size) {
144 			pr_debug("Finished reading backward ring buffer: rewind\n");
145 			if (evt_head - head > (unsigned int)size)
146 				evt_head -= pheader->size;
147 			*end = evt_head;
148 			return 0;
149 		}
150 
151 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
152 
153 		if (pheader->size == 0) {
154 			pr_debug("Finished reading backward ring buffer: get start\n");
155 			*end = evt_head;
156 			return 0;
157 		}
158 
159 		evt_head += pheader->size;
160 		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
161 	}
162 	WARN_ONCE(1, "Shouldn't get here\n");
163 	return -1;
164 }
165 
166 static int
167 rb_find_range(void *data, int mask, u64 head, u64 old,
168 	      u64 *start, u64 *end, bool backward)
169 {
170 	if (!backward) {
171 		*start = old;
172 		*end = head;
173 		return 0;
174 	}
175 
176 	return backward_rb_find_range(data, mask, head, start, end);
177 }
178 
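/*
 * Copy the range [start, end) out of the ring buffer into the perf.data
 * file.  Because the buffer is a power-of-two circle addressed through
 * 'mask', the range may wrap: e.g. with mask = 0xfff (4KiB), start = 0xff0
 * and end = 0x1010, the first write covers the 0x10 bytes up to the end of
 * the buffer and the second the remaining 0x10 bytes that wrapped to
 * offset 0.
 */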
179 static int
180 record__mmap_read(struct record *rec, struct perf_mmap *md,
181 		  bool overwrite, bool backward)
182 {
183 	u64 head = perf_mmap__read_head(md);
184 	u64 old = md->prev;
185 	u64 end = head, start = old;
186 	unsigned char *data = md->base + page_size;
187 	unsigned long size;
188 	void *buf;
189 	int rc = 0;
190 
191 	if (rb_find_range(data, md->mask, head,
192 			  old, &start, &end, backward))
193 		return -1;
194 
195 	if (start == end)
196 		return 0;
197 
198 	rec->samples++;
199 
200 	size = end - start;
201 	if (size > (unsigned long)(md->mask) + 1) {
202 		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
203 
204 		md->prev = head;
205 		perf_mmap__consume(md, overwrite || backward);
206 		return 0;
207 	}
208 
209 	if ((start & md->mask) + size != (end & md->mask)) {
210 		buf = &data[start & md->mask];
211 		size = md->mask + 1 - (start & md->mask);
212 		start += size;
213 
214 		if (record__write(rec, buf, size) < 0) {
215 			rc = -1;
216 			goto out;
217 		}
218 	}
219 
220 	buf = &data[start & md->mask];
221 	size = end - start;
222 	start += size;
223 
224 	if (record__write(rec, buf, size) < 0) {
225 		rc = -1;
226 		goto out;
227 	}
228 
229 	md->prev = head;
230 	perf_mmap__consume(md, overwrite || backward);
231 out:
232 	return rc;
233 }
234 
235 static volatile int done;
236 static volatile int signr = -1;
237 static volatile int child_finished;
238 
239 static void sig_handler(int sig)
240 {
241 	if (sig == SIGCHLD)
242 		child_finished = 1;
243 	else
244 		signr = sig;
245 
246 	done = 1;
247 }
248 
249 static void sigsegv_handler(int sig)
250 {
251 	perf_hooks__recover();
252 	sighandler_dump_stack(sig);
253 }
254 
255 static void record__sig_exit(void)
256 {
257 	if (signr == -1)
258 		return;
259 
260 	signal(signr, SIG_DFL);
261 	raise(signr);
262 }
263 
264 #ifdef HAVE_AUXTRACE_SUPPORT
265 
266 static int record__process_auxtrace(struct perf_tool *tool,
267 				    union perf_event *event, void *data1,
268 				    size_t len1, void *data2, size_t len2)
269 {
270 	struct record *rec = container_of(tool, struct record, tool);
271 	struct perf_data_file *file = &rec->file;
272 	size_t padding;
273 	u8 pad[8] = {0};
274 
275 	if (!perf_data_file__is_pipe(file)) {
276 		off_t file_offset;
277 		int fd = perf_data_file__fd(file);
278 		int err;
279 
280 		file_offset = lseek(fd, 0, SEEK_CUR);
281 		if (file_offset == -1)
282 			return -1;
283 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
284 						     event, file_offset);
285 		if (err)
286 			return err;
287 	}
288 
289 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
290 	padding = (len1 + len2) & 7;
291 	if (padding)
292 		padding = 8 - padding;
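	/* e.g. len1 + len2 = 13: (13 & 7) == 5, so pad with 3 bytes up to the next 8-byte boundary */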
293 
294 	record__write(rec, event, event->header.size);
295 	record__write(rec, data1, len1);
296 	if (len2)
297 		record__write(rec, data2, len2);
298 	record__write(rec, &pad, padding);
299 
300 	return 0;
301 }
302 
303 static int record__auxtrace_mmap_read(struct record *rec,
304 				      struct auxtrace_mmap *mm)
305 {
306 	int ret;
307 
308 	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
309 				  record__process_auxtrace);
310 	if (ret < 0)
311 		return ret;
312 
313 	if (ret)
314 		rec->samples++;
315 
316 	return 0;
317 }
318 
319 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
320 					       struct auxtrace_mmap *mm)
321 {
322 	int ret;
323 
324 	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
325 					   record__process_auxtrace,
326 					   rec->opts.auxtrace_snapshot_size);
327 	if (ret < 0)
328 		return ret;
329 
330 	if (ret)
331 		rec->samples++;
332 
333 	return 0;
334 }
335 
336 static int record__auxtrace_read_snapshot_all(struct record *rec)
337 {
338 	int i;
339 	int rc = 0;
340 
341 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
342 		struct auxtrace_mmap *mm =
343 				&rec->evlist->mmap[i].auxtrace_mmap;
344 
345 		if (!mm->base)
346 			continue;
347 
348 		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
349 			rc = -1;
350 			goto out;
351 		}
352 	}
353 out:
354 	return rc;
355 }
356 
357 static void record__read_auxtrace_snapshot(struct record *rec)
358 {
359 	pr_debug("Recording AUX area tracing snapshot\n");
360 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
361 		trigger_error(&auxtrace_snapshot_trigger);
362 	} else {
363 		if (auxtrace_record__snapshot_finish(rec->itr))
364 			trigger_error(&auxtrace_snapshot_trigger);
365 		else
366 			trigger_ready(&auxtrace_snapshot_trigger);
367 	}
368 }
369 
370 #else
371 
372 static inline
373 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
374 			       struct auxtrace_mmap *mm __maybe_unused)
375 {
376 	return 0;
377 }
378 
379 static inline
380 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
381 {
382 }
383 
384 static inline
385 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
386 {
387 	return 0;
388 }
389 
390 #endif
391 
392 static int record__mmap_evlist(struct record *rec,
393 			       struct perf_evlist *evlist)
394 {
395 	struct record_opts *opts = &rec->opts;
396 	char msg[512];
397 
398 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
399 				 opts->auxtrace_mmap_pages,
400 				 opts->auxtrace_snapshot_mode) < 0) {
401 		if (errno == EPERM) {
402 			pr_err("Permission error mapping pages.\n"
403 			       "Consider increasing "
404 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
405 			       "or try again with a smaller value of -m/--mmap-pages.\n"
406 			       "(current value: %u,%u)\n",
407 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
408 			return -errno;
409 		} else {
410 			pr_err("failed to mmap with %d (%s)\n", errno,
411 				str_error_r(errno, msg, sizeof(msg)));
412 			if (errno)
413 				return -errno;
414 			else
415 				return -EINVAL;
416 		}
417 	}
418 	return 0;
419 }
420 
421 static int record__mmap(struct record *rec)
422 {
423 	return record__mmap_evlist(rec, rec->evlist);
424 }
425 
426 static int record__open(struct record *rec)
427 {
428 	char msg[BUFSIZ];
429 	struct perf_evsel *pos;
430 	struct perf_evlist *evlist = rec->evlist;
431 	struct perf_session *session = rec->session;
432 	struct record_opts *opts = &rec->opts;
433 	struct perf_evsel_config_term *err_term;
434 	int rc = 0;
435 
436 	perf_evlist__config(evlist, opts, &callchain_param);
437 
438 	evlist__for_each_entry(evlist, pos) {
439 try_again:
440 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
441 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
442 				if (verbose > 0)
443 					ui__warning("%s\n", msg);
444 				goto try_again;
445 			}
446 
447 			rc = -errno;
448 			perf_evsel__open_strerror(pos, &opts->target,
449 						  errno, msg, sizeof(msg));
450 			ui__error("%s\n", msg);
451 			goto out;
452 		}
453 	}
454 
455 	if (perf_evlist__apply_filters(evlist, &pos)) {
456 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
457 			pos->filter, perf_evsel__name(pos), errno,
458 			str_error_r(errno, msg, sizeof(msg)));
459 		rc = -1;
460 		goto out;
461 	}
462 
463 	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
464 		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
465 		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
466 		      str_error_r(errno, msg, sizeof(msg)));
467 		rc = -1;
468 		goto out;
469 	}
470 
471 	rc = record__mmap(rec);
472 	if (rc)
473 		goto out;
474 
475 	session->evlist = evlist;
476 	perf_session__set_id_hdr_size(session);
477 out:
478 	return rc;
479 }
480 
481 static int process_sample_event(struct perf_tool *tool,
482 				union perf_event *event,
483 				struct perf_sample *sample,
484 				struct perf_evsel *evsel,
485 				struct machine *machine)
486 {
487 	struct record *rec = container_of(tool, struct record, tool);
488 
489 	rec->samples++;
490 
491 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
492 }
493 
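/*
 * Post-process the freshly written perf.data so that the build-ids of the
 * DSOs hit by samples end up in the file header: process_sample_event()
 * forwards each sample to build_id__mark_dso_hit(), so replaying the
 * recorded events here marks every DSO a sample landed in.
 */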
494 static int process_buildids(struct record *rec)
495 {
496 	struct perf_data_file *file  = &rec->file;
497 	struct perf_session *session = rec->session;
498 
499 	if (file->size == 0)
500 		return 0;
501 
502 	/*
503 	 * During this process, it'll load the kernel map and replace
504 	 * dso->long_name with the real pathname it found.  In this case
505 	 * we prefer a vmlinux path like
506 	 *   /lib/modules/3.16.4/build/vmlinux
507 	 *
508 	 * rather than the build-id path (in the debug directory):
509 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
510 	 */
511 	symbol_conf.ignore_vmlinux_buildid = true;
512 
513 	/*
514 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
515 	 * so there is no need to process samples.
516 	 */
517 	if (rec->buildid_all)
518 		rec->tool.sample = NULL;
519 
520 	return perf_session__process_events(session);
521 }
522 
523 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
524 {
525 	int err;
526 	struct perf_tool *tool = data;
527 	/*
528 	 * For a guest kernel, when processing the record & report subcommands,
529 	 * we arrange the module mmaps prior to the guest kernel mmap and
530 	 * trigger a DSO preload, because by default guest module symbols are
531 	 * loaded from guest kallsyms instead of /lib/modules/XXX/XXX. This
532 	 * avoids missing symbols when the first address falls in a module
533 	 * instead of in the guest kernel.
534 	 */
535 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
536 					     machine);
537 	if (err < 0)
538 		pr_err("Couldn't record guest kernel [%d]'s reference"
539 		       " relocation symbol.\n", machine->pid);
540 
541 	/*
542 	 * We use _stext for the guest kernel because the guest kernel's
543 	 * /proc/kallsyms sometimes has no _text.
544 	 */
545 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
546 						 machine);
547 	if (err < 0)
548 		pr_err("Couldn't record guest kernel [%d]'s reference"
549 		       " relocation symbol.\n", machine->pid);
550 }
551 
552 static struct perf_event_header finished_round_event = {
553 	.size = sizeof(struct perf_event_header),
554 	.type = PERF_RECORD_FINISHED_ROUND,
555 };
556 
557 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
558 				    bool backward)
559 {
560 	u64 bytes_written = rec->bytes_written;
561 	int i;
562 	int rc = 0;
563 	struct perf_mmap *maps;
564 
565 	if (!evlist)
566 		return 0;
567 
568 	maps = backward ? evlist->backward_mmap : evlist->mmap;
569 	if (!maps)
570 		return 0;
571 
572 	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
573 		return 0;
574 
575 	for (i = 0; i < evlist->nr_mmaps; i++) {
576 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
577 
578 		if (maps[i].base) {
579 			if (record__mmap_read(rec, &maps[i],
580 					      evlist->overwrite, backward) != 0) {
581 				rc = -1;
582 				goto out;
583 			}
584 		}
585 
586 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
587 		    record__auxtrace_mmap_read(rec, mm) != 0) {
588 			rc = -1;
589 			goto out;
590 		}
591 	}
592 
593 	/*
594 	 * Mark the round finished if we wrote
595 	 * at least one event.
596 	 */
597 	if (bytes_written != rec->bytes_written)
598 		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
599 
600 	if (backward)
601 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
602 out:
603 	return rc;
604 }
605 
606 static int record__mmap_read_all(struct record *rec)
607 {
608 	int err;
609 
610 	err = record__mmap_read_evlist(rec, rec->evlist, false);
611 	if (err)
612 		return err;
613 
614 	return record__mmap_read_evlist(rec, rec->evlist, true);
615 }
616 
617 static void record__init_features(struct record *rec)
618 {
619 	struct perf_session *session = rec->session;
620 	int feat;
621 
622 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
623 		perf_header__set_feat(&session->header, feat);
624 
625 	if (rec->no_buildid)
626 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
627 
628 	if (!have_tracepoints(&rec->evlist->entries))
629 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
630 
631 	if (!rec->opts.branch_stack)
632 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
633 
634 	if (!rec->opts.full_auxtrace)
635 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
636 
637 	perf_header__clear_feat(&session->header, HEADER_STAT);
638 }
639 
640 static void
641 record__finish_output(struct record *rec)
642 {
643 	struct perf_data_file *file = &rec->file;
644 	int fd = perf_data_file__fd(file);
645 
646 	if (file->is_pipe)
647 		return;
648 
649 	rec->session->header.data_size += rec->bytes_written;
650 	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
651 
652 	if (!rec->no_buildid) {
653 		process_buildids(rec);
654 
655 		if (rec->buildid_all)
656 			dsos__hit_all(rec->session);
657 	}
658 	perf_session__write_header(rec->session, rec->evlist, fd, true);
659 
662 
663 static int record__synthesize_workload(struct record *rec, bool tail)
664 {
665 	int err;
666 	struct thread_map *thread_map;
667 
668 	if (rec->opts.tail_synthesize != tail)
669 		return 0;
670 
671 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
672 	if (thread_map == NULL)
673 		return -1;
674 
675 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
676 						 process_synthesized_event,
677 						 &rec->session->machines.host,
678 						 rec->opts.sample_address,
679 						 rec->opts.proc_map_timeout);
680 	thread_map__put(thread_map);
681 	return err;
682 }
683 
684 static int record__synthesize(struct record *rec, bool tail);
685 
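/*
 * Finish the current output file and switch to a new one whose name gets
 * the timestamp appended (e.g. perf.data.2015122520103046).  Unless this
 * is the final switch at exit, bytes_written and header.data_size are
 * reset so accounting starts over for the new file.
 */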
686 static int
687 record__switch_output(struct record *rec, bool at_exit)
688 {
689 	struct perf_data_file *file = &rec->file;
690 	int fd, err;
691 
692 	/* Same size as a real timestamp, e.g. "2015122520103046" */
693 	char timestamp[] = "InvalidTimestamp";
694 
695 	record__synthesize(rec, true);
696 	if (target__none(&rec->opts.target))
697 		record__synthesize_workload(rec, true);
698 
699 	rec->samples = 0;
700 	record__finish_output(rec);
701 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
702 	if (err) {
703 		pr_err("Failed to get current timestamp\n");
704 		return -EINVAL;
705 	}
706 
707 	fd = perf_data_file__switch(file, timestamp,
708 				    rec->session->header.data_offset,
709 				    at_exit);
710 	if (fd >= 0 && !at_exit) {
711 		rec->bytes_written = 0;
712 		rec->session->header.data_size = 0;
713 	}
714 
715 	if (!quiet)
716 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
717 			file->path, timestamp);
718 
719 	/* Output tracking events */
720 	if (!at_exit) {
721 		record__synthesize(rec, false);
722 
723 		/*
724 		 * In 'perf record --switch-output' without -a,
725 		 * record__synthesize() in record__switch_output() won't
726 		 * generate tracking events because there's no thread_map
727 		 * in the evlist, so the newly created perf.data would
728 		 * contain no map and comm information.
729 		 * Create a fake thread_map and call
730 		 * perf_event__synthesize_thread_map() directly for those events.
731 		 */
732 		if (target__none(&rec->opts.target))
733 			record__synthesize_workload(rec, false);
734 	}
735 	return fd;
736 }
737 
738 static volatile int workload_exec_errno;
739 
740 /*
741  * perf_evlist__prepare_workload will send a SIGUSR1
742  * if the fork fails, since we asked for it by setting its
743  * want_signal to true.
744  */
745 static void workload_exec_failed_signal(int signo __maybe_unused,
746 					siginfo_t *info,
747 					void *ucontext __maybe_unused)
748 {
749 	workload_exec_errno = info->si_value.sival_int;
750 	done = 1;
751 	child_finished = 1;
752 }
753 
754 static void snapshot_sig_handler(int sig);
755 static void alarm_sig_handler(int sig);
756 
757 int __weak
758 perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
759 			    struct perf_tool *tool __maybe_unused,
760 			    perf_event__handler_t process __maybe_unused,
761 			    struct machine *machine __maybe_unused)
762 {
763 	return 0;
764 }
765 
766 static const struct perf_event_mmap_page *
767 perf_evlist__pick_pc(struct perf_evlist *evlist)
768 {
769 	if (evlist) {
770 		if (evlist->mmap && evlist->mmap[0].base)
771 			return evlist->mmap[0].base;
772 		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
773 			return evlist->backward_mmap[0].base;
774 	}
775 	return NULL;
776 }
777 
778 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
779 {
780 	const struct perf_event_mmap_page *pc;
781 
782 	pc = perf_evlist__pick_pc(rec->evlist);
783 	if (pc)
784 		return pc;
785 	return NULL;
786 }
787 
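/*
 * Synthesize the non-sample events that describe the system state: event
 * attributes and tracing data for pipe output, the time conversion and
 * auxtrace info, kernel and module mmaps, guest machines and the existing
 * threads.  With --tail-synthesize this runs at the end of the record
 * session instead of the beginning, which is what the 'tail' flag selects.
 */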
788 static int record__synthesize(struct record *rec, bool tail)
789 {
790 	struct perf_session *session = rec->session;
791 	struct machine *machine = &session->machines.host;
792 	struct perf_data_file *file = &rec->file;
793 	struct record_opts *opts = &rec->opts;
794 	struct perf_tool *tool = &rec->tool;
795 	int fd = perf_data_file__fd(file);
796 	int err = 0;
797 
798 	if (rec->opts.tail_synthesize != tail)
799 		return 0;
800 
801 	if (file->is_pipe) {
802 		err = perf_event__synthesize_attrs(tool, session,
803 						   process_synthesized_event);
804 		if (err < 0) {
805 			pr_err("Couldn't synthesize attrs.\n");
806 			goto out;
807 		}
808 
809 		if (have_tracepoints(&rec->evlist->entries)) {
810 			/*
811 			 * FIXME err <= 0 here actually means that
812 			 * there were no tracepoints so it's not really
813 			 * an error, just that we don't need to
814 			 * synthesize anything.  We really have to
815 			 * return this more properly and also
816 			 * propagate errors that now end up calling die()
817 			 */
818 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
819 								  process_synthesized_event);
820 			if (err <= 0) {
821 				pr_err("Couldn't record tracing data.\n");
822 				goto out;
823 			}
824 			rec->bytes_written += err;
825 		}
826 	}
827 
828 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
829 					  process_synthesized_event, machine);
830 	if (err)
831 		goto out;
832 
833 	if (rec->opts.full_auxtrace) {
834 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
835 					session, process_synthesized_event);
836 		if (err)
837 			goto out;
838 	}
839 
840 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
841 						 machine);
842 	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
843 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
844 			   "Check /proc/kallsyms permission or run as root.\n");
845 
846 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
847 					     machine);
848 	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
849 			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
850 			   "Check /proc/modules permission or run as root.\n");
851 
852 	if (perf_guest) {
853 		machines__process_guests(&session->machines,
854 					 perf_event__synthesize_guest_os, tool);
855 	}
856 
857 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
858 					    process_synthesized_event, opts->sample_address,
859 					    opts->proc_map_timeout);
860 out:
861 	return err;
862 }
863 
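/*
 * The main record loop: set up signals and the session, fork the workload
 * if one was given, open and mmap the events, synthesize the initial
 * non-sample events, then drain the mmaps until done and finalize the
 * output file.
 */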
864 static int __cmd_record(struct record *rec, int argc, const char **argv)
865 {
866 	int err;
867 	int status = 0;
868 	unsigned long waking = 0;
869 	const bool forks = argc > 0;
870 	struct machine *machine;
871 	struct perf_tool *tool = &rec->tool;
872 	struct record_opts *opts = &rec->opts;
873 	struct perf_data_file *file = &rec->file;
874 	struct perf_session *session;
875 	bool disabled = false, draining = false;
876 	int fd;
877 
878 	rec->progname = argv[0];
879 
880 	atexit(record__sig_exit);
881 	signal(SIGCHLD, sig_handler);
882 	signal(SIGINT, sig_handler);
883 	signal(SIGTERM, sig_handler);
884 	signal(SIGSEGV, sigsegv_handler);
885 
886 	if (rec->opts.record_namespaces)
887 		tool->namespace_events = true;
888 
889 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
890 		signal(SIGUSR2, snapshot_sig_handler);
891 		if (rec->opts.auxtrace_snapshot_mode)
892 			trigger_on(&auxtrace_snapshot_trigger);
893 		if (rec->switch_output.enabled)
894 			trigger_on(&switch_output_trigger);
895 	} else {
896 		signal(SIGUSR2, SIG_IGN);
897 	}
898 
899 	session = perf_session__new(file, false, tool);
900 	if (session == NULL) {
901 		pr_err("Perf session creation failed.\n");
902 		return -1;
903 	}
904 
905 	fd = perf_data_file__fd(file);
906 	rec->session = session;
907 
908 	record__init_features(rec);
909 
910 	if (forks) {
911 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
912 						    argv, file->is_pipe,
913 						    workload_exec_failed_signal);
914 		if (err < 0) {
915 			pr_err("Couldn't run the workload!\n");
916 			status = err;
917 			goto out_delete_session;
918 		}
919 	}
920 
921 	if (record__open(rec) != 0) {
922 		err = -1;
923 		goto out_child;
924 	}
925 
926 	err = bpf__apply_obj_config();
927 	if (err) {
928 		char errbuf[BUFSIZ];
929 
930 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
931 		pr_err("ERROR: Apply config to BPF failed: %s\n",
932 			 errbuf);
933 		goto out_child;
934 	}
935 
936 	/*
937 	 * Normally perf_session__new would do this, but it doesn't have the
938 	 * evlist.
939 	 */
940 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
941 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
942 		rec->tool.ordered_events = false;
943 	}
944 
945 	if (!rec->evlist->nr_groups)
946 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
947 
948 	if (file->is_pipe) {
949 		err = perf_header__write_pipe(fd);
950 		if (err < 0)
951 			goto out_child;
952 	} else {
953 		err = perf_session__write_header(session, rec->evlist, fd, false);
954 		if (err < 0)
955 			goto out_child;
956 	}
957 
958 	if (!rec->no_buildid
959 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
960 		pr_err("Couldn't generate buildids. "
961 		       "Use --no-buildid to profile anyway.\n");
962 		err = -1;
963 		goto out_child;
964 	}
965 
966 	machine = &session->machines.host;
967 
968 	err = record__synthesize(rec, false);
969 	if (err < 0)
970 		goto out_child;
971 
972 	if (rec->realtime_prio) {
973 		struct sched_param param;
974 
975 		param.sched_priority = rec->realtime_prio;
976 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
977 			pr_err("Could not set realtime priority.\n");
978 			err = -1;
979 			goto out_child;
980 		}
981 	}
982 
983 	/*
984 	 * When perf is starting the traced process, all the events
985 	 * (apart from group members) have enable_on_exec=1 set,
986 	 * so don't spoil it by prematurely enabling them.
987 	 */
988 	if (!target__none(&opts->target) && !opts->initial_delay)
989 		perf_evlist__enable(rec->evlist);
990 
991 	/*
992 	 * Let the child rip
993 	 */
994 	if (forks) {
995 		union perf_event *event;
996 		pid_t tgid;
997 
998 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
999 		if (event == NULL) {
1000 			err = -ENOMEM;
1001 			goto out_child;
1002 		}
1003 
1004 		/*
1005 		 * Some H/W events are generated before the COMM event,
1006 		 * which is emitted during exec(), so perf script
1007 		 * cannot see a correct process name for those events.
1008 		 * Synthesize a COMM event to prevent that.
1009 		 */
1010 		tgid = perf_event__synthesize_comm(tool, event,
1011 						   rec->evlist->workload.pid,
1012 						   process_synthesized_event,
1013 						   machine);
1014 		free(event);
1015 
1016 		if (tgid == -1)
1017 			goto out_child;
1018 
1019 		event = malloc(sizeof(event->namespaces) +
1020 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1021 			       machine->id_hdr_size);
1022 		if (event == NULL) {
1023 			err = -ENOMEM;
1024 			goto out_child;
1025 		}
1026 
1027 		/*
1028 		 * Synthesize NAMESPACES event for the command specified.
1029 		 */
1030 		perf_event__synthesize_namespaces(tool, event,
1031 						  rec->evlist->workload.pid,
1032 						  tgid, process_synthesized_event,
1033 						  machine);
1034 		free(event);
1035 
1036 		perf_evlist__start_workload(rec->evlist);
1037 	}
1038 
1039 	if (opts->initial_delay) {
1040 		usleep(opts->initial_delay * USEC_PER_MSEC);
1041 		perf_evlist__enable(rec->evlist);
1042 	}
1043 
1044 	trigger_ready(&auxtrace_snapshot_trigger);
1045 	trigger_ready(&switch_output_trigger);
1046 	perf_hooks__invoke_record_start();
1047 	for (;;) {
1048 		unsigned long long hits = rec->samples;
1049 
1050 		/*
1051 		 * rec->evlist->bkw_mmap_state may be
1052 		 * BKW_MMAP_EMPTY here: when done == true and
1053 		 * hits != rec->samples in the previous round.
1054 		 *
1055 		 * perf_evlist__toggle_bkw_mmap() ensures we never
1056 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1057 		 */
1058 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1059 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1060 
1061 		if (record__mmap_read_all(rec) < 0) {
1062 			trigger_error(&auxtrace_snapshot_trigger);
1063 			trigger_error(&switch_output_trigger);
1064 			err = -1;
1065 			goto out_child;
1066 		}
1067 
1068 		if (auxtrace_record__snapshot_started) {
1069 			auxtrace_record__snapshot_started = 0;
1070 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1071 				record__read_auxtrace_snapshot(rec);
1072 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1073 				pr_err("AUX area tracing snapshot failed\n");
1074 				err = -1;
1075 				goto out_child;
1076 			}
1077 		}
1078 
1079 		if (trigger_is_hit(&switch_output_trigger)) {
1080 			/*
1081 			 * If switch_output_trigger is hit, the data in the
1082 			 * overwritable ring buffer should have been collected,
1083 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1084 			 *
1085 			 * If SIGUSR2 is raised after or during record__mmap_read_all(),
1086 			 * record__mmap_read_all() didn't collect data from the
1087 			 * overwritable ring buffer. Read again.
1088 			 */
1089 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1090 				continue;
1091 			trigger_ready(&switch_output_trigger);
1092 
1093 			/*
1094 			 * Re-enable events in the overwrite ring buffer after
1095 			 * record__mmap_read_all(): we should have collected
1096 			 * data from it.
1097 			 */
1098 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1099 
1100 			if (!quiet)
1101 				fprintf(stderr, "[ perf record: dump data: Woken up %lu times ]\n",
1102 					waking);
1103 			waking = 0;
1104 			fd = record__switch_output(rec, false);
1105 			if (fd < 0) {
1106 				pr_err("Failed to switch to new file\n");
1107 				trigger_error(&switch_output_trigger);
1108 				err = fd;
1109 				goto out_child;
1110 			}
1111 
1112 			/* re-arm the alarm */
1113 			if (rec->switch_output.time)
1114 				alarm(rec->switch_output.time);
1115 		}
1116 
1117 		if (hits == rec->samples) {
1118 			if (done || draining)
1119 				break;
1120 			err = perf_evlist__poll(rec->evlist, -1);
1121 			/*
1122 			 * Propagate the error only if there is one. Ignore a positive
1123 			 * number of returned events and an interrupt error.
1124 			 */
1125 			if (err > 0 || (err < 0 && errno == EINTR))
1126 				err = 0;
1127 			waking++;
1128 
1129 			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1130 				draining = true;
1131 		}
1132 
1133 		/*
1134 		 * When perf is starting the traced process, the events die with
1135 		 * the process at the end and we wait for that. Thus there is no
1136 		 * need to disable the events in this case.
1137 		 */
1138 		if (done && !disabled && !target__none(&opts->target)) {
1139 			trigger_off(&auxtrace_snapshot_trigger);
1140 			perf_evlist__disable(rec->evlist);
1141 			disabled = true;
1142 		}
1143 	}
1144 	trigger_off(&auxtrace_snapshot_trigger);
1145 	trigger_off(&switch_output_trigger);
1146 
1147 	if (forks && workload_exec_errno) {
1148 		char msg[STRERR_BUFSIZE];
1149 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1150 		pr_err("Workload failed: %s\n", emsg);
1151 		err = -1;
1152 		goto out_child;
1153 	}
1154 
1155 	if (!quiet)
1156 		fprintf(stderr, "[ perf record: Woken up %lu times to write data ]\n", waking);
1157 
1158 	if (target__none(&rec->opts.target))
1159 		record__synthesize_workload(rec, true);
1160 
1161 out_child:
1162 	if (forks) {
1163 		int exit_status;
1164 
1165 		if (!child_finished)
1166 			kill(rec->evlist->workload.pid, SIGTERM);
1167 
1168 		wait(&exit_status);
1169 
1170 		if (err < 0)
1171 			status = err;
1172 		else if (WIFEXITED(exit_status))
1173 			status = WEXITSTATUS(exit_status);
1174 		else if (WIFSIGNALED(exit_status))
1175 			signr = WTERMSIG(exit_status);
1176 	} else
1177 		status = err;
1178 
1179 	record__synthesize(rec, true);
1180 	/* this will be recalculated during process_buildids() */
1181 	rec->samples = 0;
1182 
1183 	if (!err) {
1184 		if (!rec->timestamp_filename) {
1185 			record__finish_output(rec);
1186 		} else {
1187 			fd = record__switch_output(rec, true);
1188 			if (fd < 0) {
1189 				status = fd;
1190 				goto out_delete_session;
1191 			}
1192 		}
1193 	}
1194 
1195 	perf_hooks__invoke_record_end();
1196 
1197 	if (!err && !quiet) {
1198 		char samples[128];
1199 		const char *postfix = rec->timestamp_filename ?
1200 					".<timestamp>" : "";
1201 
1202 		if (rec->samples && !rec->opts.full_auxtrace)
1203 			scnprintf(samples, sizeof(samples),
1204 				  " (%" PRIu64 " samples)", rec->samples);
1205 		else
1206 			samples[0] = '\0';
1207 
1208 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1209 			perf_data_file__size(file) / 1024.0 / 1024.0,
1210 			file->path, postfix, samples);
1211 	}
1212 
1213 out_delete_session:
1214 	perf_session__delete(session);
1215 	return status;
1216 }
1217 
1218 static void callchain_debug(struct callchain_param *callchain)
1219 {
1220 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1221 
1222 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1223 
1224 	if (callchain->record_mode == CALLCHAIN_DWARF)
1225 		pr_debug("callchain: stack dump size %d\n",
1226 			 callchain->dump_size);
1227 }
1228 
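/*
 * Parse the --call-graph argument, e.g. "fp", "dwarf,8192" or "lbr" as
 * described by record_callchain_help.  DWARF unwinding also needs the
 * sampled data addresses, hence sample_address is forced on for it.
 */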
1229 int record_opts__parse_callchain(struct record_opts *record,
1230 				 struct callchain_param *callchain,
1231 				 const char *arg, bool unset)
1232 {
1233 	int ret;
1234 	callchain->enabled = !unset;
1235 
1236 	/* --no-call-graph */
1237 	if (unset) {
1238 		callchain->record_mode = CALLCHAIN_NONE;
1239 		pr_debug("callchain: disabled\n");
1240 		return 0;
1241 	}
1242 
1243 	ret = parse_callchain_record_opt(arg, callchain);
1244 	if (!ret) {
1245 		/* Enable data address sampling for DWARF unwind. */
1246 		if (callchain->record_mode == CALLCHAIN_DWARF)
1247 			record->sample_address = true;
1248 		callchain_debug(callchain);
1249 	}
1250 
1251 	return ret;
1252 }
1253 
1254 int record_parse_callchain_opt(const struct option *opt,
1255 			       const char *arg,
1256 			       int unset)
1257 {
1258 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1259 }
1260 
1261 int record_callchain_opt(const struct option *opt,
1262 			 const char *arg __maybe_unused,
1263 			 int unset __maybe_unused)
1264 {
1265 	struct callchain_param *callchain = opt->value;
1266 
1267 	callchain->enabled = true;
1268 
1269 	if (callchain->record_mode == CALLCHAIN_NONE)
1270 		callchain->record_mode = CALLCHAIN_FP;
1271 
1272 	callchain_debug(callchain);
1273 	return 0;
1274 }
1275 
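/*
 * Handle 'perf config' keys for record, e.g. in ~/.perfconfig:
 *
 *	[record]
 *		build-id = no-cache	# cache | no-cache | skip
 *		call-graph = dwarf	# forwarded as call-graph.record-mode
 */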
1276 static int perf_record_config(const char *var, const char *value, void *cb)
1277 {
1278 	struct record *rec = cb;
1279 
1280 	if (!strcmp(var, "record.build-id")) {
1281 		if (!strcmp(value, "cache"))
1282 			rec->no_buildid_cache = false;
1283 		else if (!strcmp(value, "no-cache"))
1284 			rec->no_buildid_cache = true;
1285 		else if (!strcmp(value, "skip"))
1286 			rec->no_buildid = true;
1287 		else
1288 			return -1;
1289 		return 0;
1290 	}
1291 	if (!strcmp(var, "record.call-graph"))
1292 		var = "call-graph.record-mode"; /* fall-through */
1293 
1294 	return perf_default_config(var, value, cb);
1295 }
1296 
1297 struct clockid_map {
1298 	const char *name;
1299 	int clockid;
1300 };
1301 
1302 #define CLOCKID_MAP(n, c)	\
1303 	{ .name = n, .clockid = (c), }
1304 
1305 #define CLOCKID_END	{ .name = NULL, }
1306 
1307 
1308 /*
1309  * Add the missing ones; we need to build on many distros...
1310  */
1311 #ifndef CLOCK_MONOTONIC_RAW
1312 #define CLOCK_MONOTONIC_RAW 4
1313 #endif
1314 #ifndef CLOCK_BOOTTIME
1315 #define CLOCK_BOOTTIME 7
1316 #endif
1317 #ifndef CLOCK_TAI
1318 #define CLOCK_TAI 11
1319 #endif
1320 
1321 static const struct clockid_map clockids[] = {
1322 	/* available for all events, NMI safe */
1323 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1324 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1325 
1326 	/* available for some events */
1327 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
1328 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1329 	CLOCKID_MAP("tai", CLOCK_TAI),
1330 
1331 	/* available for the lazy */
1332 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1333 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1334 	CLOCKID_MAP("real", CLOCK_REALTIME),
1335 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1336 
1337 	CLOCKID_END,
1338 };
1339 
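/*
 * Parse the -k/--clockid argument.  A raw clockid number ("-k 4"), a name
 * from the clockids[] table ("-k mono") and the same name with a CLOCK_
 * prefix ("-k CLOCK_MONOTONIC") are all accepted.
 */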
1340 static int parse_clockid(const struct option *opt, const char *str, int unset)
1341 {
1342 	struct record_opts *opts = (struct record_opts *)opt->value;
1343 	const struct clockid_map *cm;
1344 	const char *ostr = str;
1345 
1346 	if (unset) {
1347 		opts->use_clockid = 0;
1348 		return 0;
1349 	}
1350 
1351 	/* no arg passed */
1352 	if (!str)
1353 		return 0;
1354 
1355 	/* no setting it twice */
1356 	if (opts->use_clockid)
1357 		return -1;
1358 
1359 	opts->use_clockid = true;
1360 
1361 	/* if it's a number, we're done */
1362 	if (sscanf(str, "%d", &opts->clockid) == 1)
1363 		return 0;
1364 
1365 	/* allow a "CLOCK_" prefix to the name */
1366 	if (!strncasecmp(str, "CLOCK_", 6))
1367 		str += 6;
1368 
1369 	for (cm = clockids; cm->name; cm++) {
1370 		if (!strcasecmp(str, cm->name)) {
1371 			opts->clockid = cm->clockid;
1372 			return 0;
1373 		}
1374 	}
1375 
1376 	opts->use_clockid = false;
1377 	ui__warning("unknown clockid %s, check man page\n", ostr);
1378 	return -1;
1379 }
1380 
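/*
 * Parse -m/--mmap-pages as "pages[,pages]": the value before the comma
 * sizes the data mmap, the one after the comma the AUX area tracing mmap;
 * each part is handed to __perf_evlist__parse_mmap_pages().
 */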
1381 static int record__parse_mmap_pages(const struct option *opt,
1382 				    const char *str,
1383 				    int unset __maybe_unused)
1384 {
1385 	struct record_opts *opts = opt->value;
1386 	char *s, *p;
1387 	unsigned int mmap_pages;
1388 	int ret;
1389 
1390 	if (!str)
1391 		return -EINVAL;
1392 
1393 	s = strdup(str);
1394 	if (!s)
1395 		return -ENOMEM;
1396 
1397 	p = strchr(s, ',');
1398 	if (p)
1399 		*p = '\0';
1400 
1401 	if (*s) {
1402 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1403 		if (ret)
1404 			goto out_free;
1405 		opts->mmap_pages = mmap_pages;
1406 	}
1407 
1408 	if (!p) {
1409 		ret = 0;
1410 		goto out_free;
1411 	}
1412 
1413 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1414 	if (ret)
1415 		goto out_free;
1416 
1417 	opts->auxtrace_mmap_pages = mmap_pages;
1418 
1419 out_free:
1420 	free(s);
1421 	return ret;
1422 }
1423 
1424 static void switch_output_size_warn(struct record *rec)
1425 {
1426 	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1427 	struct switch_output *s = &rec->switch_output;
1428 
1429 	wakeup_size /= 2;
1430 
1431 	if (s->size < wakeup_size) {
1432 		char buf[100];
1433 
1434 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1435 		pr_warning("WARNING: switch-output data size is lower than "
1436 			   "the wakeup kernel buffer size (%s); "
1437 			   "expect bigger perf.data sizes\n", buf);
1438 	}
1439 }
1440 
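/*
 * Interpret the --switch-output argument: "signal" switches on SIGUSR2,
 * a tagged size such as "100M" switches once that much data has been
 * written, and a tagged time such as "30s" re-arms an alarm with that
 * period.
 */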
1441 static int switch_output_setup(struct record *rec)
1442 {
1443 	struct switch_output *s = &rec->switch_output;
1444 	static struct parse_tag tags_size[] = {
1445 		{ .tag  = 'B', .mult = 1       },
1446 		{ .tag  = 'K', .mult = 1 << 10 },
1447 		{ .tag  = 'M', .mult = 1 << 20 },
1448 		{ .tag  = 'G', .mult = 1 << 30 },
1449 		{ .tag  = 0 },
1450 	};
1451 	static struct parse_tag tags_time[] = {
1452 		{ .tag  = 's', .mult = 1        },
1453 		{ .tag  = 'm', .mult = 60       },
1454 		{ .tag  = 'h', .mult = 60*60    },
1455 		{ .tag  = 'd', .mult = 60*60*24 },
1456 		{ .tag  = 0 },
1457 	};
1458 	unsigned long val;
1459 
1460 	if (!s->set)
1461 		return 0;
1462 
1463 	if (!strcmp(s->str, "signal")) {
1464 		s->signal = true;
1465 		pr_debug("switch-output with SIGUSR2 signal\n");
1466 		goto enabled;
1467 	}
1468 
1469 	val = parse_tag_value(s->str, tags_size);
1470 	if (val != (unsigned long) -1) {
1471 		s->size = val;
1472 		pr_debug("switch-output with %s size threshold\n", s->str);
1473 		goto enabled;
1474 	}
1475 
1476 	val = parse_tag_value(s->str, tags_time);
1477 	if (val != (unsigned long) -1) {
1478 		s->time = val;
1479 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1480 			 s->str, s->time);
1481 		goto enabled;
1482 	}
1483 
1484 	return -1;
1485 
1486 enabled:
1487 	rec->timestamp_filename = true;
1488 	s->enabled              = true;
1489 
1490 	if (s->size && !rec->opts.no_buffering)
1491 		switch_output_size_warn(rec);
1492 
1493 	return 0;
1494 }
1495 
1496 static const char * const __record_usage[] = {
1497 	"perf record [<options>] [<command>]",
1498 	"perf record [<options>] -- <command> [<options>]",
1499 	NULL
1500 };
1501 const char * const *record_usage = __record_usage;
1502 
1503 /*
1504  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1505  * because we need to have access to it in record__exit, which is called
1506  * after cmd_record() exits; but since record_options needs to be accessible to
1507  * builtin-script, leave it here.
1508  *
1509  * At least we don't touch it in all the other functions here directly.
1510  *
1511  * Just say no to tons of global variables, sigh.
1512  */
1513 static struct record record = {
1514 	.opts = {
1515 		.sample_time	     = true,
1516 		.mmap_pages	     = UINT_MAX,
1517 		.user_freq	     = UINT_MAX,
1518 		.user_interval	     = ULLONG_MAX,
1519 		.freq		     = 4000,
1520 		.target		     = {
1521 			.uses_mmap   = true,
1522 			.default_per_cpu = true,
1523 		},
1524 		.proc_map_timeout     = 500,
1525 	},
1526 	.tool = {
1527 		.sample		= process_sample_event,
1528 		.fork		= perf_event__process_fork,
1529 		.exit		= perf_event__process_exit,
1530 		.comm		= perf_event__process_comm,
1531 		.namespaces	= perf_event__process_namespaces,
1532 		.mmap		= perf_event__process_mmap,
1533 		.mmap2		= perf_event__process_mmap2,
1534 		.ordered_events	= true,
1535 	},
1536 };
1537 
1538 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1539 	"\n\t\t\t\tDefault: fp";
1540 
1541 static bool dry_run;
1542 
1543 /*
1544  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1545  * with it and switch to using the library functions in perf_evlist that came
1546  * from builtin-record.c, i.e. use record_opts,
1547  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
1548  * using pipes, etc.
1549  */
1550 static struct option __record_options[] = {
1551 	OPT_CALLBACK('e', "event", &record.evlist, "event",
1552 		     "event selector. use 'perf list' to list available events",
1553 		     parse_events_option),
1554 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1555 		     "event filter", parse_filter),
1556 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1557 			   NULL, "don't record events from perf itself",
1558 			   exclude_perf),
1559 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1560 		    "record events on existing process id"),
1561 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1562 		    "record events on existing thread id"),
1563 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
1564 		    "collect data with this RT SCHED_FIFO priority"),
1565 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1566 		    "collect data without buffering"),
1567 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1568 		    "collect raw sample records from all opened counters"),
1569 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1570 			    "system-wide collection from all CPUs"),
1571 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1572 		    "list of cpus to monitor"),
1573 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1574 	OPT_STRING('o', "output", &record.file.path, "file",
1575 		    "output file name"),
1576 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1577 			&record.opts.no_inherit_set,
1578 			"child tasks do not inherit counters"),
1579 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1580 		    "synthesize non-sample events at the end of output"),
1581 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1582 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1583 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1584 		     "number of mmap data pages and AUX area tracing mmap pages",
1585 		     record__parse_mmap_pages),
1586 	OPT_BOOLEAN(0, "group", &record.opts.group,
1587 		    "put the counters into a counter group"),
1588 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1589 			   NULL, "enables call-graph recording" ,
1590 			   &record_callchain_opt),
1591 	OPT_CALLBACK(0, "call-graph", &record.opts,
1592 		     "record_mode[,record_size]", record_callchain_help,
1593 		     &record_parse_callchain_opt),
1594 	OPT_INCR('v', "verbose", &verbose,
1595 		    "be more verbose (show counter open errors, etc)"),
1596 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1597 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1598 		    "per thread counts"),
1599 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1600 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1601 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1602 			&record.opts.sample_time_set,
1603 			"Record the sample timestamps"),
1604 	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1605 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1606 		    "don't sample"),
1607 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1608 			&record.no_buildid_cache_set,
1609 			"do not update the buildid cache"),
1610 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1611 			&record.no_buildid_set,
1612 			"do not collect buildids in perf.data"),
1613 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1614 		     "monitor event in cgroup name only",
1615 		     parse_cgroups),
1616 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1617 		  "ms to wait before starting measurement after program start"),
1618 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1619 		   "user to profile"),
1620 
1621 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1622 		     "branch any", "sample any taken branches",
1623 		     parse_branch_stack),
1624 
1625 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1626 		     "branch filter mask", "branch stack filter modes",
1627 		     parse_branch_stack),
1628 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1629 		    "sample by weight (on special events only)"),
1630 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1631 		    "sample transaction flags (special events only)"),
1632 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1633 		    "use per-thread mmaps"),
1634 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1635 		    "sample selected machine registers on interrupt,"
1636 		    " use -I ? to list register names", parse_regs),
1637 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1638 		    "Record running/enabled time of read (:S) events"),
1639 	OPT_CALLBACK('k', "clockid", &record.opts,
1640 	"clockid", "clockid to use for events, see clock_gettime()",
1641 	parse_clockid),
1642 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1643 			  "opts", "AUX area tracing Snapshot Mode", ""),
1644 	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1645 			"per thread proc mmap processing timeout in ms"),
1646 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1647 		    "Record namespace events"),
1648 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1649 		    "Record context switch events"),
1650 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1651 			 "Configure all used events to run in kernel space.",
1652 			 PARSE_OPT_EXCLUSIVE),
1653 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1654 			 "Configure all used events to run in user space.",
1655 			 PARSE_OPT_EXCLUSIVE),
1656 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1657 		   "clang binary to use for compiling BPF scriptlets"),
1658 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1659 		   "options passed to clang when compiling BPF scriptlets"),
1660 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1661 		   "file", "vmlinux pathname"),
1662 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1663 		    "Record build-id of all DSOs regardless of hits"),
1664 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1665 		    "append timestamp to output filename"),
1666 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1667 			  &record.switch_output.set, "signal,size,time",
1668 			  "Switch output when receiving SIGUSR2 or crossing a size or time threshold",
1669 			  "signal"),
1670 	OPT_BOOLEAN(0, "dry-run", &dry_run,
1671 		    "Parse options then exit"),
1672 	OPT_END()
1673 };
1674 
1675 struct option *record_options = __record_options;
1676 
1677 int cmd_record(int argc, const char **argv)
1678 {
1679 	int err;
1680 	struct record *rec = &record;
1681 	char errbuf[BUFSIZ];
1682 
1683 #ifndef HAVE_LIBBPF_SUPPORT
1684 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1685 	set_nobuild('\0', "clang-path", true);
1686 	set_nobuild('\0', "clang-opt", true);
1687 # undef set_nobuild
1688 #endif
1689 
1690 #ifndef HAVE_BPF_PROLOGUE
1691 # if !defined (HAVE_DWARF_SUPPORT)
1692 #  define REASON  "NO_DWARF=1"
1693 # elif !defined (HAVE_LIBBPF_SUPPORT)
1694 #  define REASON  "NO_LIBBPF=1"
1695 # else
1696 #  define REASON  "this architecture doesn't support BPF prologue"
1697 # endif
1698 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1699 	set_nobuild('\0', "vmlinux", true);
1700 # undef set_nobuild
1701 # undef REASON
1702 #endif
1703 
1704 	rec->evlist = perf_evlist__new();
1705 	if (rec->evlist == NULL)
1706 		return -ENOMEM;
1707 
1708 	err = perf_config(perf_record_config, rec);
1709 	if (err)
1710 		return err;
1711 
1712 	argc = parse_options(argc, argv, record_options, record_usage,
1713 			    PARSE_OPT_STOP_AT_NON_OPTION);
1714 	if (quiet)
1715 		perf_quiet_option();
1716 
1717 	/* Make system wide (-a) the default target. */
1718 	if (!argc && target__none(&rec->opts.target))
1719 		rec->opts.target.system_wide = true;
1720 
1721 	if (nr_cgroups && !rec->opts.target.system_wide) {
1722 		usage_with_options_msg(record_usage, record_options,
1723 			"cgroup monitoring only available in system-wide mode");
1724 
1725 	}
1726 	if (rec->opts.record_switch_events &&
1727 	    !perf_can_record_switch_events()) {
1728 		ui__error("kernel does not support recording context switch events\n");
1729 		parse_options_usage(record_usage, record_options, "switch-events", 0);
1730 		return -EINVAL;
1731 	}
1732 
1733 	if (switch_output_setup(rec)) {
1734 		parse_options_usage(record_usage, record_options, "switch-output", 0);
1735 		return -EINVAL;
1736 	}
1737 
1738 	if (rec->switch_output.time) {
1739 		signal(SIGALRM, alarm_sig_handler);
1740 		alarm(rec->switch_output.time);
1741 	}
1742 
1743 	if (!rec->itr) {
1744 		rec->itr = auxtrace_record__init(rec->evlist, &err);
1745 		if (err)
1746 			goto out;
1747 	}
1748 
1749 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1750 					      rec->opts.auxtrace_snapshot_opts);
1751 	if (err)
1752 		goto out;
1753 
1754 	/*
1755 	 * Allow aliases to facilitate the lookup of symbols for address
1756 	 * filters. Refer to auxtrace_parse_filters().
1757 	 */
1758 	symbol_conf.allow_aliases = true;
1759 
1760 	symbol__init(NULL);
1761 
1762 	err = auxtrace_parse_filters(rec->evlist);
1763 	if (err)
1764 		goto out;
1765 
1766 	if (dry_run)
1767 		goto out;
1768 
1769 	err = bpf__setup_stdout(rec->evlist);
1770 	if (err) {
1771 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1772 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
1773 			 errbuf);
1774 		goto out;
1775 	}
1776 
1777 	err = -ENOMEM;
1778 
1779 	if (symbol_conf.kptr_restrict)
1780 		pr_warning(
1781 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1782 "check /proc/sys/kernel/kptr_restrict.\n\n"
1783 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1784 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1785 "Samples in kernel modules won't be resolved at all.\n\n"
1786 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1787 "even with a suitable vmlinux or kallsyms file.\n\n");
1788 
1789 	if (rec->no_buildid_cache || rec->no_buildid) {
1790 		disable_buildid_cache();
1791 	} else if (rec->switch_output.enabled) {
1792 		/*
1793 		 * In 'perf record --switch-output', disable buildid
1794 		 * generation by default to reduce data file switching
1795 		 * overhead. Still generate buildids if they are explicitly
1796 		 * required, using
1797 		 *
1798 		 *  perf record --switch-output --no-no-buildid \
1799 		 *              --no-no-buildid-cache
1800 		 *
1801 		 * The following code is equivalent to:
1802 		 *
1803 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1804 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1805 		 *         disable_buildid_cache();
1806 		 */
1807 		bool disable = true;
1808 
1809 		if (rec->no_buildid_set && !rec->no_buildid)
1810 			disable = false;
1811 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1812 			disable = false;
1813 		if (disable) {
1814 			rec->no_buildid = true;
1815 			rec->no_buildid_cache = true;
1816 			disable_buildid_cache();
1817 		}
1818 	}
1819 
1820 	if (record.opts.overwrite)
1821 		record.opts.tail_synthesize = true;
1822 
1823 	if (rec->evlist->nr_entries == 0 &&
1824 	    perf_evlist__add_default(rec->evlist) < 0) {
1825 		pr_err("Not enough memory for event selector list\n");
1826 		goto out;
1827 	}
1828 
1829 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1830 		rec->opts.no_inherit = true;
1831 
1832 	err = target__validate(&rec->opts.target);
1833 	if (err) {
1834 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1835 		ui__warning("%s", errbuf);
1836 	}
1837 
1838 	err = target__parse_uid(&rec->opts.target);
1839 	if (err) {
1840 		int saved_errno = errno;
1841 
1842 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1843 		ui__error("%s", errbuf);
1844 
1845 		err = -saved_errno;
1846 		goto out;
1847 	}
1848 
1849 	/* Enable ignoring missing threads when -u option is defined. */
1850 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1851 
1852 	err = -ENOMEM;
1853 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1854 		usage_with_options(record_usage, record_options);
1855 
1856 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1857 	if (err)
1858 		goto out;
1859 
1860 	/*
1861 	 * We take all buildids when the file contains
1862 	 * AUX area tracing data, because we do not decode the
1863 	 * trace, which would take too long.
1864 	 */
1865 	if (rec->opts.full_auxtrace)
1866 		rec->buildid_all = true;
1867 
1868 	if (record_opts__config(&rec->opts)) {
1869 		err = -EINVAL;
1870 		goto out;
1871 	}
1872 
1873 	err = __cmd_record(&record, argc, argv);
1874 out:
1875 	perf_evlist__delete(rec->evlist);
1876 	symbol__exit();
1877 	auxtrace_record__free(rec->itr);
1878 	return err;
1879 }
1880 
1881 static void snapshot_sig_handler(int sig __maybe_unused)
1882 {
1883 	struct record *rec = &record;
1884 
1885 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1886 		trigger_hit(&auxtrace_snapshot_trigger);
1887 		auxtrace_record__snapshot_started = 1;
1888 		if (auxtrace_record__snapshot_start(record.itr))
1889 			trigger_error(&auxtrace_snapshot_trigger);
1890 	}
1891 
1892 	if (switch_output_signal(rec))
1893 		trigger_hit(&switch_output_trigger);
1894 }
1895 
1896 static void alarm_sig_handler(int sig __maybe_unused)
1897 {
1898 	struct record *rec = &record;
1899 
1900 	if (switch_output_time(rec))
1901 		trigger_hit(&switch_output_trigger);
1902 }
1903