xref: /openbmc/linux/tools/perf/builtin-record.c (revision 53df2b93)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * builtin-record.c
4  *
5  * Builtin record command: Record the profile of a workload
6  * (or a CPU, or a PID) into the perf.data output file - for
7  * later analysis via perf report.
8  */
9 #include "builtin.h"
10 
11 #include "util/build-id.h"
12 #include <subcmd/parse-options.h>
13 #include "util/parse-events.h"
14 #include "util/config.h"
15 
16 #include "util/callchain.h"
17 #include "util/cgroup.h"
18 #include "util/header.h"
19 #include "util/event.h"
20 #include "util/evlist.h"
21 #include "util/evsel.h"
22 #include "util/debug.h"
23 #include "util/mmap.h"
24 #include "util/target.h"
25 #include "util/session.h"
26 #include "util/tool.h"
27 #include "util/symbol.h"
28 #include "util/record.h"
29 #include "util/cpumap.h"
30 #include "util/thread_map.h"
31 #include "util/data.h"
32 #include "util/perf_regs.h"
33 #include "util/auxtrace.h"
34 #include "util/tsc.h"
35 #include "util/parse-branch-options.h"
36 #include "util/parse-regs-options.h"
37 #include "util/perf_api_probe.h"
38 #include "util/llvm-utils.h"
39 #include "util/bpf-loader.h"
40 #include "util/trigger.h"
41 #include "util/perf-hooks.h"
42 #include "util/cpu-set-sched.h"
43 #include "util/synthetic-events.h"
44 #include "util/time-utils.h"
45 #include "util/units.h"
46 #include "util/bpf-event.h"
47 #include "util/util.h"
48 #include "asm/bug.h"
49 #include "perf.h"
50 
51 #include <errno.h>
52 #include <inttypes.h>
53 #include <locale.h>
54 #include <poll.h>
55 #include <pthread.h>
56 #include <unistd.h>
57 #include <sched.h>
58 #include <signal.h>
59 #include <sys/mman.h>
60 #include <sys/wait.h>
61 #include <sys/types.h>
62 #include <sys/stat.h>
63 #include <fcntl.h>
64 #include <linux/err.h>
65 #include <linux/string.h>
66 #include <linux/time64.h>
67 #include <linux/zalloc.h>
68 #include <linux/bitmap.h>
69 
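/*
 * State controlling --switch-output rotation of the output file: whether
 * rotation is triggered by SIGUSR2, by an output size threshold or by a
 * timer, plus the ring of rotated file names kept when a maximum number
 * of files is requested.
 */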
70 struct switch_output {
71 	bool		 enabled;
72 	bool		 signal;
73 	unsigned long	 size;
74 	unsigned long	 time;
75 	const char	*str;
76 	bool		 set;
77 	char		 **filenames;
78 	int		 num_files;
79 	int		 cur_file;
80 };
81 
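/*
 * Per-invocation state of 'perf record': the tool callbacks, parsed record
 * options, output data file, auxtrace recording handle, event lists and
 * session, plus bookkeeping for build-id processing, output rotation,
 * thread affinity and the output size limit.
 */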
82 struct record {
83 	struct perf_tool	tool;
84 	struct record_opts	opts;
85 	u64			bytes_written;
86 	struct perf_data	data;
87 	struct auxtrace_record	*itr;
88 	struct evlist	*evlist;
89 	struct perf_session	*session;
90 	struct evlist		*sb_evlist;
91 	pthread_t		thread_id;
92 	int			realtime_prio;
93 	bool			switch_output_event_set;
94 	bool			no_buildid;
95 	bool			no_buildid_set;
96 	bool			no_buildid_cache;
97 	bool			no_buildid_cache_set;
98 	bool			buildid_all;
99 	bool			timestamp_filename;
100 	bool			timestamp_boundary;
101 	struct switch_output	switch_output;
102 	unsigned long long	samples;
103 	struct mmap_cpu_mask	affinity_mask;
104 	unsigned long		output_max_size;	/* = 0: unlimited */
105 };
106 
107 static volatile int done;
108 
109 static volatile int auxtrace_record__snapshot_started;
110 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
111 static DEFINE_TRIGGER(switch_output_trigger);
112 
113 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
114 	"SYS", "NODE", "CPU"
115 };
116 
117 static bool switch_output_signal(struct record *rec)
118 {
119 	return rec->switch_output.signal &&
120 	       trigger_is_ready(&switch_output_trigger);
121 }
122 
123 static bool switch_output_size(struct record *rec)
124 {
125 	return rec->switch_output.size &&
126 	       trigger_is_ready(&switch_output_trigger) &&
127 	       (rec->bytes_written >= rec->switch_output.size);
128 }
129 
130 static bool switch_output_time(struct record *rec)
131 {
132 	return rec->switch_output.time &&
133 	       trigger_is_ready(&switch_output_trigger);
134 }
135 
136 static bool record__output_max_size_exceeded(struct record *rec)
137 {
138 	return rec->output_max_size &&
139 	       (rec->bytes_written >= rec->output_max_size);
140 }
141 
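/*
 * Append 'size' bytes from 'bf' to the output file, account them in
 * rec->bytes_written, stop the session once the configured maximum output
 * size is exceeded and arm the switch-output trigger when its size
 * threshold has been reached.
 */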
142 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
143 			 void *bf, size_t size)
144 {
145 	struct perf_data_file *file = &rec->session->data->file;
146 
147 	if (perf_data_file__write(file, bf, size) < 0) {
148 		pr_err("failed to write perf data, error: %m\n");
149 		return -1;
150 	}
151 
152 	rec->bytes_written += size;
153 
154 	if (record__output_max_size_exceeded(rec) && !done) {
155 		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
156 				" stopping session ]\n",
157 				rec->bytes_written >> 10);
158 		done = 1;
159 	}
160 
161 	if (switch_output_size(rec))
162 		trigger_hit(&switch_output_trigger);
163 
164 	return 0;
165 }
166 
167 static int record__aio_enabled(struct record *rec);
168 static int record__comp_enabled(struct record *rec);
169 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
170 			    void *src, size_t src_size);
171 
172 #ifdef HAVE_AIO_SUPPORT
173 static int record__aio_write(struct aiocb *cblock, int trace_fd,
174 		void *buf, size_t size, off_t off)
175 {
176 	int rc;
177 
178 	cblock->aio_fildes = trace_fd;
179 	cblock->aio_buf    = buf;
180 	cblock->aio_nbytes = size;
181 	cblock->aio_offset = off;
182 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
183 
184 	do {
185 		rc = aio_write(cblock);
186 		if (rc == 0) {
187 			break;
188 		} else if (errno != EAGAIN) {
189 			cblock->aio_fildes = -1;
190 			pr_err("failed to queue perf data, error: %m\n");
191 			break;
192 		}
193 	} while (1);
194 
195 	return rc;
196 }
197 
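/*
 * Check the state of one outstanding asynchronous write: restart it with
 * the remaining bytes if the kernel wrote only part of the chunk, or drop
 * the mmap reference once the whole chunk is on disk.  Returns 1 when the
 * control block is free again, 0 while a write is still in flight.
 */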
198 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
199 {
200 	void *rem_buf;
201 	off_t rem_off;
202 	size_t rem_size;
203 	int rc, aio_errno;
204 	ssize_t aio_ret, written;
205 
206 	aio_errno = aio_error(cblock);
207 	if (aio_errno == EINPROGRESS)
208 		return 0;
209 
210 	written = aio_ret = aio_return(cblock);
211 	if (aio_ret < 0) {
212 		if (aio_errno != EINTR)
213 			pr_err("failed to write perf data, error: %m\n");
214 		written = 0;
215 	}
216 
217 	rem_size = cblock->aio_nbytes - written;
218 
219 	if (rem_size == 0) {
220 		cblock->aio_fildes = -1;
221 		/*
222 		 * md->refcount is incremented in record__aio_pushfn() for
223 		 * every aio write request started in record__aio_push() so
224 		 * decrement it because the request is now complete.
225 		 */
226 		perf_mmap__put(&md->core);
227 		rc = 1;
228 	} else {
229 		/*
230 		 * The aio write request may require a restart with the
231 		 * remainder if the kernel didn't write the whole
232 		 * chunk at once.
233 		 */
234 		rem_off = cblock->aio_offset + written;
235 		rem_buf = (void *)(cblock->aio_buf + written);
236 		record__aio_write(cblock, cblock->aio_fildes,
237 				rem_buf, rem_size, rem_off);
238 		rc = 0;
239 	}
240 
241 	return rc;
242 }
243 
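/*
 * Wait for asynchronous writes on 'md': with sync_all == false return the
 * index of the first control block that is (or becomes) free for reuse,
 * otherwise keep suspending until every outstanding write has completed
 * and return -1.
 */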
244 static int record__aio_sync(struct mmap *md, bool sync_all)
245 {
246 	struct aiocb **aiocb = md->aio.aiocb;
247 	struct aiocb *cblocks = md->aio.cblocks;
248 	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
249 	int i, do_suspend;
250 
251 	do {
252 		do_suspend = 0;
253 		for (i = 0; i < md->aio.nr_cblocks; ++i) {
254 			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
255 				if (sync_all)
256 					aiocb[i] = NULL;
257 				else
258 					return i;
259 			} else {
260 				/*
261 				 * The started aio write is not complete yet,
262 				 * so it has to be waited for before the
263 				 * next allocation.
264 				 */
265 				aiocb[i] = &cblocks[i];
266 				do_suspend = 1;
267 			}
268 		}
269 		if (!do_suspend)
270 			return -1;
271 
272 		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
273 			if (!(errno == EAGAIN || errno == EINTR))
274 				pr_err("failed to sync perf data, error: %m\n");
275 		}
276 	} while (1);
277 }
278 
279 struct record_aio {
280 	struct record	*rec;
281 	void		*data;
282 	size_t		size;
283 };
284 
285 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
286 {
287 	struct record_aio *aio = to;
288 
289 	/*
290 	 * map->core.base data pointed to by buf is copied into a free map->aio.data[] buffer
291 	 * to release space in the kernel buffer as fast as possible, by calling
292 	 * perf_mmap__consume() from the perf_mmap__push() function.
293 	 *
294 	 * That lets the kernel proceed with storing more profiling data into
295 	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
296 	 *
297 	 * Copying can be done in two steps in case the chunk of profiling data
298 	 * crosses the upper bound of the kernel buffer. In this case we first move
299 	 * part of the data from map->start till the upper bound and then the remainder
300 	 * from the beginning of the kernel buffer till the end of the data chunk.
301 	 */
302 
303 	if (record__comp_enabled(aio->rec)) {
304 		size = zstd_compress(aio->rec->session, aio->data + aio->size,
305 				     mmap__mmap_len(map) - aio->size,
306 				     buf, size);
307 	} else {
308 		memcpy(aio->data + aio->size, buf, size);
309 	}
310 
311 	if (!aio->size) {
312 		/*
313 		 * Increment map->refcount to guard the map->aio.data[] buffer
314 		 * from premature deallocation, because the map object can be
315 		 * released before the aio write request started on the
316 		 * map->aio.data[] buffer has completed.
317 		 *
318 		 * perf_mmap__put() is done at record__aio_complete() after the
319 		 * started aio request completes, or at record__aio_push() if
320 		 * the request failed to start.
321 		 */
322 		perf_mmap__get(&map->core);
323 	}
324 
325 	aio->size += size;
326 
327 	return size;
328 }
329 
330 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
331 {
332 	int ret, idx;
333 	int trace_fd = rec->session->data->file.fd;
334 	struct record_aio aio = { .rec = rec, .size = 0 };
335 
336 	/*
337 	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
338 	 * becomes available after the previous aio write operation.
339 	 */
340 
341 	idx = record__aio_sync(map, false);
342 	aio.data = map->aio.data[idx];
343 	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
344 	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
345 		return ret;
346 
347 	rec->samples++;
348 	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
349 	if (!ret) {
350 		*off += aio.size;
351 		rec->bytes_written += aio.size;
352 		if (switch_output_size(rec))
353 			trigger_hit(&switch_output_trigger);
354 	} else {
355 		/*
356 		 * Decrement the map->refcount incremented in record__aio_pushfn()
357 		 * if the record__aio_write() operation failed to start; otherwise
358 		 * map->refcount is decremented in record__aio_complete() after the
359 		 * aio write operation finishes successfully.
360 		 */
361 		perf_mmap__put(&map->core);
362 	}
363 
364 	return ret;
365 }
366 
367 static off_t record__aio_get_pos(int trace_fd)
368 {
369 	return lseek(trace_fd, 0, SEEK_CUR);
370 }
371 
372 static void record__aio_set_pos(int trace_fd, off_t pos)
373 {
374 	lseek(trace_fd, pos, SEEK_SET);
375 }
376 
377 static void record__aio_mmap_read_sync(struct record *rec)
378 {
379 	int i;
380 	struct evlist *evlist = rec->evlist;
381 	struct mmap *maps = evlist->mmap;
382 
383 	if (!record__aio_enabled(rec))
384 		return;
385 
386 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
387 		struct mmap *map = &maps[i];
388 
389 		if (map->core.base)
390 			record__aio_sync(map, true);
391 	}
392 }
393 
394 static int nr_cblocks_default = 1;
395 static int nr_cblocks_max = 4;
396 
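/*
 * Parse the --aio[=n] option value into opts->nr_cblocks, the number of
 * asynchronous write control blocks per mmap'ed buffer; an empty value
 * selects nr_cblocks_default, and negating the option disables AIO by
 * setting it to 0.
 */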
397 static int record__aio_parse(const struct option *opt,
398 			     const char *str,
399 			     int unset)
400 {
401 	struct record_opts *opts = (struct record_opts *)opt->value;
402 
403 	if (unset) {
404 		opts->nr_cblocks = 0;
405 	} else {
406 		if (str)
407 			opts->nr_cblocks = strtol(str, NULL, 0);
408 		if (!opts->nr_cblocks)
409 			opts->nr_cblocks = nr_cblocks_default;
410 	}
411 
412 	return 0;
413 }
414 #else /* HAVE_AIO_SUPPORT */
415 static int nr_cblocks_max = 0;
416 
417 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
418 			    off_t *off __maybe_unused)
419 {
420 	return -1;
421 }
422 
423 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
424 {
425 	return -1;
426 }
427 
428 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
429 {
430 }
431 
432 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
433 {
434 }
435 #endif
436 
437 static int record__aio_enabled(struct record *rec)
438 {
439 	return rec->opts.nr_cblocks > 0;
440 }
441 
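/*
 * Parse the --mmap-flush option: the minimal number of bytes that must
 * accumulate in a mmap'ed ring buffer before its data is written out.
 * Accepts B/K/M/G suffixes, defaults to MMAP_FLUSH_DEFAULT and is capped
 * at a quarter of the mmap buffer size.
 */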
442 #define MMAP_FLUSH_DEFAULT 1
443 static int record__mmap_flush_parse(const struct option *opt,
444 				    const char *str,
445 				    int unset)
446 {
447 	int flush_max;
448 	struct record_opts *opts = (struct record_opts *)opt->value;
449 	static struct parse_tag tags[] = {
450 			{ .tag  = 'B', .mult = 1       },
451 			{ .tag  = 'K', .mult = 1 << 10 },
452 			{ .tag  = 'M', .mult = 1 << 20 },
453 			{ .tag  = 'G', .mult = 1 << 30 },
454 			{ .tag  = 0 },
455 	};
456 
457 	if (unset)
458 		return 0;
459 
460 	if (str) {
461 		opts->mmap_flush = parse_tag_value(str, tags);
462 		if (opts->mmap_flush == (int)-1)
463 			opts->mmap_flush = strtol(str, NULL, 0);
464 	}
465 
466 	if (!opts->mmap_flush)
467 		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
468 
469 	flush_max = evlist__mmap_size(opts->mmap_pages);
470 	flush_max /= 4;
471 	if (opts->mmap_flush > flush_max)
472 		opts->mmap_flush = flush_max;
473 
474 	return 0;
475 }
476 
477 #ifdef HAVE_ZSTD_SUPPORT
478 static unsigned int comp_level_default = 1;
479 
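/*
 * Parse the -z/--compression-level option: the zstd level used for
 * PERF_RECORD_COMPRESSED records; an empty value selects comp_level_default,
 * and negating the option sets the level back to 0 (no compression).
 */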
480 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
481 {
482 	struct record_opts *opts = opt->value;
483 
484 	if (unset) {
485 		opts->comp_level = 0;
486 	} else {
487 		if (str)
488 			opts->comp_level = strtol(str, NULL, 0);
489 		if (!opts->comp_level)
490 			opts->comp_level = comp_level_default;
491 	}
492 
493 	return 0;
494 }
495 #endif
496 static unsigned int comp_level_max = 22;
497 
498 static int record__comp_enabled(struct record *rec)
499 {
500 	return rec->opts.comp_level > 0;
501 }
502 
503 static int process_synthesized_event(struct perf_tool *tool,
504 				     union perf_event *event,
505 				     struct perf_sample *sample __maybe_unused,
506 				     struct machine *machine __maybe_unused)
507 {
508 	struct record *rec = container_of(tool, struct record, tool);
509 	return record__write(rec, NULL, event, event->header.size);
510 }
511 
512 static int process_locked_synthesized_event(struct perf_tool *tool,
513 				     union perf_event *event,
514 				     struct perf_sample *sample __maybe_unused,
515 				     struct machine *machine __maybe_unused)
516 {
517 	static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
518 	int ret;
519 
520 	pthread_mutex_lock(&synth_lock);
521 	ret = process_synthesized_event(tool, event, sample, machine);
522 	pthread_mutex_unlock(&synth_lock);
523 	return ret;
524 }
525 
526 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
527 {
528 	struct record *rec = to;
529 
530 	if (record__comp_enabled(rec)) {
531 		size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
532 		bf   = map->data;
533 	}
534 
535 	rec->samples++;
536 	return record__write(rec, map, bf, size);
537 }
538 
539 static volatile int signr = -1;
540 static volatile int child_finished;
541 
542 static void sig_handler(int sig)
543 {
544 	if (sig == SIGCHLD)
545 		child_finished = 1;
546 	else
547 		signr = sig;
548 
549 	done = 1;
550 }
551 
552 static void sigsegv_handler(int sig)
553 {
554 	perf_hooks__recover();
555 	sighandler_dump_stack(sig);
556 }
557 
558 static void record__sig_exit(void)
559 {
560 	if (signr == -1)
561 		return;
562 
563 	signal(signr, SIG_DFL);
564 	raise(signr);
565 }
566 
567 #ifdef HAVE_AUXTRACE_SUPPORT
568 
569 static int record__process_auxtrace(struct perf_tool *tool,
570 				    struct mmap *map,
571 				    union perf_event *event, void *data1,
572 				    size_t len1, void *data2, size_t len2)
573 {
574 	struct record *rec = container_of(tool, struct record, tool);
575 	struct perf_data *data = &rec->data;
576 	size_t padding;
577 	u8 pad[8] = {0};
578 
579 	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
580 		off_t file_offset;
581 		int fd = perf_data__fd(data);
582 		int err;
583 
584 		file_offset = lseek(fd, 0, SEEK_CUR);
585 		if (file_offset == -1)
586 			return -1;
587 		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
588 						     event, file_offset);
589 		if (err)
590 			return err;
591 	}
592 
593 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
594 	padding = (len1 + len2) & 7;
595 	if (padding)
596 		padding = 8 - padding;
597 
598 	record__write(rec, map, event, event->header.size);
599 	record__write(rec, map, data1, len1);
600 	if (len2)
601 		record__write(rec, map, data2, len2);
602 	record__write(rec, map, &pad, padding);
603 
604 	return 0;
605 }
606 
607 static int record__auxtrace_mmap_read(struct record *rec,
608 				      struct mmap *map)
609 {
610 	int ret;
611 
612 	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
613 				  record__process_auxtrace);
614 	if (ret < 0)
615 		return ret;
616 
617 	if (ret)
618 		rec->samples++;
619 
620 	return 0;
621 }
622 
623 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
624 					       struct mmap *map)
625 {
626 	int ret;
627 
628 	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
629 					   record__process_auxtrace,
630 					   rec->opts.auxtrace_snapshot_size);
631 	if (ret < 0)
632 		return ret;
633 
634 	if (ret)
635 		rec->samples++;
636 
637 	return 0;
638 }
639 
640 static int record__auxtrace_read_snapshot_all(struct record *rec)
641 {
642 	int i;
643 	int rc = 0;
644 
645 	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
646 		struct mmap *map = &rec->evlist->mmap[i];
647 
648 		if (!map->auxtrace_mmap.base)
649 			continue;
650 
651 		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
652 			rc = -1;
653 			goto out;
654 		}
655 	}
656 out:
657 	return rc;
658 }
659 
660 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
661 {
662 	pr_debug("Recording AUX area tracing snapshot\n");
663 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
664 		trigger_error(&auxtrace_snapshot_trigger);
665 	} else {
666 		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
667 			trigger_error(&auxtrace_snapshot_trigger);
668 		else
669 			trigger_ready(&auxtrace_snapshot_trigger);
670 	}
671 }
672 
673 static int record__auxtrace_snapshot_exit(struct record *rec)
674 {
675 	if (trigger_is_error(&auxtrace_snapshot_trigger))
676 		return 0;
677 
678 	if (!auxtrace_record__snapshot_started &&
679 	    auxtrace_record__snapshot_start(rec->itr))
680 		return -1;
681 
682 	record__read_auxtrace_snapshot(rec, true);
683 	if (trigger_is_error(&auxtrace_snapshot_trigger))
684 		return -1;
685 
686 	return 0;
687 }
688 
689 static int record__auxtrace_init(struct record *rec)
690 {
691 	int err;
692 
693 	if (!rec->itr) {
694 		rec->itr = auxtrace_record__init(rec->evlist, &err);
695 		if (err)
696 			return err;
697 	}
698 
699 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
700 					      rec->opts.auxtrace_snapshot_opts);
701 	if (err)
702 		return err;
703 
704 	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
705 					    rec->opts.auxtrace_sample_opts);
706 	if (err)
707 		return err;
708 
709 	return auxtrace_parse_filters(rec->evlist);
710 }
711 
712 #else
713 
714 static inline
715 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
716 			       struct mmap *map __maybe_unused)
717 {
718 	return 0;
719 }
720 
721 static inline
722 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
723 				    bool on_exit __maybe_unused)
724 {
725 }
726 
727 static inline
728 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
729 {
730 	return 0;
731 }
732 
733 static inline
734 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
735 {
736 	return 0;
737 }
738 
739 static int record__auxtrace_init(struct record *rec __maybe_unused)
740 {
741 	return 0;
742 }
743 
744 #endif
745 
746 static bool record__kcore_readable(struct machine *machine)
747 {
748 	char kcore[PATH_MAX];
749 	int fd;
750 
751 	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
752 
753 	fd = open(kcore, O_RDONLY);
754 	if (fd < 0)
755 		return false;
756 
757 	close(fd);
758 
759 	return true;
760 }
761 
762 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
763 {
764 	char from_dir[PATH_MAX];
765 	char kcore_dir[PATH_MAX];
766 	int ret;
767 
768 	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
769 
770 	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
771 	if (ret)
772 		return ret;
773 
774 	return kcore_copy(from_dir, kcore_dir);
775 }
776 
777 static int record__mmap_evlist(struct record *rec,
778 			       struct evlist *evlist)
779 {
780 	struct record_opts *opts = &rec->opts;
781 	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
782 				  opts->auxtrace_sample_mode;
783 	char msg[512];
784 
785 	if (opts->affinity != PERF_AFFINITY_SYS)
786 		cpu__setup_cpunode_map();
787 
788 	if (evlist__mmap_ex(evlist, opts->mmap_pages,
789 				 opts->auxtrace_mmap_pages,
790 				 auxtrace_overwrite,
791 				 opts->nr_cblocks, opts->affinity,
792 				 opts->mmap_flush, opts->comp_level) < 0) {
793 		if (errno == EPERM) {
794 			pr_err("Permission error mapping pages.\n"
795 			       "Consider increasing "
796 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
797 			       "or try again with a smaller value of -m/--mmap_pages.\n"
798 			       "(current value: %u,%u)\n",
799 			       opts->mmap_pages, opts->auxtrace_mmap_pages);
800 			return -errno;
801 		} else {
802 			pr_err("failed to mmap with %d (%s)\n", errno,
803 				str_error_r(errno, msg, sizeof(msg)));
804 			if (errno)
805 				return -errno;
806 			else
807 				return -EINVAL;
808 		}
809 	}
810 	return 0;
811 }
812 
813 static int record__mmap(struct record *rec)
814 {
815 	return record__mmap_evlist(rec, rec->evlist);
816 }
817 
818 static int record__open(struct record *rec)
819 {
820 	char msg[BUFSIZ];
821 	struct evsel *pos;
822 	struct evlist *evlist = rec->evlist;
823 	struct perf_session *session = rec->session;
824 	struct record_opts *opts = &rec->opts;
825 	int rc = 0;
826 
827 	/*
828 	 * For initial_delay we need to add a dummy event so that we can track
829 	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
830 	 * real events, the ones asked for by the user.
831 	 */
832 	if (opts->initial_delay) {
833 		if (perf_evlist__add_dummy(evlist))
834 			return -ENOMEM;
835 
836 		pos = evlist__first(evlist);
837 		pos->tracking = 0;
838 		pos = evlist__last(evlist);
839 		pos->tracking = 1;
840 		pos->core.attr.enable_on_exec = 1;
841 	}
842 
843 	perf_evlist__config(evlist, opts, &callchain_param);
844 
845 	evlist__for_each_entry(evlist, pos) {
846 try_again:
847 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
848 			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
849 				if (verbose > 0)
850 					ui__warning("%s\n", msg);
851 				goto try_again;
852 			}
853 			if ((errno == EINVAL || errno == EBADF) &&
854 			    pos->leader != pos &&
855 			    pos->weak_group) {
856 			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
857 				goto try_again;
858 			}
859 			rc = -errno;
860 			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
861 			ui__error("%s\n", msg);
862 			goto out;
863 		}
864 
865 		pos->supported = true;
866 	}
867 
868 	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
869 		pr_warning(
870 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
871 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
872 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
873 "file is not found in the buildid cache or in the vmlinux path.\n\n"
874 "Samples in kernel modules won't be resolved at all.\n\n"
875 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
876 "even with a suitable vmlinux or kallsyms file.\n\n");
877 	}
878 
879 	if (perf_evlist__apply_filters(evlist, &pos)) {
880 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
881 			pos->filter, evsel__name(pos), errno,
882 			str_error_r(errno, msg, sizeof(msg)));
883 		rc = -1;
884 		goto out;
885 	}
886 
887 	rc = record__mmap(rec);
888 	if (rc)
889 		goto out;
890 
891 	session->evlist = evlist;
892 	perf_session__set_id_hdr_size(session);
893 out:
894 	return rc;
895 }
896 
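/*
 * Sample callback used while post-processing the recorded data for
 * build-ids: track the first/last sample times and, unless --buildid-all
 * is in effect, mark the DSO hit by each sample.
 */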
897 static int process_sample_event(struct perf_tool *tool,
898 				union perf_event *event,
899 				struct perf_sample *sample,
900 				struct evsel *evsel,
901 				struct machine *machine)
902 {
903 	struct record *rec = container_of(tool, struct record, tool);
904 
905 	if (rec->evlist->first_sample_time == 0)
906 		rec->evlist->first_sample_time = sample->time;
907 
908 	rec->evlist->last_sample_time = sample->time;
909 
910 	if (rec->buildid_all)
911 		return 0;
912 
913 	rec->samples++;
914 	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
915 }
916 
917 static int process_buildids(struct record *rec)
918 {
919 	struct perf_session *session = rec->session;
920 
921 	if (perf_data__size(&rec->data) == 0)
922 		return 0;
923 
924 	/*
925 	 * During this process, it'll load the kernel map and replace
926 	 * dso->long_name with the real pathname it found.  In this case
927 	 * we prefer the vmlinux path like
928 	 *   /lib/modules/3.16.4/build/vmlinux
929 	 *
930 	 * rather than the build-id path (in the debug directory).
931 	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
932 	 */
933 	symbol_conf.ignore_vmlinux_buildid = true;
934 
935 	/*
936 	 * If --buildid-all is given, it marks all DSOs regardless of hits,
937 	 * so there is no need to process samples. But if timestamp_boundary
938 	 * is enabled, it still needs to walk all samples to get the
939 	 * timestamps of the first/last samples.
940 	 */
941 	if (rec->buildid_all && !rec->timestamp_boundary)
942 		rec->tool.sample = NULL;
943 
944 	return perf_session__process_events(session);
945 }
946 
947 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
948 {
949 	int err;
950 	struct perf_tool *tool = data;
951 	/*
952 	 * As for the guest kernel, when processing the record & report
953 	 * subcommands we arrange the module mmaps prior to the guest kernel
954 	 * mmap and trigger a dso preload, because default guest module symbols
955 	 * are loaded from guest kallsyms instead of /lib/modules/XXX/XXX. This
956 	 * method avoids missing symbols when the first address is in a module
957 	 * instead of in the guest kernel.
958 	 */
959 	err = perf_event__synthesize_modules(tool, process_synthesized_event,
960 					     machine);
961 	if (err < 0)
962 		pr_err("Couldn't record guest kernel [%d]'s reference"
963 		       " relocation symbol.\n", machine->pid);
964 
965 	/*
966 	 * We use _stext for the guest kernel because the guest kernel's
967 	 * /proc/kallsyms sometimes has no _text.
968 	 */
969 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
970 						 machine);
971 	if (err < 0)
972 		pr_err("Couldn't record guest kernel [%d]'s reference"
973 		       " relocation symbol.\n", machine->pid);
974 }
975 
976 static struct perf_event_header finished_round_event = {
977 	.size = sizeof(struct perf_event_header),
978 	.type = PERF_RECORD_FINISHED_ROUND,
979 };
980 
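/*
 * With --affinity=node or --affinity=cpu, migrate the recording thread to
 * the CPU mask of the mmap'ed buffer currently being flushed so that the
 * buffer accesses stay local to it.
 */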
981 static void record__adjust_affinity(struct record *rec, struct mmap *map)
982 {
983 	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
984 	    !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
985 			  rec->affinity_mask.nbits)) {
986 		bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
987 		bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
988 			  map->affinity_mask.bits, rec->affinity_mask.nbits);
989 		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
990 				  (cpu_set_t *)rec->affinity_mask.bits);
991 		if (verbose == 2)
992 			mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
993 	}
994 }
995 
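/*
 * Callback used by zstd_compress_stream_to_records(): on the first call
 * (increment == 0) it initializes a PERF_RECORD_COMPRESSED header and
 * reserves room for it, on subsequent calls it grows header.size by the
 * number of compressed bytes just appended after the header.
 */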
996 static size_t process_comp_header(void *record, size_t increment)
997 {
998 	struct perf_record_compressed *event = record;
999 	size_t size = sizeof(*event);
1000 
1001 	if (increment) {
1002 		event->header.size += increment;
1003 		return increment;
1004 	}
1005 
1006 	event->header.type = PERF_RECORD_COMPRESSED;
1007 	event->header.size = size;
1008 
1009 	return size;
1010 }
1011 
1012 static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1013 			    void *src, size_t src_size)
1014 {
1015 	size_t compressed;
1016 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1017 
1018 	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1019 						     max_record_size, process_comp_header);
1020 
1021 	session->bytes_transferred += src_size;
1022 	session->bytes_compressed  += compressed;
1023 
1024 	return compressed;
1025 }
1026 
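/*
 * Drain all mmap'ed ring buffers of the evlist (regular or overwritable),
 * pushing their data to the output either directly or via AIO, reading
 * AUX area data when not in snapshot/sample mode, and emitting a
 * PERF_RECORD_FINISHED_ROUND event if anything was written.  With 'synch'
 * set, the flush threshold is temporarily forced to 1 so the buffers are
 * fully emptied.
 */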
1027 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1028 				    bool overwrite, bool synch)
1029 {
1030 	u64 bytes_written = rec->bytes_written;
1031 	int i;
1032 	int rc = 0;
1033 	struct mmap *maps;
1034 	int trace_fd = rec->data.file.fd;
1035 	off_t off = 0;
1036 
1037 	if (!evlist)
1038 		return 0;
1039 
1040 	maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1041 	if (!maps)
1042 		return 0;
1043 
1044 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1045 		return 0;
1046 
1047 	if (record__aio_enabled(rec))
1048 		off = record__aio_get_pos(trace_fd);
1049 
1050 	for (i = 0; i < evlist->core.nr_mmaps; i++) {
1051 		u64 flush = 0;
1052 		struct mmap *map = &maps[i];
1053 
1054 		if (map->core.base) {
1055 			record__adjust_affinity(rec, map);
1056 			if (synch) {
1057 				flush = map->core.flush;
1058 				map->core.flush = 1;
1059 			}
1060 			if (!record__aio_enabled(rec)) {
1061 				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1062 					if (synch)
1063 						map->core.flush = flush;
1064 					rc = -1;
1065 					goto out;
1066 				}
1067 			} else {
1068 				if (record__aio_push(rec, map, &off) < 0) {
1069 					record__aio_set_pos(trace_fd, off);
1070 					if (synch)
1071 						map->core.flush = flush;
1072 					rc = -1;
1073 					goto out;
1074 				}
1075 			}
1076 			if (synch)
1077 				map->core.flush = flush;
1078 		}
1079 
1080 		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1081 		    !rec->opts.auxtrace_sample_mode &&
1082 		    record__auxtrace_mmap_read(rec, map) != 0) {
1083 			rc = -1;
1084 			goto out;
1085 		}
1086 	}
1087 
1088 	if (record__aio_enabled(rec))
1089 		record__aio_set_pos(trace_fd, off);
1090 
1091 	/*
1092 	 * Mark the round finished in case we wrote
1093 	 * at least one event.
1094 	 */
1095 	if (bytes_written != rec->bytes_written)
1096 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1097 
1098 	if (overwrite)
1099 		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1100 out:
1101 	return rc;
1102 }
1103 
1104 static int record__mmap_read_all(struct record *rec, bool synch)
1105 {
1106 	int err;
1107 
1108 	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1109 	if (err)
1110 		return err;
1111 
1112 	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1113 }
1114 
1115 static void record__init_features(struct record *rec)
1116 {
1117 	struct perf_session *session = rec->session;
1118 	int feat;
1119 
1120 	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1121 		perf_header__set_feat(&session->header, feat);
1122 
1123 	if (rec->no_buildid)
1124 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1125 
1126 	if (!have_tracepoints(&rec->evlist->core.entries))
1127 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1128 
1129 	if (!rec->opts.branch_stack)
1130 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1131 
1132 	if (!rec->opts.full_auxtrace)
1133 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1134 
1135 	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1136 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1137 
1138 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1139 	if (!record__comp_enabled(rec))
1140 		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1141 
1142 	perf_header__clear_feat(&session->header, HEADER_STAT);
1143 }
1144 
1145 static void
1146 record__finish_output(struct record *rec)
1147 {
1148 	struct perf_data *data = &rec->data;
1149 	int fd = perf_data__fd(data);
1150 
1151 	if (data->is_pipe)
1152 		return;
1153 
1154 	rec->session->header.data_size += rec->bytes_written;
1155 	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1156 
1157 	if (!rec->no_buildid) {
1158 		process_buildids(rec);
1159 
1160 		if (rec->buildid_all)
1161 			dsos__hit_all(rec->session);
1162 	}
1163 	perf_session__write_header(rec->session, rec->evlist, fd, true);
1164 
1165 	return;
1166 }
1167 
1168 static int record__synthesize_workload(struct record *rec, bool tail)
1169 {
1170 	int err;
1171 	struct perf_thread_map *thread_map;
1172 
1173 	if (rec->opts.tail_synthesize != tail)
1174 		return 0;
1175 
1176 	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1177 	if (thread_map == NULL)
1178 		return -1;
1179 
1180 	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1181 						 process_synthesized_event,
1182 						 &rec->session->machines.host,
1183 						 rec->opts.sample_address);
1184 	perf_thread_map__put(thread_map);
1185 	return err;
1186 }
1187 
1188 static int record__synthesize(struct record *rec, bool tail);
1189 
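/*
 * Rotate the output: flush outstanding AIO writes, synthesize tail events,
 * finish the current perf.data and switch to a new timestamped file,
 * recycling the oldest rotated file when a maximum number of files is
 * configured.  Returns the new output fd or a negative error.
 */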
1190 static int
1191 record__switch_output(struct record *rec, bool at_exit)
1192 {
1193 	struct perf_data *data = &rec->data;
1194 	int fd, err;
1195 	char *new_filename;
1196 
1197 	/* Same size as: "2015122520103046" */
1198 	char timestamp[] = "InvalidTimestamp";
1199 
1200 	record__aio_mmap_read_sync(rec);
1201 
1202 	record__synthesize(rec, true);
1203 	if (target__none(&rec->opts.target))
1204 		record__synthesize_workload(rec, true);
1205 
1206 	rec->samples = 0;
1207 	record__finish_output(rec);
1208 	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1209 	if (err) {
1210 		pr_err("Failed to get current timestamp\n");
1211 		return -EINVAL;
1212 	}
1213 
1214 	fd = perf_data__switch(data, timestamp,
1215 				    rec->session->header.data_offset,
1216 				    at_exit, &new_filename);
1217 	if (fd >= 0 && !at_exit) {
1218 		rec->bytes_written = 0;
1219 		rec->session->header.data_size = 0;
1220 	}
1221 
1222 	if (!quiet)
1223 		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1224 			data->path, timestamp);
1225 
1226 	if (rec->switch_output.num_files) {
1227 		int n = rec->switch_output.cur_file + 1;
1228 
1229 		if (n >= rec->switch_output.num_files)
1230 			n = 0;
1231 		rec->switch_output.cur_file = n;
1232 		if (rec->switch_output.filenames[n]) {
1233 			remove(rec->switch_output.filenames[n]);
1234 			zfree(&rec->switch_output.filenames[n]);
1235 		}
1236 		rec->switch_output.filenames[n] = new_filename;
1237 	} else {
1238 		free(new_filename);
1239 	}
1240 
1241 	/* Output tracking events */
1242 	if (!at_exit) {
1243 		record__synthesize(rec, false);
1244 
1245 		/*
1246 		 * In 'perf record --switch-output' without -a,
1247 		 * record__synthesize() in record__switch_output() won't
1248 		 * generate tracking events because there's no thread_map
1249 		 * in the evlist, which causes the newly created perf.data
1250 		 * to lack map and comm information.
1251 		 * Create a fake thread_map and directly call
1252 		 * perf_event__synthesize_thread_map() for those events.
1253 		 */
1254 		if (target__none(&rec->opts.target))
1255 			record__synthesize_workload(rec, false);
1256 	}
1257 	return fd;
1258 }
1259 
1260 static volatile int workload_exec_errno;
1261 
1262 /*
1263  * perf_evlist__prepare_workload will send a SIGUSR1
1264  * if the fork fails, since we asked for it by setting its
1265  * want_signal to true.
1266  */
1267 static void workload_exec_failed_signal(int signo __maybe_unused,
1268 					siginfo_t *info,
1269 					void *ucontext __maybe_unused)
1270 {
1271 	workload_exec_errno = info->si_value.sival_int;
1272 	done = 1;
1273 	child_finished = 1;
1274 }
1275 
1276 static void snapshot_sig_handler(int sig);
1277 static void alarm_sig_handler(int sig);
1278 
1279 static const struct perf_event_mmap_page *
1280 perf_evlist__pick_pc(struct evlist *evlist)
1281 {
1282 	if (evlist) {
1283 		if (evlist->mmap && evlist->mmap[0].core.base)
1284 			return evlist->mmap[0].core.base;
1285 		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1286 			return evlist->overwrite_mmap[0].core.base;
1287 	}
1288 	return NULL;
1289 }
1290 
1291 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1292 {
1293 	const struct perf_event_mmap_page *pc;
1294 
1295 	pc = perf_evlist__pick_pc(rec->evlist);
1296 	if (pc)
1297 		return pc;
1298 	return NULL;
1299 }
1300 
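/*
 * Synthesize the non-sample events that later analysis needs: attrs,
 * features and tracing data when writing to a pipe, the time conversion
 * event, id_index and auxtrace info for AUX area tracing, kernel and
 * module mmaps, thread and cpu maps, BPF and cgroup events, and finally
 * the pre-existing threads of the target.
 */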
1301 static int record__synthesize(struct record *rec, bool tail)
1302 {
1303 	struct perf_session *session = rec->session;
1304 	struct machine *machine = &session->machines.host;
1305 	struct perf_data *data = &rec->data;
1306 	struct record_opts *opts = &rec->opts;
1307 	struct perf_tool *tool = &rec->tool;
1308 	int fd = perf_data__fd(data);
1309 	int err = 0;
1310 	event_op f = process_synthesized_event;
1311 
1312 	if (rec->opts.tail_synthesize != tail)
1313 		return 0;
1314 
1315 	if (data->is_pipe) {
1316 		/*
1317 		 * We need to synthesize events first, because some
1318 		 * features work on top of them (on the report side).
1319 		 */
1320 		err = perf_event__synthesize_attrs(tool, rec->evlist,
1321 						   process_synthesized_event);
1322 		if (err < 0) {
1323 			pr_err("Couldn't synthesize attrs.\n");
1324 			goto out;
1325 		}
1326 
1327 		err = perf_event__synthesize_features(tool, session, rec->evlist,
1328 						      process_synthesized_event);
1329 		if (err < 0) {
1330 			pr_err("Couldn't synthesize features.\n");
1331 			return err;
1332 		}
1333 
1334 		if (have_tracepoints(&rec->evlist->core.entries)) {
1335 			/*
1336 			 * FIXME err <= 0 here actually means that
1337 			 * there were no tracepoints, so it's not really
1338 			 * an error, just that we don't need to
1339 			 * synthesize anything.  We really have to
1340 			 * return this more properly and also
1341 			 * propagate errors that now are calling die()
1342 			 */
1343 			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
1344 								  process_synthesized_event);
1345 			if (err <= 0) {
1346 				pr_err("Couldn't record tracing data.\n");
1347 				goto out;
1348 			}
1349 			rec->bytes_written += err;
1350 		}
1351 	}
1352 
1353 	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1354 					  process_synthesized_event, machine);
1355 	if (err)
1356 		goto out;
1357 
1358 	/* Synthesize id_index before auxtrace_info */
1359 	if (rec->opts.auxtrace_sample_mode) {
1360 		err = perf_event__synthesize_id_index(tool,
1361 						      process_synthesized_event,
1362 						      session->evlist, machine);
1363 		if (err)
1364 			goto out;
1365 	}
1366 
1367 	if (rec->opts.full_auxtrace) {
1368 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1369 					session, process_synthesized_event);
1370 		if (err)
1371 			goto out;
1372 	}
1373 
1374 	if (!perf_evlist__exclude_kernel(rec->evlist)) {
1375 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1376 							 machine);
1377 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1378 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1379 				   "Check /proc/kallsyms permission or run as root.\n");
1380 
1381 		err = perf_event__synthesize_modules(tool, process_synthesized_event,
1382 						     machine);
1383 		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1384 				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1385 				   "Check /proc/modules permission or run as root.\n");
1386 	}
1387 
1388 	if (perf_guest) {
1389 		machines__process_guests(&session->machines,
1390 					 perf_event__synthesize_guest_os, tool);
1391 	}
1392 
1393 	err = perf_event__synthesize_extra_attr(&rec->tool,
1394 						rec->evlist,
1395 						process_synthesized_event,
1396 						data->is_pipe);
1397 	if (err)
1398 		goto out;
1399 
1400 	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1401 						 process_synthesized_event,
1402 						NULL);
1403 	if (err < 0) {
1404 		pr_err("Couldn't synthesize thread map.\n");
1405 		return err;
1406 	}
1407 
1408 	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1409 					     process_synthesized_event, NULL);
1410 	if (err < 0) {
1411 		pr_err("Couldn't synthesize cpu map.\n");
1412 		return err;
1413 	}
1414 
1415 	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1416 						machine, opts);
1417 	if (err < 0)
1418 		pr_warning("Couldn't synthesize bpf events.\n");
1419 
1420 	err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1421 					     machine);
1422 	if (err < 0)
1423 		pr_warning("Couldn't synthesize cgroup events.\n");
1424 
1425 	if (rec->opts.nr_threads_synthesize > 1) {
1426 		perf_set_multithreaded();
1427 		f = process_locked_synthesized_event;
1428 	}
1429 
1430 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1431 					    f, opts->sample_address,
1432 					    rec->opts.nr_threads_synthesize);
1433 
1434 	if (rec->opts.nr_threads_synthesize > 1)
1435 		perf_set_singlethreaded();
1436 
1437 out:
1438 	return err;
1439 }
1440 
1441 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1442 {
1443 	struct record *rec = data;
1444 	pthread_kill(rec->thread_id, SIGUSR2);
1445 	return 0;
1446 }
1447 
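/*
 * Prepare the side-band evlist: hook --switch-output-event events up to a
 * callback that signals the main thread with SIGUSR2, add the
 * PERF_RECORD_BPF_EVENT side-band event unless --no-bpf-event is given,
 * and start the side-band thread.
 */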
1448 static int record__setup_sb_evlist(struct record *rec)
1449 {
1450 	struct record_opts *opts = &rec->opts;
1451 
1452 	if (rec->sb_evlist != NULL) {
1453 		/*
1454 		 * We get here if --switch-output-event populated the
1455 		 * sb_evlist, so associate a callback that will send a SIGUSR2
1456 		 * to the main thread.
1457 		 */
1458 		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1459 		rec->thread_id = pthread_self();
1460 	}
1461 
1462 	if (!opts->no_bpf_event) {
1463 		if (rec->sb_evlist == NULL) {
1464 			rec->sb_evlist = evlist__new();
1465 
1466 			if (rec->sb_evlist == NULL) {
1467 				pr_err("Couldn't create side band evlist.\n");
1468 				return -1;
1469 			}
1470 		}
1471 
1472 		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1473 			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1474 			return -1;
1475 		}
1476 	}
1477 
1478 	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1479 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1480 		opts->no_bpf_event = true;
1481 	}
1482 
1483 	return 0;
1484 }
1485 
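/*
 * The body of 'perf record': set up signal handlers, the session and
 * compression state, prepare the workload if one was given, open and mmap
 * the events, write the file header, synthesize the initial events and
 * then loop draining the ring buffers (handling AUX snapshots and output
 * switching) until recording stops, before finalizing the output file.
 */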
1486 static int __cmd_record(struct record *rec, int argc, const char **argv)
1487 {
1488 	int err;
1489 	int status = 0;
1490 	unsigned long waking = 0;
1491 	const bool forks = argc > 0;
1492 	struct perf_tool *tool = &rec->tool;
1493 	struct record_opts *opts = &rec->opts;
1494 	struct perf_data *data = &rec->data;
1495 	struct perf_session *session;
1496 	bool disabled = false, draining = false;
1497 	int fd;
1498 	float ratio = 0;
1499 
1500 	atexit(record__sig_exit);
1501 	signal(SIGCHLD, sig_handler);
1502 	signal(SIGINT, sig_handler);
1503 	signal(SIGTERM, sig_handler);
1504 	signal(SIGSEGV, sigsegv_handler);
1505 
1506 	if (rec->opts.record_namespaces)
1507 		tool->namespace_events = true;
1508 
1509 	if (rec->opts.record_cgroup) {
1510 #ifdef HAVE_FILE_HANDLE
1511 		tool->cgroup_events = true;
1512 #else
1513 		pr_err("cgroup tracking is not supported\n");
1514 		return -1;
1515 #endif
1516 	}
1517 
1518 	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1519 		signal(SIGUSR2, snapshot_sig_handler);
1520 		if (rec->opts.auxtrace_snapshot_mode)
1521 			trigger_on(&auxtrace_snapshot_trigger);
1522 		if (rec->switch_output.enabled)
1523 			trigger_on(&switch_output_trigger);
1524 	} else {
1525 		signal(SIGUSR2, SIG_IGN);
1526 	}
1527 
1528 	session = perf_session__new(data, false, tool);
1529 	if (IS_ERR(session)) {
1530 		pr_err("Perf session creation failed.\n");
1531 		return PTR_ERR(session);
1532 	}
1533 
1534 	fd = perf_data__fd(data);
1535 	rec->session = session;
1536 
1537 	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1538 		pr_err("Compression initialization failed.\n");
1539 		return -1;
1540 	}
1541 
1542 	session->header.env.comp_type  = PERF_COMP_ZSTD;
1543 	session->header.env.comp_level = rec->opts.comp_level;
1544 
1545 	if (rec->opts.kcore &&
1546 	    !record__kcore_readable(&session->machines.host)) {
1547 		pr_err("ERROR: kcore is not readable.\n");
1548 		return -1;
1549 	}
1550 
1551 	record__init_features(rec);
1552 
1553 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1554 		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1555 
1556 	if (forks) {
1557 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1558 						    argv, data->is_pipe,
1559 						    workload_exec_failed_signal);
1560 		if (err < 0) {
1561 			pr_err("Couldn't run the workload!\n");
1562 			status = err;
1563 			goto out_delete_session;
1564 		}
1565 	}
1566 
1567 	/*
1568 	 * If we have just a single event and are sending data
1569 	 * through a pipe, we need to force id allocation,
1570 	 * because we synthesize the event name through the pipe
1571 	 * and need the id for that.
1572 	 */
1573 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1574 		rec->opts.sample_id = true;
1575 
1576 	if (record__open(rec) != 0) {
1577 		err = -1;
1578 		goto out_child;
1579 	}
1580 	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1581 
1582 	if (rec->opts.kcore) {
1583 		err = record__kcore_copy(&session->machines.host, data);
1584 		if (err) {
1585 			pr_err("ERROR: Failed to copy kcore\n");
1586 			goto out_child;
1587 		}
1588 	}
1589 
1590 	err = bpf__apply_obj_config();
1591 	if (err) {
1592 		char errbuf[BUFSIZ];
1593 
1594 		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1595 		pr_err("ERROR: Apply config to BPF failed: %s\n",
1596 			 errbuf);
1597 		goto out_child;
1598 	}
1599 
1600 	/*
1601 	 * Normally perf_session__new would do this, but it doesn't have the
1602 	 * evlist.
1603 	 */
1604 	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1605 		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1606 		rec->tool.ordered_events = false;
1607 	}
1608 
1609 	if (!rec->evlist->nr_groups)
1610 		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1611 
1612 	if (data->is_pipe) {
1613 		err = perf_header__write_pipe(fd);
1614 		if (err < 0)
1615 			goto out_child;
1616 	} else {
1617 		err = perf_session__write_header(session, rec->evlist, fd, false);
1618 		if (err < 0)
1619 			goto out_child;
1620 	}
1621 
1622 	err = -1;
1623 	if (!rec->no_buildid
1624 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1625 		pr_err("Couldn't generate buildids. "
1626 		       "Use --no-buildid to profile anyway.\n");
1627 		goto out_child;
1628 	}
1629 
1630 	err = record__setup_sb_evlist(rec);
1631 	if (err)
1632 		goto out_child;
1633 
1634 	err = record__synthesize(rec, false);
1635 	if (err < 0)
1636 		goto out_child;
1637 
1638 	if (rec->realtime_prio) {
1639 		struct sched_param param;
1640 
1641 		param.sched_priority = rec->realtime_prio;
1642 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1643 			pr_err("Could not set realtime priority.\n");
1644 			err = -1;
1645 			goto out_child;
1646 		}
1647 	}
1648 
1649 	/*
1650 	 * When perf is starting the traced process, all the events
1651 	 * (apart from group members) have enable_on_exec=1 set,
1652 	 * so don't spoil it by prematurely enabling them.
1653 	 */
1654 	if (!target__none(&opts->target) && !opts->initial_delay)
1655 		evlist__enable(rec->evlist);
1656 
1657 	/*
1658 	 * Let the child rip
1659 	 */
1660 	if (forks) {
1661 		struct machine *machine = &session->machines.host;
1662 		union perf_event *event;
1663 		pid_t tgid;
1664 
1665 		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1666 		if (event == NULL) {
1667 			err = -ENOMEM;
1668 			goto out_child;
1669 		}
1670 
1671 		/*
1672 		 * Some H/W events are generated before the COMM event,
1673 		 * which is emitted during exec(), so perf script
1674 		 * cannot see a correct process name for those events.
1675 		 * Synthesize a COMM event to prevent that.
1676 		 */
1677 		tgid = perf_event__synthesize_comm(tool, event,
1678 						   rec->evlist->workload.pid,
1679 						   process_synthesized_event,
1680 						   machine);
1681 		free(event);
1682 
1683 		if (tgid == -1)
1684 			goto out_child;
1685 
1686 		event = malloc(sizeof(event->namespaces) +
1687 			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1688 			       machine->id_hdr_size);
1689 		if (event == NULL) {
1690 			err = -ENOMEM;
1691 			goto out_child;
1692 		}
1693 
1694 		/*
1695 		 * Synthesize NAMESPACES event for the command specified.
1696 		 */
1697 		perf_event__synthesize_namespaces(tool, event,
1698 						  rec->evlist->workload.pid,
1699 						  tgid, process_synthesized_event,
1700 						  machine);
1701 		free(event);
1702 
1703 		perf_evlist__start_workload(rec->evlist);
1704 	}
1705 
1706 	if (opts->initial_delay) {
1707 		usleep(opts->initial_delay * USEC_PER_MSEC);
1708 		evlist__enable(rec->evlist);
1709 	}
1710 
1711 	trigger_ready(&auxtrace_snapshot_trigger);
1712 	trigger_ready(&switch_output_trigger);
1713 	perf_hooks__invoke_record_start();
1714 	for (;;) {
1715 		unsigned long long hits = rec->samples;
1716 
1717 		/*
1718 		 * rec->evlist->bkw_mmap_state can be BKW_MMAP_EMPTY
1719 		 * here: when done == true and
1720 		 * hits != rec->samples in the previous round.
1721 		 *
1722 		 * perf_evlist__toggle_bkw_mmap() ensures we never
1723 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1724 		 */
1725 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
1726 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1727 
1728 		if (record__mmap_read_all(rec, false) < 0) {
1729 			trigger_error(&auxtrace_snapshot_trigger);
1730 			trigger_error(&switch_output_trigger);
1731 			err = -1;
1732 			goto out_child;
1733 		}
1734 
1735 		if (auxtrace_record__snapshot_started) {
1736 			auxtrace_record__snapshot_started = 0;
1737 			if (!trigger_is_error(&auxtrace_snapshot_trigger))
1738 				record__read_auxtrace_snapshot(rec, false);
1739 			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1740 				pr_err("AUX area tracing snapshot failed\n");
1741 				err = -1;
1742 				goto out_child;
1743 			}
1744 		}
1745 
1746 		if (trigger_is_hit(&switch_output_trigger)) {
1747 			/*
1748 			 * If switch_output_trigger is hit, the data in the
1749 			 * overwritable ring buffer should have been collected,
1750 			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1751 			 *
1752 			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
1753 			 * record__mmap_read_all() didn't collect data from the
1754 			 * overwritable ring buffer. Read again.
1755 			 */
1756 			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1757 				continue;
1758 			trigger_ready(&switch_output_trigger);
1759 
1760 			/*
1761 			 * Re-enable events in the overwrite ring buffer after
1762 			 * record__mmap_read_all(): we should have collected
1763 			 * data from it.
1764 			 */
1765 			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1766 
1767 			if (!quiet)
1768 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1769 					waking);
1770 			waking = 0;
1771 			fd = record__switch_output(rec, false);
1772 			if (fd < 0) {
1773 				pr_err("Failed to switch to new file\n");
1774 				trigger_error(&switch_output_trigger);
1775 				err = fd;
1776 				goto out_child;
1777 			}
1778 
1779 			/* re-arm the alarm */
1780 			if (rec->switch_output.time)
1781 				alarm(rec->switch_output.time);
1782 		}
1783 
1784 		if (hits == rec->samples) {
1785 			if (done || draining)
1786 				break;
1787 			err = evlist__poll(rec->evlist, -1);
1788 			/*
1789 			 * Propagate the error only if there is one. Ignore a positive
1790 			 * number of returned events and interrupt (EINTR) errors.
1791 			 */
1792 			if (err > 0 || (err < 0 && errno == EINTR))
1793 				err = 0;
1794 			waking++;
1795 
1796 			if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1797 				draining = true;
1798 		}
1799 
1800 		/*
1801 		 * When perf is starting the traced process, the events die with
1802 		 * the process at the end and we wait for that. Thus there is no
1803 		 * need to disable events in this case.
1804 		 */
1805 		if (done && !disabled && !target__none(&opts->target)) {
1806 			trigger_off(&auxtrace_snapshot_trigger);
1807 			evlist__disable(rec->evlist);
1808 			disabled = true;
1809 		}
1810 	}
1811 
1812 	trigger_off(&auxtrace_snapshot_trigger);
1813 	trigger_off(&switch_output_trigger);
1814 
1815 	if (opts->auxtrace_snapshot_on_exit)
1816 		record__auxtrace_snapshot_exit(rec);
1817 
1818 	if (forks && workload_exec_errno) {
1819 		char msg[STRERR_BUFSIZE];
1820 		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1821 		pr_err("Workload failed: %s\n", emsg);
1822 		err = -1;
1823 		goto out_child;
1824 	}
1825 
1826 	if (!quiet)
1827 		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1828 
1829 	if (target__none(&rec->opts.target))
1830 		record__synthesize_workload(rec, true);
1831 
1832 out_child:
1833 	record__mmap_read_all(rec, true);
1834 	record__aio_mmap_read_sync(rec);
1835 
1836 	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1837 		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1838 		session->header.env.comp_ratio = ratio + 0.5;
1839 	}
1840 
1841 	if (forks) {
1842 		int exit_status;
1843 
1844 		if (!child_finished)
1845 			kill(rec->evlist->workload.pid, SIGTERM);
1846 
1847 		wait(&exit_status);
1848 
1849 		if (err < 0)
1850 			status = err;
1851 		else if (WIFEXITED(exit_status))
1852 			status = WEXITSTATUS(exit_status);
1853 		else if (WIFSIGNALED(exit_status))
1854 			signr = WTERMSIG(exit_status);
1855 	} else
1856 		status = err;
1857 
1858 	record__synthesize(rec, true);
1859 	/* this will be recalculated during process_buildids() */
1860 	rec->samples = 0;
1861 
1862 	if (!err) {
1863 		if (!rec->timestamp_filename) {
1864 			record__finish_output(rec);
1865 		} else {
1866 			fd = record__switch_output(rec, true);
1867 			if (fd < 0) {
1868 				status = fd;
1869 				goto out_delete_session;
1870 			}
1871 		}
1872 	}
1873 
1874 	perf_hooks__invoke_record_end();
1875 
1876 	if (!err && !quiet) {
1877 		char samples[128];
1878 		const char *postfix = rec->timestamp_filename ?
1879 					".<timestamp>" : "";
1880 
1881 		if (rec->samples && !rec->opts.full_auxtrace)
1882 			scnprintf(samples, sizeof(samples),
1883 				  " (%" PRIu64 " samples)", rec->samples);
1884 		else
1885 			samples[0] = '\0';
1886 
1887 		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
1888 			perf_data__size(data) / 1024.0 / 1024.0,
1889 			data->path, postfix, samples);
1890 		if (ratio) {
1891 			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
1892 					rec->session->bytes_transferred / 1024.0 / 1024.0,
1893 					ratio);
1894 		}
1895 		fprintf(stderr, " ]\n");
1896 	}
1897 
1898 out_delete_session:
1899 	zstd_fini(&session->zstd_data);
1900 	perf_session__delete(session);
1901 
1902 	if (!opts->no_bpf_event)
1903 		perf_evlist__stop_sb_thread(rec->sb_evlist);
1904 	return status;
1905 }
1906 
1907 static void callchain_debug(struct callchain_param *callchain)
1908 {
1909 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1910 
1911 	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1912 
1913 	if (callchain->record_mode == CALLCHAIN_DWARF)
1914 		pr_debug("callchain: stack dump size %d\n",
1915 			 callchain->dump_size);
1916 }
1917 
1918 int record_opts__parse_callchain(struct record_opts *record,
1919 				 struct callchain_param *callchain,
1920 				 const char *arg, bool unset)
1921 {
1922 	int ret;
1923 	callchain->enabled = !unset;
1924 
1925 	/* --no-call-graph */
1926 	if (unset) {
1927 		callchain->record_mode = CALLCHAIN_NONE;
1928 		pr_debug("callchain: disabled\n");
1929 		return 0;
1930 	}
1931 
1932 	ret = parse_callchain_record_opt(arg, callchain);
1933 	if (!ret) {
1934 		/* Enable data address sampling for DWARF unwind. */
1935 		if (callchain->record_mode == CALLCHAIN_DWARF)
1936 			record->sample_address = true;
1937 		callchain_debug(callchain);
1938 	}
1939 
1940 	return ret;
1941 }
1942 
1943 int record_parse_callchain_opt(const struct option *opt,
1944 			       const char *arg,
1945 			       int unset)
1946 {
1947 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1948 }
1949 
1950 int record_callchain_opt(const struct option *opt,
1951 			 const char *arg __maybe_unused,
1952 			 int unset __maybe_unused)
1953 {
1954 	struct callchain_param *callchain = opt->value;
1955 
1956 	callchain->enabled = true;
1957 
1958 	if (callchain->record_mode == CALLCHAIN_NONE)
1959 		callchain->record_mode = CALLCHAIN_FP;
1960 
1961 	callchain_debug(callchain);
1962 	return 0;
1963 }
1964 
1965 static int perf_record_config(const char *var, const char *value, void *cb)
1966 {
1967 	struct record *rec = cb;
1968 
1969 	if (!strcmp(var, "record.build-id")) {
1970 		if (!strcmp(value, "cache"))
1971 			rec->no_buildid_cache = false;
1972 		else if (!strcmp(value, "no-cache"))
1973 			rec->no_buildid_cache = true;
1974 		else if (!strcmp(value, "skip"))
1975 			rec->no_buildid = true;
1976 		else
1977 			return -1;
1978 		return 0;
1979 	}
1980 	if (!strcmp(var, "record.call-graph")) {
1981 		var = "call-graph.record-mode";
1982 		return perf_default_config(var, value, cb);
1983 	}
1984 #ifdef HAVE_AIO_SUPPORT
1985 	if (!strcmp(var, "record.aio")) {
1986 		rec->opts.nr_cblocks = strtol(value, NULL, 0);
1987 		if (!rec->opts.nr_cblocks)
1988 			rec->opts.nr_cblocks = nr_cblocks_default;
1989 	}
1990 #endif
1991 
1992 	return 0;
1993 }
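/*
 * Illustrative ~/.perfconfig snippet handled by perf_record_config() above
 * (a sketch; the exact values are examples, not the only valid ones):
 *
 *   [record]
 *           build-id = cache        # or "no-cache" / "skip"
 *           call-graph = dwarf      # forwarded as call-graph.record-mode
 *           aio = 2                 # HAVE_AIO_SUPPORT only, 0 means default
 */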
1994 
1995 struct clockid_map {
1996 	const char *name;
1997 	int clockid;
1998 };
1999 
2000 #define CLOCKID_MAP(n, c)	\
2001 	{ .name = n, .clockid = (c), }
2002 
2003 #define CLOCKID_END	{ .name = NULL, }
2004 
2005 
2006 /*
2007  * Add the missing ones; we need to build on many distros...
2008  */
2009 #ifndef CLOCK_MONOTONIC_RAW
2010 #define CLOCK_MONOTONIC_RAW 4
2011 #endif
2012 #ifndef CLOCK_BOOTTIME
2013 #define CLOCK_BOOTTIME 7
2014 #endif
2015 #ifndef CLOCK_TAI
2016 #define CLOCK_TAI 11
2017 #endif
2018 
2019 static const struct clockid_map clockids[] = {
2020 	/* available for all events, NMI safe */
2021 	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
2022 	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
2023 
2024 	/* available for some events */
2025 	CLOCKID_MAP("realtime", CLOCK_REALTIME),
2026 	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
2027 	CLOCKID_MAP("tai", CLOCK_TAI),
2028 
2029 	/* available for the lazy */
2030 	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
2031 	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
2032 	CLOCKID_MAP("real", CLOCK_REALTIME),
2033 	CLOCKID_MAP("boot", CLOCK_BOOTTIME),
2034 
2035 	CLOCKID_END,
2036 };
2037 
2038 static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
2039 {
2040 	struct timespec res;
2041 
2042 	*res_ns = 0;
2043 	if (!clock_getres(clk_id, &res))
2044 		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
2045 	else
2046 		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
2047 
2048 	return 0;
2049 }
2050 
2051 static int parse_clockid(const struct option *opt, const char *str, int unset)
2052 {
2053 	struct record_opts *opts = (struct record_opts *)opt->value;
2054 	const struct clockid_map *cm;
2055 	const char *ostr = str;
2056 
2057 	if (unset) {
2058 		opts->use_clockid = 0;
2059 		return 0;
2060 	}
2061 
2062 	/* no arg passed */
2063 	if (!str)
2064 		return 0;
2065 
2066 	/* no setting it twice */
2067 	if (opts->use_clockid)
2068 		return -1;
2069 
2070 	opts->use_clockid = true;
2071 
2072 	/* if its a number, we're done */
2073 	/* if it's a number, we're done */
2074 		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
2075 
2076 	/* allow a "CLOCK_" prefix to the name */
2077 	if (!strncasecmp(str, "CLOCK_", 6))
2078 		str += 6;
2079 
2080 	for (cm = clockids; cm->name; cm++) {
2081 		if (!strcasecmp(str, cm->name)) {
2082 			opts->clockid = cm->clockid;
2083 			return get_clockid_res(opts->clockid,
2084 					       &opts->clockid_res_ns);
2085 		}
2086 	}
2087 
2088 	opts->use_clockid = false;
2089 	ui__warning("unknown clockid %s, check man page\n", ostr);
2090 	return -1;
2091 }
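/*
 * Illustrative -k/--clockid arguments accepted by parse_clockid() above
 * (a sketch, not an exhaustive list):
 *
 *   perf record -k monotonic_raw ...   # a name from the clockids[] table
 *   perf record -k CLOCK_BOOTTIME ...  # the "CLOCK_" prefix is optional
 *   perf record -k 4 ...               # a raw clockid number also works
 */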
2092 
2093 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2094 {
2095 	struct record_opts *opts = (struct record_opts *)opt->value;
2096 
2097 	if (unset || !str)
2098 		return 0;
2099 
2100 	if (!strcasecmp(str, "node"))
2101 		opts->affinity = PERF_AFFINITY_NODE;
2102 	else if (!strcasecmp(str, "cpu"))
2103 		opts->affinity = PERF_AFFINITY_CPU;
2104 
2105 	return 0;
2106 }
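/*
 * Illustrative --affinity values handled above (a sketch): "node" binds the
 * trace reading thread to the NUMA node of the mmap buffer being processed,
 * "cpu" to that buffer's CPU, and anything else keeps the PERF_AFFINITY_SYS
 * default set in cmd_record(), e.g.:
 *
 *   perf record --affinity=node -a ...
 */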
2107 
2108 static int parse_output_max_size(const struct option *opt,
2109 				 const char *str, int unset)
2110 {
2111 	unsigned long *s = (unsigned long *)opt->value;
2112 	static struct parse_tag tags_size[] = {
2113 		{ .tag  = 'B', .mult = 1       },
2114 		{ .tag  = 'K', .mult = 1 << 10 },
2115 		{ .tag  = 'M', .mult = 1 << 20 },
2116 		{ .tag  = 'G', .mult = 1 << 30 },
2117 		{ .tag  = 0 },
2118 	};
2119 	unsigned long val;
2120 
2121 	if (unset) {
2122 		*s = 0;
2123 		return 0;
2124 	}
2125 
2126 	val = parse_tag_value(str, tags_size);
2127 	if (val != (unsigned long) -1) {
2128 		*s = val;
2129 		return 0;
2130 	}
2131 
2132 	return -1;
2133 }
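/*
 * Illustrative --max-size arguments parsed above (a sketch): a plain byte
 * count or a value with a B/K/M/G suffix, e.g.:
 *
 *   perf record --max-size=200M ...    # limit the output file to 200 MiB
 *   perf record --max-size=1G ...
 */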
2134 
2135 static int record__parse_mmap_pages(const struct option *opt,
2136 				    const char *str,
2137 				    int unset __maybe_unused)
2138 {
2139 	struct record_opts *opts = opt->value;
2140 	char *s, *p;
2141 	unsigned int mmap_pages;
2142 	int ret;
2143 
2144 	if (!str)
2145 		return -EINVAL;
2146 
2147 	s = strdup(str);
2148 	if (!s)
2149 		return -ENOMEM;
2150 
2151 	p = strchr(s, ',');
2152 	if (p)
2153 		*p = '\0';
2154 
2155 	if (*s) {
2156 		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2157 		if (ret)
2158 			goto out_free;
2159 		opts->mmap_pages = mmap_pages;
2160 	}
2161 
2162 	if (!p) {
2163 		ret = 0;
2164 		goto out_free;
2165 	}
2166 
2167 	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2168 	if (ret)
2169 		goto out_free;
2170 
2171 	opts->auxtrace_mmap_pages = mmap_pages;
2172 
2173 out_free:
2174 	free(s);
2175 	return ret;
2176 }
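/*
 * Illustrative -m/--mmap-pages arguments split at the ',' above (a sketch):
 *
 *   perf record -m 512 ...       # 512 data pages, AUX area size untouched
 *   perf record -m 512,128 ...   # 512 data pages plus 128 AUX area pages
 *   perf record -m ,128 ...      # only the AUX area mmap size is set
 */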
2177 
2178 static void switch_output_size_warn(struct record *rec)
2179 {
2180 	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2181 	struct switch_output *s = &rec->switch_output;
2182 
2183 	wakeup_size /= 2;
2184 
2185 	if (s->size < wakeup_size) {
2186 		char buf[100];
2187 
2188 		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2189 		pr_warning("WARNING: switch-output data size lower than "
2190 			   "wakeup kernel buffer size (%s), "
2191 			   "expect bigger perf.data sizes\n", buf);
2192 	}
2193 }
2194 
2195 static int switch_output_setup(struct record *rec)
2196 {
2197 	struct switch_output *s = &rec->switch_output;
2198 	static struct parse_tag tags_size[] = {
2199 		{ .tag  = 'B', .mult = 1       },
2200 		{ .tag  = 'K', .mult = 1 << 10 },
2201 		{ .tag  = 'M', .mult = 1 << 20 },
2202 		{ .tag  = 'G', .mult = 1 << 30 },
2203 		{ .tag  = 0 },
2204 	};
2205 	static struct parse_tag tags_time[] = {
2206 		{ .tag  = 's', .mult = 1        },
2207 		{ .tag  = 'm', .mult = 60       },
2208 		{ .tag  = 'h', .mult = 60*60    },
2209 		{ .tag  = 'd', .mult = 60*60*24 },
2210 		{ .tag  = 0 },
2211 	};
2212 	unsigned long val;
2213 
2214 	/*
2215 	 * If we're using --switch-output-events, then it implies
2216 	 * --switch-output=signal, as we'll send a SIGUSR2 from the sideband
2217 	 * thread to its parent.
2218 	 */
2219 	if (rec->switch_output_event_set)
2220 		goto do_signal;
2221 
2222 	if (!s->set)
2223 		return 0;
2224 
2225 	if (!strcmp(s->str, "signal")) {
2226 do_signal:
2227 		s->signal = true;
2228 		pr_debug("switch-output with SIGUSR2 signal\n");
2229 		goto enabled;
2230 	}
2231 
2232 	val = parse_tag_value(s->str, tags_size);
2233 	if (val != (unsigned long) -1) {
2234 		s->size = val;
2235 		pr_debug("switch-output with %s size threshold\n", s->str);
2236 		goto enabled;
2237 	}
2238 
2239 	val = parse_tag_value(s->str, tags_time);
2240 	if (val != (unsigned long) -1) {
2241 		s->time = val;
2242 		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2243 			 s->str, s->time);
2244 		goto enabled;
2245 	}
2246 
2247 	return -1;
2248 
2249 enabled:
2250 	rec->timestamp_filename = true;
2251 	s->enabled              = true;
2252 
2253 	if (s->size && !rec->opts.no_buffering)
2254 		switch_output_size_warn(rec);
2255 
2256 	return 0;
2257 }
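/*
 * Illustrative --switch-output arguments accepted above (a sketch); every
 * variant also forces timestamp_filename so each data chunk gets its own
 * file name:
 *
 *   perf record --switch-output ...        # same as "signal", SIGUSR2 driven
 *   perf record --switch-output=1G ...     # size threshold (B/K/M/G)
 *   perf record --switch-output=10m ...    # time threshold (s/m/h/d)
 */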
2258 
2259 static const char * const __record_usage[] = {
2260 	"perf record [<options>] [<command>]",
2261 	"perf record [<options>] -- <command> [<options>]",
2262 	NULL
2263 };
2264 const char * const *record_usage = __record_usage;
2265 
2266 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2267 				  struct perf_sample *sample, struct machine *machine)
2268 {
2269 	/*
2270 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2271 	 * so there is no need to add them twice.
2272 	 */
2273 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2274 		return 0;
2275 	return perf_event__process_mmap(tool, event, sample, machine);
2276 }
2277 
2278 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2279 				   struct perf_sample *sample, struct machine *machine)
2280 {
2281 	/*
2282 	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2283 	 * so there is no need to add them twice.
2284 	 */
2285 	if (!(event->header.misc & PERF_RECORD_MISC_USER))
2286 		return 0;
2287 
2288 	return perf_event__process_mmap2(tool, event, sample, machine);
2289 }
2290 
2291 /*
2292  * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
2293  * because we need to have access to it in record__exit(), which is called
2294  * after cmd_record() exits, but since record_options needs to be accessible to
2295  * builtin-script, leave it here.
2296  *
2297  * At least we don't touch it in all the other functions here directly.
2298  *
2299  * Just say no to tons of global variables, sigh.
2300  */
2301 static struct record record = {
2302 	.opts = {
2303 		.sample_time	     = true,
2304 		.mmap_pages	     = UINT_MAX,
2305 		.user_freq	     = UINT_MAX,
2306 		.user_interval	     = ULLONG_MAX,
2307 		.freq		     = 4000,
2308 		.target		     = {
2309 			.uses_mmap   = true,
2310 			.default_per_cpu = true,
2311 		},
2312 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
2313 		.nr_threads_synthesize = 1,
2314 	},
2315 	.tool = {
2316 		.sample		= process_sample_event,
2317 		.fork		= perf_event__process_fork,
2318 		.exit		= perf_event__process_exit,
2319 		.comm		= perf_event__process_comm,
2320 		.namespaces	= perf_event__process_namespaces,
2321 		.mmap		= build_id__process_mmap,
2322 		.mmap2		= build_id__process_mmap2,
2323 		.ordered_events	= true,
2324 	},
2325 };
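/*
 * A sketch of the defaults above: sample timestamps are on, the sampling
 * frequency is 4000 Hz, and mmap_pages/user_freq/user_interval are left at
 * their UINT_MAX/ULLONG_MAX "not set by the user" sentinels, presumably to
 * be resolved later by record_opts__config().
 */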
2326 
2327 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2328 	"\n\t\t\t\tDefault: fp";
2329 
2330 static bool dry_run;
2331 
2332 /*
2333  * XXX Will stay a global variable until we fix builtin-script.c to stop messing
2334  * with it and switch to using the library functions in perf_evlist that came
2335  * from builtin-record.c, i.e. use record_opts,
2336  * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
2337  * using pipes, etc.
2338  */
2339 static struct option __record_options[] = {
2340 	OPT_CALLBACK('e', "event", &record.evlist, "event",
2341 		     "event selector. use 'perf list' to list available events",
2342 		     parse_events_option),
2343 	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2344 		     "event filter", parse_filter),
2345 	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2346 			   NULL, "don't record events from perf itself",
2347 			   exclude_perf),
2348 	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2349 		    "record events on existing process id"),
2350 	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2351 		    "record events on existing thread id"),
2352 	OPT_INTEGER('r', "realtime", &record.realtime_prio,
2353 		    "collect data with this RT SCHED_FIFO priority"),
2354 	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2355 		    "collect data without buffering"),
2356 	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2357 		    "collect raw sample records from all opened counters"),
2358 	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2359 			    "system-wide collection from all CPUs"),
2360 	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2361 		    "list of cpus to monitor"),
2362 	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2363 	OPT_STRING('o', "output", &record.data.path, "file",
2364 		    "output file name"),
2365 	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2366 			&record.opts.no_inherit_set,
2367 			"child tasks do not inherit counters"),
2368 	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2369 		    "synthesize non-sample events at the end of output"),
2370 	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2371 	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2372 	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2373 		    "Fail if the specified frequency can't be used"),
2374 	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2375 		     "profile at this frequency",
2376 		      record__parse_freq),
2377 	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2378 		     "number of mmap data pages and AUX area tracing mmap pages",
2379 		     record__parse_mmap_pages),
2380 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2381 		     "Minimum number of bytes extracted from mmap data pages (default: 1)",
2382 		     record__mmap_flush_parse),
2383 	OPT_BOOLEAN(0, "group", &record.opts.group,
2384 		    "put the counters into a counter group"),
2385 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2386 			   NULL, "enables call-graph recording" ,
2387 			   &record_callchain_opt),
2388 	OPT_CALLBACK(0, "call-graph", &record.opts,
2389 		     "record_mode[,record_size]", record_callchain_help,
2390 		     &record_parse_callchain_opt),
2391 	OPT_INCR('v', "verbose", &verbose,
2392 		    "be more verbose (show counter open errors, etc)"),
2393 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2394 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2395 		    "per thread counts"),
2396 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2397 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2398 		    "Record the sample physical addresses"),
2399 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2400 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2401 			&record.opts.sample_time_set,
2402 			"Record the sample timestamps"),
2403 	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2404 			"Record the sample period"),
2405 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2406 		    "don't sample"),
2407 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2408 			&record.no_buildid_cache_set,
2409 			"do not update the buildid cache"),
2410 	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2411 			&record.no_buildid_set,
2412 			"do not collect buildids in perf.data"),
2413 	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2414 		     "monitor event in cgroup name only",
2415 		     parse_cgroups),
2416 	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
2417 		  "ms to wait before starting measurement after program start"),
2418 	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2419 	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2420 		   "user to profile"),
2421 
2422 	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2423 		     "branch any", "sample any taken branches",
2424 		     parse_branch_stack),
2425 
2426 	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2427 		     "branch filter mask", "branch stack filter modes",
2428 		     parse_branch_stack),
2429 	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2430 		    "sample by weight (on special events only)"),
2431 	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2432 		    "sample transaction flags (special events only)"),
2433 	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2434 		    "use per-thread mmaps"),
2435 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2436 		    "sample selected machine registers on interrupt,"
2437 		    " use '-I?' to list register names", parse_intr_regs),
2438 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2439 		    "sample selected machine registers in user space,"
2440 		    " use '--user-regs=?' to list register names", parse_user_regs),
2441 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2442 		    "Record running/enabled time of read (:S) events"),
2443 	OPT_CALLBACK('k', "clockid", &record.opts,
2444 		     "clockid", "clockid to use for events, see clock_gettime()",
2445 		     parse_clockid),
2446 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2447 			  "opts", "AUX area tracing Snapshot Mode", ""),
2448 	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2449 			  "opts", "sample AUX area", ""),
2450 	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2451 			"per thread proc mmap processing timeout in ms"),
2452 	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2453 		    "Record namespaces events"),
2454 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2455 		    "Record cgroup events"),
2456 	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
2457 		    "Record context switch events"),
2458 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2459 			 "Configure all used events to run in kernel space.",
2460 			 PARSE_OPT_EXCLUSIVE),
2461 	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2462 			 "Configure all used events to run in user space.",
2463 			 PARSE_OPT_EXCLUSIVE),
2464 	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2465 		    "collect kernel callchains"),
2466 	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2467 		    "collect user callchains"),
2468 	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2469 		   "clang binary to use for compiling BPF scriptlets"),
2470 	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2471 		   "options passed to clang when compiling BPF scriptlets"),
2472 	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2473 		   "file", "vmlinux pathname"),
2474 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2475 		    "Record build-id of all DSOs regardless of hits"),
2476 	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2477 		    "append timestamp to output filename"),
2478 	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2479 		    "Record timestamp boundary (time of first/last samples)"),
2480 	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2481 			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2482 			  "Switch output when receiving SIGUSR2 (signal) or crossing a size or time threshold",
2483 			  "signal"),
2484 	OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2485 			 "switch output event selector. use 'perf list' to list available events",
2486 			 parse_events_option_new_evlist),
2487 	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2488 		   "Limit the number of generated switch output files"),
2489 	OPT_BOOLEAN(0, "dry-run", &dry_run,
2490 		    "Parse options then exit"),
2491 #ifdef HAVE_AIO_SUPPORT
2492 	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2493 		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2494 		     record__aio_parse),
2495 #endif
2496 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2497 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2498 		     record__parse_affinity),
2499 #ifdef HAVE_ZSTD_SUPPORT
2500 	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2501 			    "n", "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2502 			    record__parse_comp_level),
2503 #endif
2504 	OPT_CALLBACK(0, "max-size", &record.output_max_size,
2505 		     "size", "Limit the maximum size of the output file", parse_output_max_size),
2506 	OPT_UINTEGER(0, "num-thread-synthesize",
2507 		     &record.opts.nr_threads_synthesize,
2508 		     "number of threads to run for event synthesis"),
2509 	OPT_END()
2510 };
2511 
2512 struct option *record_options = __record_options;
2513 
2514 int cmd_record(int argc, const char **argv)
2515 {
2516 	int err;
2517 	struct record *rec = &record;
2518 	char errbuf[BUFSIZ];
2519 
2520 	setlocale(LC_ALL, "");
2521 
2522 #ifndef HAVE_LIBBPF_SUPPORT
2523 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2524 	set_nobuild('\0', "clang-path", true);
2525 	set_nobuild('\0', "clang-opt", true);
2526 # undef set_nobuild
2527 #endif
2528 
2529 #ifndef HAVE_BPF_PROLOGUE
2530 # if !defined (HAVE_DWARF_SUPPORT)
2531 #  define REASON  "NO_DWARF=1"
2532 # elif !defined (HAVE_LIBBPF_SUPPORT)
2533 #  define REASON  "NO_LIBBPF=1"
2534 # else
2535 #  define REASON  "this architecture doesn't support BPF prologue"
2536 # endif
2537 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2538 	set_nobuild('\0', "vmlinux", true);
2539 # undef set_nobuild
2540 # undef REASON
2541 #endif
2542 
2543 	rec->opts.affinity = PERF_AFFINITY_SYS;
2544 
2545 	rec->evlist = evlist__new();
2546 	if (rec->evlist == NULL)
2547 		return -ENOMEM;
2548 
2549 	err = perf_config(perf_record_config, rec);
2550 	if (err)
2551 		return err;
2552 
2553 	argc = parse_options(argc, argv, record_options, record_usage,
2554 			    PARSE_OPT_STOP_AT_NON_OPTION);
2555 	if (quiet)
2556 		perf_quiet_option();
2557 
2558 	/* Make system wide (-a) the default target. */
2559 	if (!argc && target__none(&rec->opts.target))
2560 		rec->opts.target.system_wide = true;
2561 
2562 	if (nr_cgroups && !rec->opts.target.system_wide) {
2563 		usage_with_options_msg(record_usage, record_options,
2564 			"cgroup monitoring only available in system-wide mode");
2566 	}
2567 
2568 	if (rec->opts.kcore)
2569 		rec->data.is_dir = true;
2570 
2571 	if (rec->opts.comp_level != 0) {
2572 		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2573 		rec->no_buildid = true;
2574 	}
2575 
2576 	if (rec->opts.record_switch_events &&
2577 	    !perf_can_record_switch_events()) {
2578 		ui__error("kernel does not support recording context switch events\n");
2579 		parse_options_usage(record_usage, record_options, "switch-events", 0);
2580 		return -EINVAL;
2581 	}
2582 
2583 	if (switch_output_setup(rec)) {
2584 		parse_options_usage(record_usage, record_options, "switch-output", 0);
2585 		return -EINVAL;
2586 	}
2587 
2588 	if (rec->switch_output.time) {
2589 		signal(SIGALRM, alarm_sig_handler);
2590 		alarm(rec->switch_output.time);
2591 	}
2592 
2593 	if (rec->switch_output.num_files) {
2594 		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2595 						      sizeof(char *));
2596 		if (!rec->switch_output.filenames)
2597 			return -EINVAL;
2598 	}
2599 
2600 	/*
2601 	 * Allow aliases to facilitate the lookup of symbols for address
2602 	 * filters. Refer to auxtrace_parse_filters().
2603 	 */
2604 	symbol_conf.allow_aliases = true;
2605 
2606 	symbol__init(NULL);
2607 
2608 	if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2609 		rec->affinity_mask.nbits = cpu__max_cpu();
2610 		rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2611 		if (!rec->affinity_mask.bits) {
2612 			pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2613 			return -ENOMEM;
2614 		}
2615 		pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2616 	}
2617 
2618 	err = record__auxtrace_init(rec);
2619 	if (err)
2620 		goto out;
2621 
2622 	if (dry_run)
2623 		goto out;
2624 
2625 	err = bpf__setup_stdout(rec->evlist);
2626 	if (err) {
2627 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2628 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
2629 			 errbuf);
2630 		goto out;
2631 	}
2632 
2633 	err = -ENOMEM;
2634 
2635 	if (rec->no_buildid_cache || rec->no_buildid) {
2636 		disable_buildid_cache();
2637 	} else if (rec->switch_output.enabled) {
2638 		/*
2639 		 * In 'perf record --switch-output', disable buildid
2640 		 * generation by default to reduce data file switching
2641 		 * overhead. Still generate buildids if they are required
2642 		 * explicitly using
2643 		 *
2644 		 *  perf record --switch-output --no-no-buildid \
2645 		 *              --no-no-buildid-cache
2646 		 *
2647 		 * The following code is equivalent to:
2648 		 *
2649 		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2650 		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2651 		 *         disable_buildid_cache();
2652 		 */
2653 		bool disable = true;
2654 
2655 		if (rec->no_buildid_set && !rec->no_buildid)
2656 			disable = false;
2657 		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2658 			disable = false;
2659 		if (disable) {
2660 			rec->no_buildid = true;
2661 			rec->no_buildid_cache = true;
2662 			disable_buildid_cache();
2663 		}
2664 	}
2665 
2666 	if (record.opts.overwrite)
2667 		record.opts.tail_synthesize = true;
2668 
2669 	if (rec->evlist->core.nr_entries == 0 &&
2670 	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2671 		pr_err("Not enough memory for event selector list\n");
2672 		goto out;
2673 	}
2674 
2675 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2676 		rec->opts.no_inherit = true;
2677 
2678 	err = target__validate(&rec->opts.target);
2679 	if (err) {
2680 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2681 		ui__warning("%s\n", errbuf);
2682 	}
2683 
2684 	err = target__parse_uid(&rec->opts.target);
2685 	if (err) {
2686 		int saved_errno = errno;
2687 
2688 		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2689 		ui__error("%s", errbuf);
2690 
2691 		err = -saved_errno;
2692 		goto out;
2693 	}
2694 
2695 	/* Enable ignoring missing threads when -u/-p option is defined. */
2696 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2697 
2698 	err = -ENOMEM;
2699 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2700 		usage_with_options(record_usage, record_options);
2701 
2702 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2703 	if (err)
2704 		goto out;
2705 
2706 	/*
2707 	 * We take all buildids when the file contains
2708 	 * AUX area tracing data because we do not decode the
2709 	 * trace, as decoding it would take too long.
2710 	 */
2711 	if (rec->opts.full_auxtrace)
2712 		rec->buildid_all = true;
2713 
2714 	if (record_opts__config(&rec->opts)) {
2715 		err = -EINVAL;
2716 		goto out;
2717 	}
2718 
2719 	if (rec->opts.nr_cblocks > nr_cblocks_max)
2720 		rec->opts.nr_cblocks = nr_cblocks_max;
2721 	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2722 
2723 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2724 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2725 
2726 	if (rec->opts.comp_level > comp_level_max)
2727 		rec->opts.comp_level = comp_level_max;
2728 	pr_debug("comp level: %d\n", rec->opts.comp_level);
2729 
2730 	err = __cmd_record(&record, argc, argv);
2731 out:
2732 	bitmap_free(rec->affinity_mask.bits);
2733 	evlist__delete(rec->evlist);
2734 	symbol__exit();
2735 	auxtrace_record__free(rec->itr);
2736 	return err;
2737 }
2738 
2739 static void snapshot_sig_handler(int sig __maybe_unused)
2740 {
2741 	struct record *rec = &record;
2742 
2743 	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2744 		trigger_hit(&auxtrace_snapshot_trigger);
2745 		auxtrace_record__snapshot_started = 1;
2746 		if (auxtrace_record__snapshot_start(record.itr))
2747 			trigger_error(&auxtrace_snapshot_trigger);
2748 	}
2749 
2750 	if (switch_output_signal(rec))
2751 		trigger_hit(&switch_output_trigger);
2752 }
2753 
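/*
 * SIGALRM handler armed by cmd_record() via alarm(rec->switch_output.time)
 * when a time based --switch-output threshold is configured; it only marks
 * the switch-output trigger as hit.
 */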
2754 static void alarm_sig_handler(int sig __maybe_unused)
2755 {
2756 	struct record *rec = &record;
2757 
2758 	if (switch_output_time(rec))
2759 		trigger_hit(&switch_output_trigger);
2760 }
2761