xref: /openbmc/linux/tools/perf/util/record.c (revision dc6a81c3)
1 // SPDX-License-Identifier: GPL-2.0
2 #include "debug.h"
3 #include "evlist.h"
4 #include "evsel.h"
5 #include "parse-events.h"
6 #include <errno.h>
7 #include <limits.h>
8 #include <stdlib.h>
9 #include <api/fs/fs.h>
10 #include <subcmd/parse-options.h>
11 #include <perf/cpumap.h>
12 #include "cloexec.h"
13 #include "record.h"
14 #include "../perf-sys.h"
15 
/* Callback invoked on a probe evsel to set the attribute bit under test. */
typedef void (*setup_probe_fn_t)(struct evsel *evsel);
17 
18 static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
19 {
20 	struct evlist *evlist;
21 	struct evsel *evsel;
22 	unsigned long flags = perf_event_open_cloexec_flag();
23 	int err = -EAGAIN, fd;
24 	static pid_t pid = -1;
25 
26 	evlist = evlist__new();
27 	if (!evlist)
28 		return -ENOMEM;
29 
30 	if (parse_events(evlist, str, NULL))
31 		goto out_delete;
32 
33 	evsel = evlist__first(evlist);
34 
35 	while (1) {
36 		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
37 		if (fd < 0) {
38 			if (pid == -1 && errno == EACCES) {
39 				pid = 0;
40 				continue;
41 			}
42 			goto out_delete;
43 		}
44 		break;
45 	}
46 	close(fd);
47 
48 	fn(evsel);
49 
50 	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
51 	if (fd < 0) {
52 		if (errno == EINVAL)
53 			err = -EINVAL;
54 		goto out_delete;
55 	}
56 	close(fd);
57 	err = 0;
58 
59 out_delete:
60 	evlist__delete(evlist);
61 	return err;
62 }
63 
64 static bool perf_probe_api(setup_probe_fn_t fn)
65 {
66 	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
67 	struct perf_cpu_map *cpus;
68 	int cpu, ret, i = 0;
69 
70 	cpus = perf_cpu_map__new(NULL);
71 	if (!cpus)
72 		return false;
73 	cpu = cpus->map[0];
74 	perf_cpu_map__put(cpus);
75 
76 	do {
77 		ret = perf_do_probe_api(fn, cpu, try[i++]);
78 		if (!ret)
79 			return true;
80 	} while (ret == -EAGAIN && try[i]);
81 
82 	return false;
83 }
84 
/* Probe callback: request PERF_SAMPLE_IDENTIFIER in the sample type. */
static void perf_probe_sample_identifier(struct evsel *evsel)
{
	evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
}
89 
/* Probe callback: request the comm_exec attribute flag. */
static void perf_probe_comm_exec(struct evsel *evsel)
{
	evsel->core.attr.comm_exec = 1;
}
94 
/* Probe callback: request context-switch records. */
static void perf_probe_context_switch(struct evsel *evsel)
{
	evsel->core.attr.context_switch = 1;
}
99 
/* True if the running kernel supports PERF_SAMPLE_IDENTIFIER. */
bool perf_can_sample_identifier(void)
{
	return perf_probe_api(perf_probe_sample_identifier);
}
104 
/* True if the running kernel supports the comm_exec attribute flag. */
static bool perf_can_comm_exec(void)
{
	return perf_probe_api(perf_probe_comm_exec);
}
109 
/* True if the running kernel can emit context-switch records. */
bool perf_can_record_switch_events(void)
{
	return perf_probe_api(perf_probe_context_switch);
}
114 
115 bool perf_can_record_cpu_wide(void)
116 {
117 	struct perf_event_attr attr = {
118 		.type = PERF_TYPE_SOFTWARE,
119 		.config = PERF_COUNT_SW_CPU_CLOCK,
120 		.exclude_kernel = 1,
121 	};
122 	struct perf_cpu_map *cpus;
123 	int cpu, fd;
124 
125 	cpus = perf_cpu_map__new(NULL);
126 	if (!cpus)
127 		return false;
128 	cpu = cpus->map[0];
129 	perf_cpu_map__put(cpus);
130 
131 	fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
132 	if (fd < 0)
133 		return false;
134 	close(fd);
135 
136 	return true;
137 }
138 
139 /*
140  * Architectures are expected to know if AUX area sampling is supported by the
141  * hardware. Here we check for kernel support.
142  */
143 bool perf_can_aux_sample(void)
144 {
145 	struct perf_event_attr attr = {
146 		.size = sizeof(struct perf_event_attr),
147 		.exclude_kernel = 1,
148 		/*
149 		 * Non-zero value causes the kernel to calculate the effective
150 		 * attribute size up to that byte.
151 		 */
152 		.aux_sample_size = 1,
153 	};
154 	int fd;
155 
156 	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
157 	/*
158 	 * If the kernel attribute is big enough to contain aux_sample_size
159 	 * then we assume that it is supported. We are relying on the kernel to
160 	 * validate the attribute size before anything else that could be wrong.
161 	 */
162 	if (fd < 0 && errno == E2BIG)
163 		return false;
164 	if (fd >= 0)
165 		close(fd);
166 
167 	return true;
168 }
169 
/*
 * Configure every evsel in @evlist for recording according to @opts and
 * @callchain, and decide how sample IDs are laid out so events from
 * different evsels can be matched back to their origin.
 */
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain)
{
	struct evsel *evsel;
	bool use_sample_identifier = false;
	bool use_comm_exec;
	bool sample_id = opts->sample_id;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	/* A negative first map entry means "any CPU" (per-task monitoring). */
	if (evlist->core.cpus->map[0] < 0)
		opts->no_inherit = true;

	use_comm_exec = perf_can_comm_exec();

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__config(evsel, opts, callchain);
		/* Only the tracking evsel needs comm_exec, when supported. */
		if (evsel->tracking && use_comm_exec)
			evsel->core.attr.comm_exec = 1;
	}

	if (opts->full_auxtrace) {
		/*
		 * Need to be able to synthesize and parse selected events with
		 * arbitrary sample types, which requires always being able to
		 * match the id.
		 */
		use_sample_identifier = perf_can_sample_identifier();
		sample_id = true;
	} else if (evlist->core.nr_entries > 1) {
		struct evsel *first = evlist__first(evlist);

		/*
		 * With mixed sample types the id position differs per evsel,
		 * so the kernel-fixed PERF_SAMPLE_IDENTIFIER is needed (when
		 * available) to locate the id in every record.
		 */
		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
				continue;
			use_sample_identifier = perf_can_sample_identifier();
			break;
		}
		sample_id = true;
	}

	if (sample_id) {
		evlist__for_each_entry(evlist, evsel)
			perf_evsel__set_sample_id(evsel, use_sample_identifier);
	}

	perf_evlist__set_id_pos(evlist);
}
223 
/*
 * Read kernel.perf_event_max_sample_rate into *rate.
 * Returns 0 on success, negative on failure (sysctl not readable).
 * Note: sysctl__read_int writes through an int*, so the cast assumes the
 * value fits in a signed int — true for the kernel's sample-rate range.
 */
static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}
228 
/*
 * Resolve the sampling frequency/period in @opts from user-supplied values
 * and defaults, then clamp the frequency against the kernel's
 * perf_event_max_sample_rate limit.  Returns 0 on success, -1 when no
 * usable frequency/period results or --strict-freq rejects the clamp.
 */
static int record_opts__config_freq(struct record_opts *opts)
{
	bool user_freq = opts->user_freq != UINT_MAX;
	unsigned int max_rate;

	/* User-supplied values (when set) override the built-in defaults. */
	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (user_freq)
		opts->freq = opts->user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		pr_err("frequency and count are zero, aborting\n");
		return -1;
	}

	/* If the limit cannot be read, skip the clamping checks entirely. */
	if (get_max_rate(&max_rate))
		return 0;

	/*
	 * User specified frequency is over current maximum.
	 */
	if (user_freq && (max_rate < opts->freq)) {
		if (opts->strict_freq) {
			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
			       "       Please use -F freq option with a lower value or consider\n"
			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
			       max_rate);
			return -1;
		} else {
			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
				   "         The kernel will lower it when perf's interrupts take too long.\n"
				   "         Use --strict-freq to disable this throttling, refusing to record.\n",
				   max_rate, opts->freq, max_rate);

			opts->freq = max_rate;
		}
	}

	/*
	 * Default frequency is over current maximum.
	 */
	if (max_rate < opts->freq) {
		pr_warning("Lowering default frequency rate to %u.\n"
			   "Please consider tweaking "
			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
			   max_rate);
		opts->freq = max_rate;
	}

	return 0;
}
288 
/* Finalize record options; currently only frequency/period resolution. */
int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}
293 
/*
 * Check whether the event described by @str can actually be opened, by
 * parsing it into a throwaway evlist and test-opening it on one CPU.
 * @evlist may be NULL; it is only consulted for its CPU map.
 * Returns true if the open succeeded.
 */
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
{
	struct evlist *temp_evlist;
	struct evsel *evsel;
	int err, fd, cpu;
	bool ret = false;
	pid_t pid = -1;

	temp_evlist = evlist__new();
	if (!temp_evlist)
		return false;

	err = parse_events(temp_evlist, str, NULL);
	if (err)
		goto out_delete;

	/* @str may describe several events; probe the last one parsed. */
	evsel = evlist__last(temp_evlist);

	/* Pick a CPU: from @evlist if it has a real CPU map, else CPU 0-ish. */
	if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);

		cpu =  cpus ? cpus->map[0] : 0;
		perf_cpu_map__put(cpus);
	} else {
		cpu = evlist->core.cpus->map[0];
	}

	/* Test-open; on EACCES retry as a per-task (pid 0) event. */
	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
					 perf_event_open_cloexec_flag());
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);
	ret = true;

out_delete:
	evlist__delete(temp_evlist);
	return ret;
}
340 
341 int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
342 {
343 	unsigned int freq;
344 	struct record_opts *opts = opt->value;
345 
346 	if (!str)
347 		return -EINVAL;
348 
349 	if (strcasecmp(str, "max") == 0) {
350 		if (get_max_rate(&freq)) {
351 			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
352 			return -1;
353 		}
354 		pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
355 	} else {
356 		freq = atoi(str);
357 	}
358 
359 	opts->user_freq = freq;
360 	return 0;
361 }
362