// SPDX-License-Identifier: GPL-2.0
/* tools/perf/util/record.c (revision e95770af) */
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "parse-events.h"
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <api/fs/fs.h>
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
#include "record.h"
#include "../perf-sys.h"

typedef void (*setup_probe_fn_t)(struct evsel *evsel);

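/*
 * Parse 'str' into an event, open it on 'cpu' to make sure the base event
 * works, apply 'fn' to its attribute, then reopen it to test whether the
 * kernel accepts the modified attribute. Returns 0 on success, -EINVAL if
 * the kernel rejected the modification, and -EAGAIN if the event itself
 * could not be opened (so the caller may retry with a different event).
 */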
static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
{
	struct evlist *evlist;
	struct evsel *evsel;
	unsigned long flags = perf_event_open_cloexec_flag();
	int err = -EAGAIN, fd;
	static pid_t pid = -1;

	evlist = evlist__new();
	if (!evlist)
		return -ENOMEM;

	if (parse_events(evlist, str, NULL))
		goto out_delete;

	evsel = evlist__first(evlist);

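	/*
	 * Open the event once to check that the base event works at all; on
	 * EACCES, fall back from system-wide to per-process profiling. The
	 * static 'pid' remembers the fallback for subsequent probes.
	 */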
	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);

	fn(evsel);

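	/*
	 * Reopen with the attribute bits set by fn(): success means the
	 * kernel supports them, EINVAL means it does not.
	 */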
	fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
	if (fd < 0) {
		if (errno == EINVAL)
			err = -EINVAL;
		goto out_delete;
	}
	close(fd);
	err = 0;

out_delete:
	evlist__delete(evlist);
	return err;
}

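/*
 * Run the probe with a few simple events, falling back from hardware events
 * to a software event that should exist everywhere: -EAGAIN means the event
 * itself could not be opened, so the next one is tried.
 */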
static bool perf_probe_api(setup_probe_fn_t fn)
{
	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
	struct perf_cpu_map *cpus;
	int cpu, ret, i = 0;

	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	do {
		ret = perf_do_probe_api(fn, cpu, try[i++]);
		if (!ret)
			return true;
	} while (ret == -EAGAIN && try[i]);

	return false;
}

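/*
 * setup_probe_fn_t callbacks for perf_probe_api(): each one sets just the
 * attribute bit whose kernel support is being tested.
 */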
static void perf_probe_sample_identifier(struct evsel *evsel)
{
	evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
}

static void perf_probe_comm_exec(struct evsel *evsel)
{
	evsel->core.attr.comm_exec = 1;
}

static void perf_probe_context_switch(struct evsel *evsel)
{
	evsel->core.attr.context_switch = 1;
}

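/*
 * Feature tests built on perf_probe_api(). Each one probes the running
 * kernel anew on every call.
 */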
bool perf_can_sample_identifier(void)
{
	return perf_probe_api(perf_probe_sample_identifier);
}

static bool perf_can_comm_exec(void)
{
	return perf_probe_api(perf_probe_comm_exec);
}

bool perf_can_record_switch_events(void)
{
	return perf_probe_api(perf_probe_context_switch);
}

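/*
 * Check whether opening a CPU-wide (pid == -1) event is permitted at all;
 * perf_event_paranoid may forbid this for unprivileged users.
 */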
bool perf_can_record_cpu_wide(void)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_CPU_CLOCK,
		.exclude_kernel = 1,
	};
	struct perf_cpu_map *cpus;
	int cpu, fd;

	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		return false;
	cpu = cpus->map[0];
	perf_cpu_map__put(cpus);

	fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
	if (fd < 0)
		return false;
	close(fd);

	return true;
}

/*
 * Architectures are expected to know if AUX area sampling is supported by the
 * hardware. Here we check for kernel support.
 */
bool perf_can_aux_sample(void)
{
	struct perf_event_attr attr = {
		.size = sizeof(struct perf_event_attr),
		.exclude_kernel = 1,
		/*
		 * Non-zero value causes the kernel to calculate the effective
		 * attribute size up to that byte.
		 */
		.aux_sample_size = 1,
	};
	int fd;

	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
	/*
	 * If the kernel attribute is big enough to contain aux_sample_size
	 * then we assume that it is supported. We are relying on the kernel to
	 * validate the attribute size before anything else that could be wrong.
	 */
	if (fd < 0 && errno == E2BIG)
		return false;
	if (fd >= 0)
		close(fd);

	return true;
}

/*
 * perf_evsel__config_leader_sampling() uses special rules for leader sampling.
 * However, if the leader is an AUX area event, then assume the event to sample
 * is the next event.
 */
static struct evsel *perf_evsel__read_sampler(struct evsel *evsel,
					      struct evlist *evlist)
{
	struct evsel *leader = evsel->leader;

	if (perf_evsel__is_aux_event(leader)) {
		evlist__for_each_entry(evlist, evsel) {
			if (evsel->leader == leader && evsel != evsel->leader)
				return evsel;
		}
	}

	return leader;
}

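/*
 * With leader sampling, only one event in the group actually samples; the
 * other members are read out as part of that event's samples. Adjust 'evsel'
 * if it is a group member that should not sample by itself.
 */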
static void perf_evsel__config_leader_sampling(struct evsel *evsel,
					       struct evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel->leader;
	struct evsel *read_sampler;

	if (!leader->sample_read)
		return;

	read_sampler = perf_evsel__read_sampler(evsel, evlist);

	if (evsel == read_sampler)
		return;

	/*
	 * Disable sampling for all group members other than the leader in
	 * case the leader 'leads' the sampling, except when the leader is an
	 * AUX area event, in which case the 2nd event in the group is the one
	 * that 'leads' the sampling.
	 */
	attr->freq           = 0;
	attr->sample_freq    = 0;
	attr->sample_period  = 0;
	attr->write_backward = 0;

	/*
	 * We don't get samples for slave events; they are synthesized when
	 * the group leader sample is delivered. Set the slave event to follow
	 * the master's sample_type to ease reporting.
	 * An AUX area event also has sample_type requirements, so also include
	 * the sample type bits from the leader's sample_type to cover that
	 * case.
	 */
	attr->sample_type = read_sampler->core.attr.sample_type |
			    leader->core.attr.sample_type;
}

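/*
 * Apply the record options to every event in the evlist and settle the
 * evlist-wide questions: group leadership, leader sampling, and whether
 * sample IDs (possibly via PERF_SAMPLE_IDENTIFIER) are needed to match
 * samples back to events.
 */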
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
			 struct callchain_param *callchain)
{
	struct evsel *evsel;
	bool use_sample_identifier = false;
	bool use_comm_exec;
	bool sample_id = opts->sample_id;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	if (evlist->core.cpus->map[0] < 0)
		opts->no_inherit = true;

	use_comm_exec = perf_can_comm_exec();

	evlist__for_each_entry(evlist, evsel) {
		perf_evsel__config(evsel, opts, callchain);
		if (evsel->tracking && use_comm_exec)
			evsel->core.attr.comm_exec = 1;
	}

	/* Configure leader sampling here now that the sample type is known */
	evlist__for_each_entry(evlist, evsel)
		perf_evsel__config_leader_sampling(evsel, evlist);

	if (opts->full_auxtrace) {
		/*
		 * Need to be able to synthesize and parse selected events with
		 * arbitrary sample types, which requires always being able to
		 * match the id.
		 */
		use_sample_identifier = perf_can_sample_identifier();
		sample_id = true;
	} else if (evlist->core.nr_entries > 1) {
		struct evsel *first = evlist__first(evlist);

		evlist__for_each_entry(evlist, evsel) {
			if (evsel->core.attr.sample_type == first->core.attr.sample_type)
				continue;
			use_sample_identifier = perf_can_sample_identifier();
			break;
		}
		sample_id = true;
	}

	if (sample_id) {
		evlist__for_each_entry(evlist, evsel)
			perf_evsel__set_sample_id(evsel, use_sample_identifier);
	}

	perf_evlist__set_id_pos(evlist);
}

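/* Read the kernel's current limit on the sampling frequency, in Hz. */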
static int get_max_rate(unsigned int *rate)
{
	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
}

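/*
 * Resolve the sampling period and frequency from the options: a
 * user-specified period (event count) takes precedence over a frequency,
 * and frequencies above kernel.perf_event_max_sample_rate are throttled
 * down to it (or rejected, if --strict-freq asked for that).
 */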
static int record_opts__config_freq(struct record_opts *opts)
{
	bool user_freq = opts->user_freq != UINT_MAX;
	unsigned int max_rate;

	if (opts->user_interval != ULLONG_MAX)
		opts->default_interval = opts->user_interval;
	if (user_freq)
		opts->freq = opts->user_freq;

	/*
	 * A user-specified count overrides the default frequency.
	 */
	if (opts->default_interval)
		opts->freq = 0;
	else if (opts->freq) {
		opts->default_interval = opts->freq;
	} else {
		pr_err("frequency and count are zero, aborting\n");
		return -1;
	}

	if (get_max_rate(&max_rate))
		return 0;

	/*
	 * The user-specified frequency is over the current maximum.
	 */
	if (user_freq && (max_rate < opts->freq)) {
		if (opts->strict_freq) {
			pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
			       "       Please use the -F freq option with a lower value or consider\n"
			       "       tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
			       max_rate);
			return -1;
		} else {
			pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
				   "         The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
				   "         The kernel will lower it when perf's interrupts take too long.\n"
				   "         Use --strict-freq to refuse to record instead of throttling.\n",
				   max_rate, opts->freq, max_rate);

			opts->freq = max_rate;
		}
	}

	/*
	 * The default frequency is over the current maximum.
	 */
	if (max_rate < opts->freq) {
		pr_warning("Lowering default frequency rate to %u.\n"
			   "Please consider tweaking "
			   "/proc/sys/kernel/perf_event_max_sample_rate.\n",
			   max_rate);
		opts->freq = max_rate;
	}

	return 0;
}

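/* For now, only the frequency/period settings need resolving up front. */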
int record_opts__config(struct record_opts *opts)
{
	return record_opts__config_freq(opts);
}

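/*
 * Check whether the event described by 'str' can actually be opened, trying
 * the first CPU of 'evlist' (or of the whole system if the evlist has no CPU
 * map) and using the same EACCES fallback from system-wide to per-process
 * profiling as perf_do_probe_api().
 */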
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
{
	struct evlist *temp_evlist;
	struct evsel *evsel;
	int err, fd, cpu;
	bool ret = false;
	pid_t pid = -1;

	temp_evlist = evlist__new();
	if (!temp_evlist)
		return false;

	err = parse_events(temp_evlist, str, NULL);
	if (err)
		goto out_delete;

	evsel = evlist__last(temp_evlist);

	if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) {
		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);

		cpu = cpus ? cpus->map[0] : 0;
		perf_cpu_map__put(cpus);
	} else {
		cpu = evlist->core.cpus->map[0];
	}

	while (1) {
		fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1,
					 perf_event_open_cloexec_flag());
		if (fd < 0) {
			if (pid == -1 && errno == EACCES) {
				pid = 0;
				continue;
			}
			goto out_delete;
		}
		break;
	}
	close(fd);
	ret = true;

out_delete:
	evlist__delete(temp_evlist);
	return ret;
}

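/*
 * parse-options callback for the sampling frequency: accepts a number or
 * "max", which expands to the current kernel.perf_event_max_sample_rate.
 * A caller would wire it up roughly like this (illustrative sketch, not
 * copied from an actual caller; 'opts' stands for a struct record_opts):
 *
 *	OPT_CALLBACK('F', "freq", &opts, "freq or 'max'",
 *		     "profile at this frequency", record__parse_freq)
 */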
int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
{
	unsigned int freq;
	struct record_opts *opts = opt->value;

	if (!str)
		return -EINVAL;

	if (strcasecmp(str, "max") == 0) {
		if (get_max_rate(&freq)) {
			pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
			return -1;
		}
		pr_info("info: Using a maximum frequency rate of %'u Hz\n", freq);
	} else {
		freq = atoi(str);
	}

	opts->user_freq = freq;
	return 0;
}
424