// SPDX-License-Identifier: GPL-2.0
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <sys/ioctl.h>
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>

void perf_evlist__init(struct perf_evlist *evlist)
{
	INIT_LIST_HEAD(&evlist->entries);
	evlist->nr_entries = 0;
	fdarray__init(&evlist->pollfd, 64);
	perf_evlist__reset_id_hash(evlist);
}
34 
35 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
36 					  struct perf_evsel *evsel)
37 {
38 	/*
39 	 * We already have cpus for evsel (via PMU sysfs) so
40 	 * keep it, if there's no target cpu list defined.
41 	 */
42 	if (!evsel->own_cpus || evlist->has_user_cpus) {
43 		perf_cpu_map__put(evsel->cpus);
44 		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
45 	} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
46 		perf_cpu_map__put(evsel->cpus);
47 		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
48 	} else if (evsel->cpus != evsel->own_cpus) {
49 		perf_cpu_map__put(evsel->cpus);
50 		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
51 	}
52 
53 	perf_thread_map__put(evsel->threads);
54 	evsel->threads = perf_thread_map__get(evlist->threads);
55 	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
56 }

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	/* Recomputing all_cpus, so start with a blank slate. */
	perf_cpu_map__put(evlist->all_cpus);
	evlist->all_cpus = NULL;

	perf_evlist__for_each_evsel(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist,
		      struct perf_evsel *evsel)
{
	evsel->idx = evlist->nr_entries;
	list_add_tail(&evsel->node, &evlist->entries);
	evlist->nr_entries += 1;
	__perf_evlist__propagate_maps(evlist, evsel);
}
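
/*
 * A minimal construction sketch (a usage example, not part of this file):
 * callers allocate an evlist and add evsels created from perf_event_attr
 * templates; perf_evlist__add() propagates the evlist's maps to the new
 * evsel right away:
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_SOFTWARE,
 *		.config	= PERF_COUNT_SW_CPU_CLOCK,
 *	};
 *	struct perf_evlist *evlist = perf_evlist__new();
 *	struct perf_evsel *evsel = perf_evsel__new(&attr);
 *
 *	perf_evlist__add(evlist, evsel);
 */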

void perf_evlist__remove(struct perf_evlist *evlist,
			 struct perf_evsel *evsel)
{
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist);

	return evlist;
}

struct perf_evsel *
perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
{
	struct perf_evsel *next;

	if (!prev) {
		next = list_first_entry(&evlist->entries,
					struct perf_evsel,
					node);
	} else {
		next = list_next_entry(prev, node);
	}

	/* An empty list is noticed here, so there's no need to check on entry. */
	if (&next->node == &evlist->entries)
		return NULL;

	return next;
}
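
/*
 * perf_evlist__next() is the primitive behind the iteration macros in
 * <perf/evlist.h>; a usage sketch (do_something() is a hypothetical
 * caller-supplied function):
 *
 *	struct perf_evsel *evsel;
 *
 *	perf_evlist__for_each_evsel(evlist, evsel)
 *		do_something(evsel);
 */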

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	perf_evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	perf_cpu_map__put(evlist->user_requested_cpus);
	perf_cpu_map__put(evlist->all_cpus);
	perf_thread_map__put(evlist->threads);
	evlist->user_requested_cpus = NULL;
	evlist->all_cpus = NULL;
	evlist->threads = NULL;
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void perf_evlist__set_maps(struct perf_evlist *evlist,
			   struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or the other of the maps isn't
	 * being changed, i.e. don't put it.  Note we are assuming the maps
	 * that are being applied are brand new and evlist is taking ownership
	 * of the original reference count of 1.  If that is not the case it
	 * is up to the caller to increase the reference count.
	 */
	if (cpus != evlist->user_requested_cpus) {
		perf_cpu_map__put(evlist->user_requested_cpus);
		evlist->user_requested_cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	if (!evlist->all_cpus && cpus)
		evlist->all_cpus = perf_cpu_map__get(cpus);

	perf_evlist__propagate_maps(evlist);
}
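
/*
 * Typical setup sketch (an assumed caller pattern): brand-new maps are
 * handed over together with their initial reference, as the note above
 * assumes.  At this revision, perf_cpu_map__new(NULL) maps all online
 * CPUs and perf_thread_map__new_dummy() is the per-thread placeholder:
 *
 *	struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
 *	struct perf_thread_map *threads = perf_thread_map__new_dummy();
 *
 *	perf_evlist__set_maps(evlist, cpus, threads);
 */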

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	perf_evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;

out_err:
	perf_evlist__close(evlist);
	return err;
}
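
/*
 * Error-handling sketch (an assumed caller pattern): on failure everything
 * opened so far has already been closed above, so the caller only needs to
 * inspect the error:
 *
 *	int err = perf_evlist__open(evlist);
 *
 *	if (err < 0)
 *		fprintf(stderr, "failed to open evlist: %d\n", err);
 */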

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__enable(evsel);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__disable(evsel);
}
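
/*
 * A minimal counting workflow, sketched assuming the evlist was opened as
 * above (perf_evsel__read() and struct perf_counts_values come from
 * <perf/evsel.h>):
 *
 *	struct perf_counts_values counts = { 0 };
 *
 *	perf_evlist__enable(evlist);
 *	// ... run the workload ...
 *	perf_evlist__disable(evlist);
 *
 *	perf_evlist__for_each_evsel(evlist, evsel)
 *		perf_evsel__read(evsel, 0, 0, &counts);	// first cpu, thread
 */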

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	return first->attr.read_format;
}

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
}

void perf_evlist__id_add(struct perf_evlist *evlist,
			 struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get the event id. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}
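
/*
 * For reference, the data layout read() returns for a non-group event,
 * which is what the legacy fallback above indexes into (see
 * perf_event_open(2)):
 *
 *	struct read_format {
 *		u64 value;		// counter value
 *		u64 time_enabled;	// if PERF_FORMAT_TOTAL_TIME_ENABLED
 *		u64 time_running;	// if PERF_FORMAT_TOTAL_TIME_RUNNING
 *		u64 id;			// if PERF_FORMAT_ID
 *	};
 */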

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
			    void *ptr, short revent, enum fdarray_flags flags)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);

	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = ptr;
		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}
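
/*
 * Event-loop sketch (an assumed driver loop, not from this file): once the
 * ring buffers are mapped, callers typically block here between read
 * passes:
 *
 *	while (!done) {
 *		int n = perf_evlist__poll(evlist, 100);	// timeout in ms
 *
 *		if (n < 0)
 *			break;
 *		// drain the mmaps; see perf_evlist__next_mmap() below
 *	}
 */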

static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
{
	int i;
	struct perf_mmap *map;

	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *prev = i ? &map[i - 1] : NULL;

		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i], prev, overwrite, NULL);
	}

	return map;
}

static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->idx = idx;
	sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu);
	sid->tid = perf_thread_map__pid(evsel->threads, thread);
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
{
	struct perf_mmap *maps;

	maps = overwrite ? evlist->mmap_ovw : evlist->mmap;

	if (!maps) {
		maps = perf_evlist__alloc_mmap(evlist, overwrite);
		if (!maps)
			return NULL;

		if (overwrite)
			evlist->mmap_ovw = maps;
		else
			evlist->mmap = maps;
	}

	return &maps[idx];
}

#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
			  int output, struct perf_cpu cpu)
{
	return perf_mmap__mmap(map, mp, output, cpu);
}

static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
					bool overwrite)
{
	if (overwrite)
		evlist->mmap_ovw_first = map;
	else
		evlist->mmap_first = map;
}

static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	       int idx, struct perf_mmap_param *mp, int cpu_idx,
	       int thread, int *_output, int *_output_overwrite)
{
	struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
	struct perf_evsel *evsel;
	int revent;

	perf_evlist__for_each_entry(evlist, evsel) {
		bool overwrite = evsel->attr.write_backward;
		struct perf_mmap *map;
		int *output, fd, cpu;

		if (evsel->system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		map = ops->get(evlist, overwrite, idx);
		if (map == NULL)
			return -ENOMEM;

		if (overwrite) {
			mp->prot = PROT_READ;
			output   = _output_overwrite;
		} else {
			mp->prot = PROT_READ | PROT_WRITE;
			output   = _output;
		}

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			/*
			 * The last one will be done at perf_mmap__consume(), so that we
			 * make sure we don't prevent tools from consuming every last event in
			 * the ring buffer.
			 *
			 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
			 * anymore, but the last events for it are still in the ring buffer,
			 * waiting to be consumed.
			 *
			 * Tools can choose to ignore this at their own discretion, but the
			 * evlist layer can't just drop it when filtering events in
			 * perf_evlist__filter_pollfd().
			 */
			refcount_set(&map->refcnt, 2);

			if (ops->idx)
				ops->idx(evlist, evsel, mp, idx);

			if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
				return -1;

			if (!idx)
				perf_evlist__set_mmap_first(evlist, map, overwrite);
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(map);
		}

		revent = !overwrite ? POLLIN : 0;

		if (!evsel->system_wide &&
		    perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
			perf_mmap__put(map);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evsel__set_sid_idx(evsel, idx, cpu, thread);
		}
	}

	return 0;
}

static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
		struct perf_mmap_param *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->threads);

	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
				   &output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	     struct perf_mmap_param *mp)
{
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nr_cpus    = perf_cpu_map__nr(evlist->user_requested_cpus);
	int cpu, thread;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		for (thread = 0; thread < nr_threads; thread++) {
			if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
					   thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

/* One mmap per cpu, unless recording per-thread (empty cpu map). */
static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
	int nr_mmaps;

	nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
	if (perf_cpu_map__empty(evlist->user_requested_cpus))
		nr_mmaps = perf_thread_map__nr(evlist->threads);

	return nr_mmaps;
}

int perf_evlist__mmap_ops(struct perf_evlist *evlist,
			  struct perf_evlist_mmap_ops *ops,
			  struct perf_mmap_param *mp)
{
	struct perf_evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->user_requested_cpus;

	if (!ops || !ops->get || !ops->mmap)
		return -EINVAL;

	mp->mask = evlist->mmap_len - page_size - 1;

	evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);

	perf_evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
			return -ENOMEM;
	}

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	if (perf_cpu_map__empty(cpus))
		return mmap_per_thread(evlist, ops, mp);

	return mmap_per_cpu(evlist, ops, mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
{
	struct perf_mmap_param mp;
	struct perf_evlist_mmap_ops ops = {
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	evlist->mmap_len = (pages + 1) * page_size;

	return perf_evlist__mmap_ops(evlist, &ops, &mp);
}
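
/*
 * A read-loop sketch using perf_evlist__for_each_mmap() from
 * <perf/evlist.h> and the perf_mmap read API from <perf/mmap.h>
 * (process_event() is a hypothetical callback; pages counts data pages,
 * with one extra control page added above):
 *
 *	union perf_event *event;
 *	struct perf_mmap *map;
 *
 *	perf_evlist__mmap(evlist, 64);
 *
 *	perf_evlist__for_each_mmap(evlist, map, false) {
 *		if (perf_mmap__read_init(map) < 0)
 *			continue;
 *
 *		while ((event = perf_mmap__read_event(map)) != NULL) {
 *			process_event(event);
 *			perf_mmap__consume(map);
 *		}
 *		perf_mmap__read_done(map);
 *	}
 */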

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
	}

	if (evlist->mmap_ovw) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap_ovw[i]);
	}

	zfree(&evlist->mmap);
	zfree(&evlist->mmap_ovw);
}

struct perf_mmap*
perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
		       bool overwrite)
{
	if (map)
		return map->next;

	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}

void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
	struct perf_evsel *first, *last, *evsel;

	first = list_first_entry(list, struct perf_evsel, node);
	last = list_last_entry(list, struct perf_evsel, node);

	leader->nr_members = last->idx - first->idx + 1;

	__perf_evlist__for_each_entry(list, evsel)
		evsel->leader = leader;
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		struct perf_evsel *first = list_entry(evlist->entries.next,
						struct perf_evsel, node);

		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries, first);
	}
}
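
/*
 * Grouping sketch (an assumed caller pattern; cycles_evsel and
 * instructions_evsel are hypothetical): after adding the evsels, the first
 * entry becomes the leader of all of them:
 *
 *	perf_evlist__add(evlist, cycles_evsel);
 *	perf_evlist__add(evlist, instructions_evsel);
 *	perf_evlist__set_leader(evlist);	// cycles_evsel->nr_members == 2
 */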
669