// SPDX-License-Identifier: GPL-2.0
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/compiler.h>
#include <sys/ioctl.h>
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>

void perf_evlist__init(struct perf_evlist *evlist)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	evlist->nr_entries = 0;
	fdarray__init(&evlist->pollfd, 64);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs), so keep them
	 * unless a target cpu list was defined by the user.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
	}

	perf_thread_map__put(evsel->threads);
	evsel->threads = perf_thread_map__get(evlist->threads);
	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_evsel(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist,
		      struct perf_evsel *evsel)
{
	list_add_tail(&evsel->node, &evlist->entries);
	evlist->nr_entries += 1;
	__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__remove(struct perf_evlist *evlist,
			 struct perf_evsel *evsel)
{
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist);

	return evlist;
}
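/*
 * Usage sketch (illustrative only, not compiled here): build an evlist
 * with one software event counting the current thread, using the public
 * libperf API from <perf/evlist.h>, <perf/evsel.h> and <perf/threadmap.h>:
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_SOFTWARE,
 *		.config	= PERF_COUNT_SW_CPU_CLOCK,
 *	};
 *	struct perf_thread_map *threads = perf_thread_map__new_dummy();
 *	struct perf_evlist *evlist = perf_evlist__new();
 *	struct perf_evsel *evsel = perf_evsel__new(&attr);
 *
 *	perf_thread_map__set_pid(threads, 0, 0);	// pid 0 == this thread
 *	perf_evlist__add(evlist, evsel);
 *	perf_evlist__set_maps(evlist, NULL, threads);
 */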
struct perf_evsel *
perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
{
	struct perf_evsel *next;

	if (!prev) {
		next = list_first_entry(&evlist->entries,
					struct perf_evsel,
					node);
	} else {
		next = list_next_entry(prev, node);
	}

	/* An empty list is detected here, so no check is needed on entry. */
	if (&next->node == &evlist->entries)
		return NULL;

	return next;
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	perf_evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	perf_cpu_map__put(evlist->cpus);
	perf_thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void perf_evlist__set_maps(struct perf_evlist *evlist,
			   struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't
	 * being changed, i.e. don't put it. Note we are assuming the maps
	 * that are being applied are brand new and evlist is taking
	 * ownership of the original reference count of 1. If that is not
	 * the case it is up to the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		perf_cpu_map__put(evlist->cpus);
		evlist->cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	perf_evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;

out_err:
	perf_evlist__close(evlist);
	return err;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__enable(evsel);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__disable(evsel);
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	return first->attr.read_format;
}
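/*
 * Usage sketch (illustrative only): a simple counting session with the
 * evlist built above, using perf_evsel__read() and struct
 * perf_counts_values from the public libperf API:
 *
 *	struct perf_evsel *evsel;
 *	struct perf_counts_values counts;
 *
 *	if (perf_evlist__open(evlist) == 0) {
 *		perf_evlist__enable(evlist);
 *		// ... run the workload being measured ...
 *		perf_evlist__disable(evlist);
 *
 *		perf_evlist__for_each_evsel(evlist, evsel)
 *			perf_evsel__read(evsel, 0, 0, &counts);
 *
 *		perf_evlist__close(evlist);
 *	}
 */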
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist,
			 struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get the event id... all hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = perf_cpu_map__nr(evlist->cpus);
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
			    void *ptr, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);

	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = ptr;
		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}
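/*
 * Usage sketch (illustrative only): a sampling tool typically blocks in
 * perf_evlist__poll() until some ring buffer has data, then filters out
 * fds that have hung up once their remaining events are consumed:
 *
 *	while (!done) {
 *		if (perf_evlist__poll(evlist, -1) < 0 && errno != EINTR)
 *			break;
 *		// ... drain the mmaps here ...
 *		perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP);
 *	}
 */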
static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *prev = i ? &map[i - 1] : NULL;

		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i], prev, overwrite, NULL);
	}

	return map;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

static struct perf_mmap *
perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
{
	struct perf_mmap *maps;

	maps = overwrite ? evlist->mmap_ovw : evlist->mmap;

	if (!maps) {
		maps = perf_evlist__alloc_mmap(evlist, overwrite);
		if (!maps)
			return NULL;

		if (overwrite)
			evlist->mmap_ovw = maps;
		else
			evlist->mmap = maps;
	}

	return &maps[idx];
}

#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
			  int output, int cpu)
{
	return perf_mmap__mmap(map, mp, output, cpu);
}

static void perf_evlist__set_mmap_first(struct perf_evlist *evlist,
					struct perf_mmap *map, bool overwrite)
{
	if (overwrite)
		evlist->mmap_ovw_first = map;
	else
		evlist->mmap_first = map;
}
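/*
 * How ring buffers get shared (illustrative sketch only): for each
 * cpu/thread pair, the first event's fd is mmapped and every other
 * event redirects its output into that same buffer via an ioctl,
 * which is what mmap_per_evsel() below implements:
 *
 *	if (*output == -1)		// first event: map its fd
 *		base = mmap(NULL, len, prot, MAP_SHARED, fd, 0);
 *	else				// others: share that buffer
 *		ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output);
 */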
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	       int idx, struct perf_mmap_param *mp, int cpu_idx,
	       int thread, int *_output, int *_output_overwrite)
{
	int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
	struct perf_evsel *evsel;
	int revent;

	perf_evlist__for_each_entry(evlist, evsel) {
		bool overwrite = evsel->attr.write_backward;
		struct perf_mmap *map;
		int *output, fd, cpu;

		if (evsel->system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		map = ops->get(evlist, overwrite, idx);
		if (map == NULL)
			return -ENOMEM;

		if (overwrite) {
			mp->prot = PROT_READ;
			output = _output_overwrite;
		} else {
			mp->prot = PROT_READ | PROT_WRITE;
			output = _output;
		}

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			/*
			 * The last refcount is dropped at perf_mmap__consume(),
			 * so that we make sure we don't prevent tools from
			 * consuming every last event in the ring buffer.
			 *
			 * I.e. we can get the POLLHUP meaning that the fd
			 * doesn't exist anymore, but the last events for it
			 * are still in the ring buffer, waiting to be consumed.
			 *
			 * Tools can choose to ignore this at their own
			 * discretion, but the evlist layer can't just drop it
			 * when filtering events in perf_evlist__filter_pollfd().
			 */
			refcount_set(&map->refcnt, 2);

			if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
				return -1;

			if (!idx)
				perf_evlist__set_mmap_first(evlist, map, overwrite);
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(map);
		}

		revent = !overwrite ? POLLIN : 0;

		if (!evsel->system_wide &&
		    perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
			perf_mmap__put(map);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
		struct perf_mmap_param *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->threads);

	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		if (ops->idx)
			ops->idx(evlist, mp, thread, false);

		if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
				   &output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	     struct perf_mmap_param *mp)
{
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nr_cpus = perf_cpu_map__nr(evlist->cpus);
	int cpu, thread;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		if (ops->idx)
			ops->idx(evlist, mp, cpu, true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
					   thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
	int nr_mmaps;

	nr_mmaps = perf_cpu_map__nr(evlist->cpus);
	if (perf_cpu_map__empty(evlist->cpus))
		nr_mmaps = perf_thread_map__nr(evlist->threads);

	return nr_mmaps;
}

int perf_evlist__mmap_ops(struct perf_evlist *evlist,
			  struct perf_evlist_mmap_ops *ops,
			  struct perf_mmap_param *mp)
{
	struct perf_evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->cpus;
	const struct perf_thread_map *threads = evlist->threads;

	if (!ops || !ops->get || !ops->mmap)
		return -EINVAL;

	mp->mask = evlist->mmap_len - page_size - 1;

	evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);

	perf_evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	if (perf_cpu_map__empty(cpus))
		return mmap_per_thread(evlist, ops, mp);

	return mmap_per_cpu(evlist, ops, mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
{
	struct perf_mmap_param mp;
	struct perf_evlist_mmap_ops ops = {
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	evlist->mmap_len = (pages + 1) * page_size;

	return perf_evlist__mmap_ops(evlist, &ops, &mp);
}
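/*
 * Usage sketch (illustrative only): map the ring buffers after the
 * events are opened. "pages" is the number of data pages per buffer,
 * which the kernel expects to be a power of two; one extra header page
 * is accounted for internally:
 *
 *	if (perf_evlist__mmap(evlist, 4) < 0)	// 4 data pages + 1 header page
 *		err(1, "failed to mmap evlist");
 */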
void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
	}

	if (evlist->mmap_ovw) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap_ovw[i]);
	}

	zfree(&evlist->mmap);
	zfree(&evlist->mmap_ovw);
}

struct perf_mmap *
perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
		       bool overwrite)
{
	if (map)
		return map->next;

	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
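/*
 * Usage sketch (illustrative only): drain every non-overwrite ring
 * buffer with the public read API from <perf/mmap.h> and <perf/event.h>:
 *
 *	struct perf_mmap *map = NULL;
 *	union perf_event *event;
 *
 *	while ((map = perf_evlist__next_mmap(evlist, map, false)) != NULL) {
 *		if (perf_mmap__read_init(map) < 0)
 *			continue;		// this buffer is empty
 *		while ((event = perf_mmap__read_event(map)) != NULL) {
 *			// ... dispatch on event->header.type here ...
 *			perf_mmap__consume(map);
 *		}
 *		perf_mmap__read_done(map);
 *	}
 */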