// SPDX-License-Identifier: GPL-2.0
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <sys/ioctl.h>
#include <internal/evlist.h>
#include <internal/evsel.h>
#include <internal/xyarray.h>
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>

void perf_evlist__init(struct perf_evlist *evlist)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	evlist->nr_entries = 0;
	fdarray__init(&evlist->pollfd, 64);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
	}

	perf_thread_map__put(evsel->threads);
	evsel->threads = perf_thread_map__get(evlist->threads);
	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_evsel(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist,
		      struct perf_evsel *evsel)
{
	list_add_tail(&evsel->node, &evlist->entries);
	evlist->nr_entries += 1;
	__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__remove(struct perf_evlist *evlist,
			 struct perf_evsel *evsel)
{
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist);

	return evlist;
}

struct perf_evsel *
perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev)
{
	struct perf_evsel *next;

	if (!prev) {
		next = list_first_entry(&evlist->entries,
					struct perf_evsel,
					node);
	} else {
		next = list_next_entry(prev, node);
	}

	/* Empty list is noticed here so don't need checking on entry. */
	if (&next->node == &evlist->entries)
		return NULL;

	return next;
}
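
/*
 * Usage sketch (illustrative only, not part of this file): a typical
 * consumer builds an evlist from evsels and releases everything through
 * perf_evlist__delete(). perf_evsel__new() here is the constructor from
 * evsel.c; the attribute values are just an example.
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_SOFTWARE,
 *		.config	= PERF_COUNT_SW_CPU_CLOCK,
 *	};
 *	struct perf_evlist *evlist = perf_evlist__new();
 *	struct perf_evsel *evsel = perf_evsel__new(&attr);
 *
 *	if (evlist && evsel)
 *		perf_evlist__add(evlist, evsel);
 *	...
 *	perf_evlist__delete(evlist);
 */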

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	perf_evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	perf_cpu_map__put(evlist->cpus);
	perf_thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void perf_evlist__set_maps(struct perf_evlist *evlist,
			   struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		perf_cpu_map__put(evlist->cpus);
		evlist->cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	if (!evlist->all_cpus && cpus)
		evlist->all_cpus = perf_cpu_map__get(cpus);

	perf_evlist__propagate_maps(evlist);
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	perf_evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;

out_err:
	perf_evlist__close(evlist);
	return err;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__enable(evsel);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel)
		perf_evsel__disable(evsel);
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	return first->attr.read_format;
}

#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist,
			 struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}
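
/*
 * Lookup sketch (illustrative, not an exported helper of this file): the
 * hash table filled by perf_evlist__id_hash() lets a caller map a sample
 * id back to its owning evsel, roughly:
 *
 *	int hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 *	struct perf_sample_id *sid;
 *
 *	hlist_for_each_entry(sid, &evlist->heads[hash], node)
 *		if (sid->id == id)
 *			return sid->evsel;
 *	return NULL;
 */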

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = perf_cpu_map__nr(evlist->cpus);
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	perf_evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
			    void *ptr, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);

	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = ptr;
		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite)
{
	int i;
	struct perf_mmap *map;

	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct perf_mmap *prev = i ? &map[i - 1] : NULL;

		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		perf_mmap__init(&map[i], prev, overwrite, NULL);
	}

	return map;
}
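
/*
 * Polling sketch (illustrative): once the ring buffers are mapped, a
 * consumer usually waits on the pollfd array built above (timeout in
 * milliseconds) and then drops the fds that hung up, e.g.:
 *
 *	if (perf_evlist__poll(evlist, 100) > 0) {
 *		... read events from the mmaps ...
 *	}
 *	perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP);
 */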

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

static struct perf_mmap*
perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
{
	struct perf_mmap *maps;

	maps = overwrite ? evlist->mmap_ovw : evlist->mmap;

	if (!maps) {
		maps = perf_evlist__alloc_mmap(evlist, overwrite);
		if (!maps)
			return NULL;

		if (overwrite)
			evlist->mmap_ovw = maps;
		else
			evlist->mmap = maps;
	}

	return &maps[idx];
}

#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))

static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
			  int output, int cpu)
{
	return perf_mmap__mmap(map, mp, output, cpu);
}

static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map,
					bool overwrite)
{
	if (overwrite)
		evlist->mmap_ovw_first = map;
	else
		evlist->mmap_first = map;
}

static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	       int idx, struct perf_mmap_param *mp, int cpu_idx,
	       int thread, int *_output, int *_output_overwrite)
{
	int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
	struct perf_evsel *evsel;
	int revent;

	perf_evlist__for_each_entry(evlist, evsel) {
		bool overwrite = evsel->attr.write_backward;
		struct perf_mmap *map;
		int *output, fd, cpu;

		if (evsel->system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		map = ops->get(evlist, overwrite, idx);
		if (map == NULL)
			return -ENOMEM;

		if (overwrite) {
			mp->prot = PROT_READ;
			output = _output_overwrite;
		} else {
			mp->prot = PROT_READ | PROT_WRITE;
			output = _output;
		}

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			/*
			 * The last one will be done at perf_mmap__consume(), so that we
			 * make sure we don't prevent tools from consuming every last event in
			 * the ring buffer.
			 *
			 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
			 * anymore, but the last events for it are still in the ring buffer,
			 * waiting to be consumed.
			 *
			 * Tools can choose to ignore this at their own discretion, but the
			 * evlist layer can't just drop it when filtering events in
			 * perf_evlist__filter_pollfd().
			 */
			refcount_set(&map->refcnt, 2);

			if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
				return -1;

			if (!idx)
				perf_evlist__set_mmap_first(evlist, map, overwrite);
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(map);
		}

		revent = !overwrite ? POLLIN : 0;

		if (!evsel->system_wide &&
		    perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
			perf_mmap__put(map);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}
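
/*
 * Consumer-side sketch (illustrative): the public perf_mmap helpers drain
 * one map; perf_mmap__consume() is where the extra reference taken above
 * with refcount_set(&map->refcnt, 2) is eventually dropped (see the
 * comment in mmap_per_evsel()):
 *
 *	union perf_event *event;
 *
 *	if (perf_mmap__read_init(map) == 0) {
 *		while ((event = perf_mmap__read_event(map)) != NULL) {
 *			... process event ...
 *			perf_mmap__consume(map);
 *		}
 *		perf_mmap__read_done(map);
 *	}
 */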

static int
mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
		struct perf_mmap_param *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->threads);

	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		if (ops->idx)
			ops->idx(evlist, mp, thread, false);

		if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
				   &output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
	     struct perf_mmap_param *mp)
{
	int nr_threads = perf_thread_map__nr(evlist->threads);
	int nr_cpus = perf_cpu_map__nr(evlist->cpus);
	int cpu, thread;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		if (ops->idx)
			ops->idx(evlist, mp, cpu, true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
					   thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap(evlist);
	return -1;
}

static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
	int nr_mmaps;

	nr_mmaps = perf_cpu_map__nr(evlist->cpus);
	if (perf_cpu_map__empty(evlist->cpus))
		nr_mmaps = perf_thread_map__nr(evlist->threads);

	return nr_mmaps;
}

int perf_evlist__mmap_ops(struct perf_evlist *evlist,
			  struct perf_evlist_mmap_ops *ops,
			  struct perf_mmap_param *mp)
{
	struct perf_evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->cpus;
	const struct perf_thread_map *threads = evlist->threads;

	if (!ops || !ops->get || !ops->mmap)
		return -EINVAL;

	mp->mask = evlist->mmap_len - page_size - 1;

	evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist);

	perf_evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	if (perf_cpu_map__empty(cpus))
		return mmap_per_thread(evlist, ops, mp);

	return mmap_per_cpu(evlist, ops, mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, int pages)
{
	struct perf_mmap_param mp;
	struct perf_evlist_mmap_ops ops = {
		.get  = perf_evlist__mmap_cb_get,
		.mmap = perf_evlist__mmap_cb_mmap,
	};

	evlist->mmap_len = (pages + 1) * page_size;

	return perf_evlist__mmap_ops(evlist, &ops, &mp);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);
	}

	if (evlist->mmap_ovw) {
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap_ovw[i]);
	}

	zfree(&evlist->mmap);
	zfree(&evlist->mmap_ovw);
}
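
/*
 * End-to-end sketch (illustrative): a typical call order around the mmap
 * helpers above, with error handling omitted. Note that perf_evlist__mmap()
 * maps (pages + 1) * page_size bytes per ring buffer, so pages is expected
 * to be a power of two for the mask computed in perf_evlist__mmap_ops() to
 * be valid.
 *
 *	perf_evlist__set_maps(evlist, cpus, threads);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, 4);
 *	perf_evlist__enable(evlist);
 *	...
 *	perf_evlist__disable(evlist);
 *	perf_evlist__munmap(evlist);
 *	perf_evlist__close(evlist);
 */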

struct perf_mmap*
perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
		       bool overwrite)
{
	if (map)
		return map->next;

	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
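
/*
 * Iteration sketch (illustrative): consumers normally walk the maps with
 * the perf_evlist__for_each_mmap() helper declared in <perf/evlist.h>,
 * which is built on top of perf_evlist__next_mmap():
 *
 *	struct perf_mmap *map;
 *
 *	perf_evlist__for_each_mmap(evlist, map, false) {
 *		... read events from map ...
 *	}
 */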