// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "util.h"
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
#include <stdlib.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>

#include <internal/xyarray.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
			     struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return perf_thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
	int nr_threads = perf_thread_map__nr(evlist->core.threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->core.threads && thread >= 0)
		sid->tid = perf_thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
					   u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const __u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
				       union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

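		/*
		 * Ring buffers that were never mmapped keep fd == -1
		 * (see perf_evlist__alloc_mmap()), so skip them here.
		 */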
		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = perf_cpu_map__nr(evlist->core.cpus);
	if (perf_cpu_map__empty(evlist->core.cpus))
		evlist->nr_mmaps = perf_thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = perf_thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through a const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (perf_cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, '-a'
	 * overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false and, for
	 * perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus, which keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows target->per_thread and
	 * target->system_wide to both be true, meaning system-wide
	 * per-thread data is collected. In that case thread_map__new_str
	 * calls thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and the evsel should always be the same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
					    struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				evsel__close(c2);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;
	/*
	 * In order to read symbols from other namespaces perf needs to call
	 * setns(2).  This isn't permitted if the struct_fs has multiple users.
	 * unshare(2) the fs so that we may continue to setns into namespaces
	 * that we're observing when, for instance, reading the build-ids at
	 * the end of a 'perf record' session.
	 */
	unshare(CLONE_FS);

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
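	/*
	 * pthread_join() above has waited for the poll thread to exit, so it
	 * is now safe to tear the evlist down.
	 */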
evlist__delete(evlist); 1919 } 1920