// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/cpumap.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

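/*
 * Illustrative sketch (not part of the original file): the constructors above
 * are normally paired with the open/mmap/enable helpers defined later in this
 * file.  Error handling is omitted and real callers differ per tool:
 *
 *	struct evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist && !evlist__open(evlist) &&
 *	    !perf_evlist__mmap(evlist, UINT_MAX))
 *		evlist__enable(evlist);
 *	...
 *	evlist__disable(evlist);
 *	evlist__delete(evlist);
 */
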
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->cpus);
	perf_thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct evlist *evlist,
					  struct evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->core.own_cpus || evlist->has_user_cpus) {
		perf_cpu_map__put(evsel->core.cpus);
		evsel->core.cpus = perf_cpu_map__get(evlist->cpus);
	} else if (evsel->core.cpus != evsel->core.own_cpus) {
		perf_cpu_map__put(evsel->core.cpus);
		evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
	}

	perf_thread_map__put(evsel->threads);
	evsel->threads = perf_thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
			     struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
					   u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
				       union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

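/*
 * Worked example (illustrative, not from the original sources): with 4 KiB
 * pages, "-m 512K" is parsed by parse_pages_arg() via the size tags above,
 * giving val = 524288, pages = PERF_ALIGN(524288, 4096) / 4096 = 128, which
 * is already a power of two and is accepted as-is.  A bare "-m 100" is taken
 * as a page count instead and rounded up to 128 pages, with a notice printed
 * by pr_info().
 */
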
/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->cpus;
	const struct perf_thread_map *threads = evlist->threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are given to perf record, perf
	 * record overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is given to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus, and perf record keeps its current
	 * behavior.
	 *
	 * perf stat, however, allows both target->per_thread and
	 * target->system_wide to be true, meaning system-wide per-thread
	 * collection: thread_map__new_str then calls
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct evlist *evlist, struct perf_cpu_map *cpus,
			   struct perf_thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		perf_cpu_map__put(evlist->cpus);
		evlist->cpus = perf_cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		perf_thread_map__put(evlist->threads);
		evlist->threads = perf_thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * CPU limit, so the evsel's and the evlist's maps are always
		 * the same here.
		 */
		err = evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

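/*
 * Illustrative sketch (not part of the original file): once evlist__open()
 * has succeeded, a minimal consumer typically mmaps the evlist and drains
 * each ring buffer, pairing perf_mmap__read_event() with
 * perf_evlist__parse_sample() (defined further below).  Error handling and
 * the process() callback are made up for the example:
 *
 *	union perf_event *event;
 *	struct perf_sample sample;
 *	struct perf_mmap *map;
 *	int i;
 *
 *	perf_evlist__mmap(evlist, UINT_MAX);
 *	for (i = 0; i < evlist->nr_mmaps; i++) {
 *		map = &evlist->mmap[i];
 *		if (perf_mmap__read_init(map))
 *			continue;
 *		while ((event = perf_mmap__read_event(map)) != NULL) {
 *			if (!perf_evlist__parse_sample(evlist, event, &sample))
 *				process(&sample);
 *			perf_mmap__consume(map);
 *		}
 *		perf_mmap__read_done(map);
 *	}
 */
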
int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display.  Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
					    struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->cpus,
				evlist->threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}
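
/*
 * Illustrative sketch (not part of the original file): a hypothetical caller
 * of the side band helpers above.  The callback name and attr setup are made
 * up for the example, and error handling is omitted:
 *
 *	struct evlist *sb_evlist = NULL;
 *	struct perf_event_attr attr = {
 *		.type	       = PERF_TYPE_SOFTWARE,
 *		.config	       = PERF_COUNT_SW_DUMMY,
 *		.sample_id_all = 1,
 *	};
 *
 *	perf_evlist__add_sb_event(&sb_evlist, &attr, my_sb_cb, NULL);
 *	perf_evlist__start_sb_thread(sb_evlist, target);
 *	...			// workload runs, events flow to my_sb_cb()
 *	perf_evlist__stop_sb_thread(sb_evlist);
 */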