1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 4 * 5 * Parts came from builtin-{top,stat,record}.c, see those files for further 6 * copyright notes. 7 */ 8 #include <api/fs/fs.h> 9 #include <errno.h> 10 #include <inttypes.h> 11 #include <poll.h> 12 #include "cpumap.h" 13 #include "thread_map.h" 14 #include "target.h" 15 #include "evlist.h" 16 #include "evsel.h" 17 #include "debug.h" 18 #include "units.h" 19 #include "asm/bug.h" 20 #include "bpf-event.h" 21 #include <signal.h> 22 #include <unistd.h> 23 24 #include "parse-events.h" 25 #include <subcmd/parse-options.h> 26 27 #include <fcntl.h> 28 #include <sys/ioctl.h> 29 #include <sys/mman.h> 30 31 #include <linux/bitops.h> 32 #include <linux/hash.h> 33 #include <linux/log2.h> 34 #include <linux/err.h> 35 #include <linux/zalloc.h> 36 37 #ifdef LACKS_SIGQUEUE_PROTOTYPE 38 int sigqueue(pid_t pid, int sig, const union sigval value); 39 #endif 40 41 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 42 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 43 44 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 45 struct thread_map *threads) 46 { 47 int i; 48 49 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 50 INIT_HLIST_HEAD(&evlist->heads[i]); 51 INIT_LIST_HEAD(&evlist->entries); 52 perf_evlist__set_maps(evlist, cpus, threads); 53 fdarray__init(&evlist->pollfd, 64); 54 evlist->workload.pid = -1; 55 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 56 } 57 58 struct perf_evlist *perf_evlist__new(void) 59 { 60 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 61 62 if (evlist != NULL) 63 perf_evlist__init(evlist, NULL, NULL); 64 65 return evlist; 66 } 67 68 struct perf_evlist *perf_evlist__new_default(void) 69 { 70 struct perf_evlist *evlist = perf_evlist__new(); 71 72 if (evlist && perf_evlist__add_default(evlist)) { 73 perf_evlist__delete(evlist); 74 evlist = NULL; 75 } 76 77 return evlist; 78 } 79 80 struct perf_evlist *perf_evlist__new_dummy(void) 81 { 82 struct perf_evlist *evlist = perf_evlist__new(); 83 84 if (evlist && perf_evlist__add_dummy(evlist)) { 85 perf_evlist__delete(evlist); 86 evlist = NULL; 87 } 88 89 return evlist; 90 } 91 92 /** 93 * perf_evlist__set_id_pos - set the positions of event ids. 94 * @evlist: selected event list 95 * 96 * Events with compatible sample types all have the same id_pos 97 * and is_pos. For convenience, put a copy on evlist. 98 */ 99 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 100 { 101 struct perf_evsel *first = perf_evlist__first(evlist); 102 103 evlist->id_pos = first->id_pos; 104 evlist->is_pos = first->is_pos; 105 } 106 107 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 108 { 109 struct perf_evsel *evsel; 110 111 evlist__for_each_entry(evlist, evsel) 112 perf_evsel__calc_id_pos(evsel); 113 114 perf_evlist__set_id_pos(evlist); 115 } 116 117 static void perf_evlist__purge(struct perf_evlist *evlist) 118 { 119 struct perf_evsel *pos, *n; 120 121 evlist__for_each_entry_safe(evlist, n, pos) { 122 list_del_init(&pos->node); 123 pos->evlist = NULL; 124 perf_evsel__delete(pos); 125 } 126 127 evlist->nr_entries = 0; 128 } 129 130 void perf_evlist__exit(struct perf_evlist *evlist) 131 { 132 zfree(&evlist->mmap); 133 zfree(&evlist->overwrite_mmap); 134 fdarray__exit(&evlist->pollfd); 135 } 136 137 void perf_evlist__delete(struct perf_evlist *evlist) 138 { 139 if (evlist == NULL) 140 return; 141 142 perf_evlist__munmap(evlist); 143 perf_evlist__close(evlist); 144 cpu_map__put(evlist->cpus); 145 thread_map__put(evlist->threads); 146 evlist->cpus = NULL; 147 evlist->threads = NULL; 148 perf_evlist__purge(evlist); 149 perf_evlist__exit(evlist); 150 free(evlist); 151 } 152 153 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 154 struct perf_evsel *evsel) 155 { 156 /* 157 * We already have cpus for evsel (via PMU sysfs) so 158 * keep it, if there's no target cpu list defined. 159 */ 160 if (!evsel->own_cpus || evlist->has_user_cpus) { 161 cpu_map__put(evsel->cpus); 162 evsel->cpus = cpu_map__get(evlist->cpus); 163 } else if (evsel->cpus != evsel->own_cpus) { 164 cpu_map__put(evsel->cpus); 165 evsel->cpus = cpu_map__get(evsel->own_cpus); 166 } 167 168 thread_map__put(evsel->threads); 169 evsel->threads = thread_map__get(evlist->threads); 170 } 171 172 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 173 { 174 struct perf_evsel *evsel; 175 176 evlist__for_each_entry(evlist, evsel) 177 __perf_evlist__propagate_maps(evlist, evsel); 178 } 179 180 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 181 { 182 entry->evlist = evlist; 183 list_add_tail(&entry->node, &evlist->entries); 184 entry->idx = evlist->nr_entries; 185 entry->tracking = !entry->idx; 186 187 if (!evlist->nr_entries++) 188 perf_evlist__set_id_pos(evlist); 189 190 __perf_evlist__propagate_maps(evlist, entry); 191 } 192 193 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 194 { 195 evsel->evlist = NULL; 196 list_del_init(&evsel->node); 197 evlist->nr_entries -= 1; 198 } 199 200 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 201 struct list_head *list) 202 { 203 struct perf_evsel *evsel, *temp; 204 205 __evlist__for_each_entry_safe(list, temp, evsel) { 206 list_del_init(&evsel->node); 207 perf_evlist__add(evlist, evsel); 208 } 209 } 210 211 void __perf_evlist__set_leader(struct list_head *list) 212 { 213 struct perf_evsel *evsel, *leader; 214 215 leader = list_entry(list->next, struct perf_evsel, node); 216 evsel = list_entry(list->prev, struct perf_evsel, node); 217 218 leader->nr_members = evsel->idx - leader->idx + 1; 219 220 __evlist__for_each_entry(list, evsel) { 221 evsel->leader = leader; 222 } 223 } 224 225 void perf_evlist__set_leader(struct perf_evlist *evlist) 226 { 227 if (evlist->nr_entries) { 228 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 229 __perf_evlist__set_leader(&evlist->entries); 230 } 231 } 232 233 int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise) 234 { 235 struct perf_evsel *evsel = perf_evsel__new_cycles(precise); 236 237 if (evsel == NULL) 238 return -ENOMEM; 239 240 perf_evlist__add(evlist, evsel); 241 return 0; 242 } 243 244 int perf_evlist__add_dummy(struct perf_evlist *evlist) 245 { 246 struct perf_event_attr attr = { 247 .type = PERF_TYPE_SOFTWARE, 248 .config = PERF_COUNT_SW_DUMMY, 249 .size = sizeof(attr), /* to capture ABI version */ 250 }; 251 struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); 252 253 if (evsel == NULL) 254 return -ENOMEM; 255 256 perf_evlist__add(evlist, evsel); 257 return 0; 258 } 259 260 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 261 struct perf_event_attr *attrs, size_t nr_attrs) 262 { 263 struct perf_evsel *evsel, *n; 264 LIST_HEAD(head); 265 size_t i; 266 267 for (i = 0; i < nr_attrs; i++) { 268 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 269 if (evsel == NULL) 270 goto out_delete_partial_list; 271 list_add_tail(&evsel->node, &head); 272 } 273 274 perf_evlist__splice_list_tail(evlist, &head); 275 276 return 0; 277 278 out_delete_partial_list: 279 __evlist__for_each_entry_safe(&head, n, evsel) 280 perf_evsel__delete(evsel); 281 return -1; 282 } 283 284 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 285 struct perf_event_attr *attrs, size_t nr_attrs) 286 { 287 size_t i; 288 289 for (i = 0; i < nr_attrs; i++) 290 event_attr_init(attrs + i); 291 292 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 293 } 294 295 struct perf_evsel * 296 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 297 { 298 struct perf_evsel *evsel; 299 300 evlist__for_each_entry(evlist, evsel) { 301 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 302 (int)evsel->attr.config == id) 303 return evsel; 304 } 305 306 return NULL; 307 } 308 309 struct perf_evsel * 310 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 311 const char *name) 312 { 313 struct perf_evsel *evsel; 314 315 evlist__for_each_entry(evlist, evsel) { 316 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 317 (strcmp(evsel->name, name) == 0)) 318 return evsel; 319 } 320 321 return NULL; 322 } 323 324 int perf_evlist__add_newtp(struct perf_evlist *evlist, 325 const char *sys, const char *name, void *handler) 326 { 327 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 328 329 if (IS_ERR(evsel)) 330 return -1; 331 332 evsel->handler = handler; 333 perf_evlist__add(evlist, evsel); 334 return 0; 335 } 336 337 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 338 struct perf_evsel *evsel) 339 { 340 if (evsel->system_wide) 341 return 1; 342 else 343 return thread_map__nr(evlist->threads); 344 } 345 346 void perf_evlist__disable(struct perf_evlist *evlist) 347 { 348 struct perf_evsel *pos; 349 350 evlist__for_each_entry(evlist, pos) { 351 if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) 352 continue; 353 perf_evsel__disable(pos); 354 } 355 356 evlist->enabled = false; 357 } 358 359 void perf_evlist__enable(struct perf_evlist *evlist) 360 { 361 struct perf_evsel *pos; 362 363 evlist__for_each_entry(evlist, pos) { 364 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 365 continue; 366 perf_evsel__enable(pos); 367 } 368 369 evlist->enabled = true; 370 } 371 372 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 373 { 374 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 375 } 376 377 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 378 struct perf_evsel *evsel, int cpu) 379 { 380 int thread; 381 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 382 383 if (!evsel->fd) 384 return -EINVAL; 385 386 for (thread = 0; thread < nr_threads; thread++) { 387 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 388 if (err) 389 return err; 390 } 391 return 0; 392 } 393 394 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 395 struct perf_evsel *evsel, 396 int thread) 397 { 398 int cpu; 399 int nr_cpus = cpu_map__nr(evlist->cpus); 400 401 if (!evsel->fd) 402 return -EINVAL; 403 404 for (cpu = 0; cpu < nr_cpus; cpu++) { 405 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 406 if (err) 407 return err; 408 } 409 return 0; 410 } 411 412 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 413 struct perf_evsel *evsel, int idx) 414 { 415 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 416 417 if (per_cpu_mmaps) 418 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 419 else 420 return perf_evlist__enable_event_thread(evlist, evsel, idx); 421 } 422 423 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 424 { 425 int nr_cpus = cpu_map__nr(evlist->cpus); 426 int nr_threads = thread_map__nr(evlist->threads); 427 int nfds = 0; 428 struct perf_evsel *evsel; 429 430 evlist__for_each_entry(evlist, evsel) { 431 if (evsel->system_wide) 432 nfds += nr_cpus; 433 else 434 nfds += nr_cpus * nr_threads; 435 } 436 437 if (fdarray__available_entries(&evlist->pollfd) < nfds && 438 fdarray__grow(&evlist->pollfd, nfds) < 0) 439 return -ENOMEM; 440 441 return 0; 442 } 443 444 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 445 struct perf_mmap *map, short revent) 446 { 447 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 448 /* 449 * Save the idx so that when we filter out fds POLLHUP'ed we can 450 * close the associated evlist->mmap[] entry. 451 */ 452 if (pos >= 0) { 453 evlist->pollfd.priv[pos].ptr = map; 454 455 fcntl(fd, F_SETFL, O_NONBLOCK); 456 } 457 458 return pos; 459 } 460 461 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 462 { 463 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 464 } 465 466 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 467 void *arg __maybe_unused) 468 { 469 struct perf_mmap *map = fda->priv[fd].ptr; 470 471 if (map) 472 perf_mmap__put(map); 473 } 474 475 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 476 { 477 return fdarray__filter(&evlist->pollfd, revents_and_mask, 478 perf_evlist__munmap_filtered, NULL); 479 } 480 481 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 482 { 483 return fdarray__poll(&evlist->pollfd, timeout); 484 } 485 486 static void perf_evlist__id_hash(struct perf_evlist *evlist, 487 struct perf_evsel *evsel, 488 int cpu, int thread, u64 id) 489 { 490 int hash; 491 struct perf_sample_id *sid = SID(evsel, cpu, thread); 492 493 sid->id = id; 494 sid->evsel = evsel; 495 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 496 hlist_add_head(&sid->node, &evlist->heads[hash]); 497 } 498 499 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 500 int cpu, int thread, u64 id) 501 { 502 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 503 evsel->id[evsel->ids++] = id; 504 } 505 506 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 507 struct perf_evsel *evsel, 508 int cpu, int thread, int fd) 509 { 510 u64 read_data[4] = { 0, }; 511 int id_idx = 1; /* The first entry is the counter value */ 512 u64 id; 513 int ret; 514 515 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 516 if (!ret) 517 goto add; 518 519 if (errno != ENOTTY) 520 return -1; 521 522 /* Legacy way to get event id.. All hail to old kernels! */ 523 524 /* 525 * This way does not work with group format read, so bail 526 * out in that case. 527 */ 528 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 529 return -1; 530 531 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 532 read(fd, &read_data, sizeof(read_data)) == -1) 533 return -1; 534 535 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 536 ++id_idx; 537 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 538 ++id_idx; 539 540 id = read_data[id_idx]; 541 542 add: 543 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 544 return 0; 545 } 546 547 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 548 struct perf_evsel *evsel, int idx, int cpu, 549 int thread) 550 { 551 struct perf_sample_id *sid = SID(evsel, cpu, thread); 552 sid->idx = idx; 553 if (evlist->cpus && cpu >= 0) 554 sid->cpu = evlist->cpus->map[cpu]; 555 else 556 sid->cpu = -1; 557 if (!evsel->system_wide && evlist->threads && thread >= 0) 558 sid->tid = thread_map__pid(evlist->threads, thread); 559 else 560 sid->tid = -1; 561 } 562 563 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 564 { 565 struct hlist_head *head; 566 struct perf_sample_id *sid; 567 int hash; 568 569 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 570 head = &evlist->heads[hash]; 571 572 hlist_for_each_entry(sid, head, node) 573 if (sid->id == id) 574 return sid; 575 576 return NULL; 577 } 578 579 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 580 { 581 struct perf_sample_id *sid; 582 583 if (evlist->nr_entries == 1 || !id) 584 return perf_evlist__first(evlist); 585 586 sid = perf_evlist__id2sid(evlist, id); 587 if (sid) 588 return sid->evsel; 589 590 if (!perf_evlist__sample_id_all(evlist)) 591 return perf_evlist__first(evlist); 592 593 return NULL; 594 } 595 596 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 597 u64 id) 598 { 599 struct perf_sample_id *sid; 600 601 if (!id) 602 return NULL; 603 604 sid = perf_evlist__id2sid(evlist, id); 605 if (sid) 606 return sid->evsel; 607 608 return NULL; 609 } 610 611 static int perf_evlist__event2id(struct perf_evlist *evlist, 612 union perf_event *event, u64 *id) 613 { 614 const u64 *array = event->sample.array; 615 ssize_t n; 616 617 n = (event->header.size - sizeof(event->header)) >> 3; 618 619 if (event->header.type == PERF_RECORD_SAMPLE) { 620 if (evlist->id_pos >= n) 621 return -1; 622 *id = array[evlist->id_pos]; 623 } else { 624 if (evlist->is_pos > n) 625 return -1; 626 n -= evlist->is_pos; 627 *id = array[n]; 628 } 629 return 0; 630 } 631 632 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 633 union perf_event *event) 634 { 635 struct perf_evsel *first = perf_evlist__first(evlist); 636 struct hlist_head *head; 637 struct perf_sample_id *sid; 638 int hash; 639 u64 id; 640 641 if (evlist->nr_entries == 1) 642 return first; 643 644 if (!first->attr.sample_id_all && 645 event->header.type != PERF_RECORD_SAMPLE) 646 return first; 647 648 if (perf_evlist__event2id(evlist, event, &id)) 649 return NULL; 650 651 /* Synthesized events have an id of zero */ 652 if (!id) 653 return first; 654 655 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 656 head = &evlist->heads[hash]; 657 658 hlist_for_each_entry(sid, head, node) { 659 if (sid->id == id) 660 return sid->evsel; 661 } 662 return NULL; 663 } 664 665 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 666 { 667 int i; 668 669 if (!evlist->overwrite_mmap) 670 return 0; 671 672 for (i = 0; i < evlist->nr_mmaps; i++) { 673 int fd = evlist->overwrite_mmap[i].fd; 674 int err; 675 676 if (fd < 0) 677 continue; 678 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); 679 if (err) 680 return err; 681 } 682 return 0; 683 } 684 685 static int perf_evlist__pause(struct perf_evlist *evlist) 686 { 687 return perf_evlist__set_paused(evlist, true); 688 } 689 690 static int perf_evlist__resume(struct perf_evlist *evlist) 691 { 692 return perf_evlist__set_paused(evlist, false); 693 } 694 695 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 696 { 697 int i; 698 699 if (evlist->mmap) 700 for (i = 0; i < evlist->nr_mmaps; i++) 701 perf_mmap__munmap(&evlist->mmap[i]); 702 703 if (evlist->overwrite_mmap) 704 for (i = 0; i < evlist->nr_mmaps; i++) 705 perf_mmap__munmap(&evlist->overwrite_mmap[i]); 706 } 707 708 void perf_evlist__munmap(struct perf_evlist *evlist) 709 { 710 perf_evlist__munmap_nofree(evlist); 711 zfree(&evlist->mmap); 712 zfree(&evlist->overwrite_mmap); 713 } 714 715 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist, 716 bool overwrite) 717 { 718 int i; 719 struct perf_mmap *map; 720 721 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 722 if (cpu_map__empty(evlist->cpus)) 723 evlist->nr_mmaps = thread_map__nr(evlist->threads); 724 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 725 if (!map) 726 return NULL; 727 728 for (i = 0; i < evlist->nr_mmaps; i++) { 729 map[i].fd = -1; 730 map[i].overwrite = overwrite; 731 /* 732 * When the perf_mmap() call is made we grab one refcount, plus 733 * one extra to let perf_mmap__consume() get the last 734 * events after all real references (perf_mmap__get()) are 735 * dropped. 736 * 737 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 738 * thus does perf_mmap__get() on it. 739 */ 740 refcount_set(&map[i].refcnt, 0); 741 } 742 return map; 743 } 744 745 static bool 746 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 747 struct perf_evsel *evsel) 748 { 749 if (evsel->attr.write_backward) 750 return false; 751 return true; 752 } 753 754 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 755 struct mmap_params *mp, int cpu_idx, 756 int thread, int *_output, int *_output_overwrite) 757 { 758 struct perf_evsel *evsel; 759 int revent; 760 int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); 761 762 evlist__for_each_entry(evlist, evsel) { 763 struct perf_mmap *maps = evlist->mmap; 764 int *output = _output; 765 int fd; 766 int cpu; 767 768 mp->prot = PROT_READ | PROT_WRITE; 769 if (evsel->attr.write_backward) { 770 output = _output_overwrite; 771 maps = evlist->overwrite_mmap; 772 773 if (!maps) { 774 maps = perf_evlist__alloc_mmap(evlist, true); 775 if (!maps) 776 return -1; 777 evlist->overwrite_mmap = maps; 778 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 779 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 780 } 781 mp->prot &= ~PROT_WRITE; 782 } 783 784 if (evsel->system_wide && thread) 785 continue; 786 787 cpu = cpu_map__idx(evsel->cpus, evlist_cpu); 788 if (cpu == -1) 789 continue; 790 791 fd = FD(evsel, cpu, thread); 792 793 if (*output == -1) { 794 *output = fd; 795 796 if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) 797 return -1; 798 } else { 799 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 800 return -1; 801 802 perf_mmap__get(&maps[idx]); 803 } 804 805 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 806 807 /* 808 * The system_wide flag causes a selected event to be opened 809 * always without a pid. Consequently it will never get a 810 * POLLHUP, but it is used for tracking in combination with 811 * other events, so it should not need to be polled anyway. 812 * Therefore don't add it for polling. 813 */ 814 if (!evsel->system_wide && 815 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 816 perf_mmap__put(&maps[idx]); 817 return -1; 818 } 819 820 if (evsel->attr.read_format & PERF_FORMAT_ID) { 821 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 822 fd) < 0) 823 return -1; 824 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 825 thread); 826 } 827 } 828 829 return 0; 830 } 831 832 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 833 struct mmap_params *mp) 834 { 835 int cpu, thread; 836 int nr_cpus = cpu_map__nr(evlist->cpus); 837 int nr_threads = thread_map__nr(evlist->threads); 838 839 pr_debug2("perf event ring buffer mmapped per cpu\n"); 840 for (cpu = 0; cpu < nr_cpus; cpu++) { 841 int output = -1; 842 int output_overwrite = -1; 843 844 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 845 true); 846 847 for (thread = 0; thread < nr_threads; thread++) { 848 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 849 thread, &output, &output_overwrite)) 850 goto out_unmap; 851 } 852 } 853 854 return 0; 855 856 out_unmap: 857 perf_evlist__munmap_nofree(evlist); 858 return -1; 859 } 860 861 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 862 struct mmap_params *mp) 863 { 864 int thread; 865 int nr_threads = thread_map__nr(evlist->threads); 866 867 pr_debug2("perf event ring buffer mmapped per thread\n"); 868 for (thread = 0; thread < nr_threads; thread++) { 869 int output = -1; 870 int output_overwrite = -1; 871 872 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 873 false); 874 875 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 876 &output, &output_overwrite)) 877 goto out_unmap; 878 } 879 880 return 0; 881 882 out_unmap: 883 perf_evlist__munmap_nofree(evlist); 884 return -1; 885 } 886 887 unsigned long perf_event_mlock_kb_in_pages(void) 888 { 889 unsigned long pages; 890 int max; 891 892 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 893 /* 894 * Pick a once upon a time good value, i.e. things look 895 * strange since we can't read a sysctl value, but lets not 896 * die yet... 897 */ 898 max = 512; 899 } else { 900 max -= (page_size / 1024); 901 } 902 903 pages = (max * 1024) / page_size; 904 if (!is_power_of_2(pages)) 905 pages = rounddown_pow_of_two(pages); 906 907 return pages; 908 } 909 910 size_t perf_evlist__mmap_size(unsigned long pages) 911 { 912 if (pages == UINT_MAX) 913 pages = perf_event_mlock_kb_in_pages(); 914 else if (!is_power_of_2(pages)) 915 return 0; 916 917 return (pages + 1) * page_size; 918 } 919 920 static long parse_pages_arg(const char *str, unsigned long min, 921 unsigned long max) 922 { 923 unsigned long pages, val; 924 static struct parse_tag tags[] = { 925 { .tag = 'B', .mult = 1 }, 926 { .tag = 'K', .mult = 1 << 10 }, 927 { .tag = 'M', .mult = 1 << 20 }, 928 { .tag = 'G', .mult = 1 << 30 }, 929 { .tag = 0 }, 930 }; 931 932 if (str == NULL) 933 return -EINVAL; 934 935 val = parse_tag_value(str, tags); 936 if (val != (unsigned long) -1) { 937 /* we got file size value */ 938 pages = PERF_ALIGN(val, page_size) / page_size; 939 } else { 940 /* we got pages count value */ 941 char *eptr; 942 pages = strtoul(str, &eptr, 10); 943 if (*eptr != '\0') 944 return -EINVAL; 945 } 946 947 if (pages == 0 && min == 0) { 948 /* leave number of pages at 0 */ 949 } else if (!is_power_of_2(pages)) { 950 char buf[100]; 951 952 /* round pages up to next power of 2 */ 953 pages = roundup_pow_of_two(pages); 954 if (!pages) 955 return -EINVAL; 956 957 unit_number__scnprintf(buf, sizeof(buf), pages * page_size); 958 pr_info("rounding mmap pages size to %s (%lu pages)\n", 959 buf, pages); 960 } 961 962 if (pages > max) 963 return -EINVAL; 964 965 return pages; 966 } 967 968 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 969 { 970 unsigned long max = UINT_MAX; 971 long pages; 972 973 if (max > SIZE_MAX / page_size) 974 max = SIZE_MAX / page_size; 975 976 pages = parse_pages_arg(str, 1, max); 977 if (pages < 0) { 978 pr_err("Invalid argument for --mmap_pages/-m\n"); 979 return -1; 980 } 981 982 *mmap_pages = pages; 983 return 0; 984 } 985 986 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 987 int unset __maybe_unused) 988 { 989 return __perf_evlist__parse_mmap_pages(opt->value, str); 990 } 991 992 /** 993 * perf_evlist__mmap_ex - Create mmaps to receive events. 994 * @evlist: list of events 995 * @pages: map length in pages 996 * @overwrite: overwrite older events? 997 * @auxtrace_pages - auxtrace map length in pages 998 * @auxtrace_overwrite - overwrite older auxtrace data? 999 * 1000 * If @overwrite is %false the user needs to signal event consumption using 1001 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1002 * automatically. 1003 * 1004 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1005 * consumption using auxtrace_mmap__write_tail(). 1006 * 1007 * Return: %0 on success, negative error code otherwise. 1008 */ 1009 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1010 unsigned int auxtrace_pages, 1011 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, 1012 int comp_level) 1013 { 1014 struct perf_evsel *evsel; 1015 const struct cpu_map *cpus = evlist->cpus; 1016 const struct thread_map *threads = evlist->threads; 1017 /* 1018 * Delay setting mp.prot: set it before calling perf_mmap__mmap. 1019 * Its value is decided by evsel's write_backward. 1020 * So &mp should not be passed through const pointer. 1021 */ 1022 struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, 1023 .comp_level = comp_level }; 1024 1025 if (!evlist->mmap) 1026 evlist->mmap = perf_evlist__alloc_mmap(evlist, false); 1027 if (!evlist->mmap) 1028 return -ENOMEM; 1029 1030 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1031 return -ENOMEM; 1032 1033 evlist->mmap_len = perf_evlist__mmap_size(pages); 1034 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1035 mp.mask = evlist->mmap_len - page_size - 1; 1036 1037 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1038 auxtrace_pages, auxtrace_overwrite); 1039 1040 evlist__for_each_entry(evlist, evsel) { 1041 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1042 evsel->sample_id == NULL && 1043 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1044 return -ENOMEM; 1045 } 1046 1047 if (cpu_map__empty(cpus)) 1048 return perf_evlist__mmap_per_thread(evlist, &mp); 1049 1050 return perf_evlist__mmap_per_cpu(evlist, &mp); 1051 } 1052 1053 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) 1054 { 1055 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); 1056 } 1057 1058 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1059 { 1060 bool all_threads = (target->per_thread && target->system_wide); 1061 struct cpu_map *cpus; 1062 struct thread_map *threads; 1063 1064 /* 1065 * If specify '-a' and '--per-thread' to perf record, perf record 1066 * will override '--per-thread'. target->per_thread = false and 1067 * target->system_wide = true. 1068 * 1069 * If specify '--per-thread' only to perf record, 1070 * target->per_thread = true and target->system_wide = false. 1071 * 1072 * So target->per_thread && target->system_wide is false. 1073 * For perf record, thread_map__new_str doesn't call 1074 * thread_map__new_all_cpus. That will keep perf record's 1075 * current behavior. 1076 * 1077 * For perf stat, it allows the case that target->per_thread and 1078 * target->system_wide are all true. It means to collect system-wide 1079 * per-thread data. thread_map__new_str will call 1080 * thread_map__new_all_cpus to enumerate all threads. 1081 */ 1082 threads = thread_map__new_str(target->pid, target->tid, target->uid, 1083 all_threads); 1084 1085 if (!threads) 1086 return -1; 1087 1088 if (target__uses_dummy_map(target)) 1089 cpus = cpu_map__dummy_new(); 1090 else 1091 cpus = cpu_map__new(target->cpu_list); 1092 1093 if (!cpus) 1094 goto out_delete_threads; 1095 1096 evlist->has_user_cpus = !!target->cpu_list; 1097 1098 perf_evlist__set_maps(evlist, cpus, threads); 1099 1100 return 0; 1101 1102 out_delete_threads: 1103 thread_map__put(threads); 1104 return -1; 1105 } 1106 1107 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1108 struct thread_map *threads) 1109 { 1110 /* 1111 * Allow for the possibility that one or another of the maps isn't being 1112 * changed i.e. don't put it. Note we are assuming the maps that are 1113 * being applied are brand new and evlist is taking ownership of the 1114 * original reference count of 1. If that is not the case it is up to 1115 * the caller to increase the reference count. 1116 */ 1117 if (cpus != evlist->cpus) { 1118 cpu_map__put(evlist->cpus); 1119 evlist->cpus = cpu_map__get(cpus); 1120 } 1121 1122 if (threads != evlist->threads) { 1123 thread_map__put(evlist->threads); 1124 evlist->threads = thread_map__get(threads); 1125 } 1126 1127 perf_evlist__propagate_maps(evlist); 1128 } 1129 1130 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1131 enum perf_event_sample_format bit) 1132 { 1133 struct perf_evsel *evsel; 1134 1135 evlist__for_each_entry(evlist, evsel) 1136 __perf_evsel__set_sample_bit(evsel, bit); 1137 } 1138 1139 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1140 enum perf_event_sample_format bit) 1141 { 1142 struct perf_evsel *evsel; 1143 1144 evlist__for_each_entry(evlist, evsel) 1145 __perf_evsel__reset_sample_bit(evsel, bit); 1146 } 1147 1148 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1149 { 1150 struct perf_evsel *evsel; 1151 int err = 0; 1152 1153 evlist__for_each_entry(evlist, evsel) { 1154 if (evsel->filter == NULL) 1155 continue; 1156 1157 /* 1158 * filters only work for tracepoint event, which doesn't have cpu limit. 1159 * So evlist and evsel should always be same. 1160 */ 1161 err = perf_evsel__apply_filter(evsel, evsel->filter); 1162 if (err) { 1163 *err_evsel = evsel; 1164 break; 1165 } 1166 } 1167 1168 return err; 1169 } 1170 1171 int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter) 1172 { 1173 struct perf_evsel *evsel; 1174 int err = 0; 1175 1176 evlist__for_each_entry(evlist, evsel) { 1177 if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 1178 continue; 1179 1180 err = perf_evsel__set_filter(evsel, filter); 1181 if (err) 1182 break; 1183 } 1184 1185 return err; 1186 } 1187 1188 int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1189 { 1190 char *filter; 1191 int ret = -1; 1192 size_t i; 1193 1194 for (i = 0; i < npids; ++i) { 1195 if (i == 0) { 1196 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1197 return -1; 1198 } else { 1199 char *tmp; 1200 1201 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1202 goto out_free; 1203 1204 free(filter); 1205 filter = tmp; 1206 } 1207 } 1208 1209 ret = perf_evlist__set_tp_filter(evlist, filter); 1210 out_free: 1211 free(filter); 1212 return ret; 1213 } 1214 1215 int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid) 1216 { 1217 return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); 1218 } 1219 1220 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1221 { 1222 struct perf_evsel *pos; 1223 1224 if (evlist->nr_entries == 1) 1225 return true; 1226 1227 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1228 return false; 1229 1230 evlist__for_each_entry(evlist, pos) { 1231 if (pos->id_pos != evlist->id_pos || 1232 pos->is_pos != evlist->is_pos) 1233 return false; 1234 } 1235 1236 return true; 1237 } 1238 1239 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1240 { 1241 struct perf_evsel *evsel; 1242 1243 if (evlist->combined_sample_type) 1244 return evlist->combined_sample_type; 1245 1246 evlist__for_each_entry(evlist, evsel) 1247 evlist->combined_sample_type |= evsel->attr.sample_type; 1248 1249 return evlist->combined_sample_type; 1250 } 1251 1252 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1253 { 1254 evlist->combined_sample_type = 0; 1255 return __perf_evlist__combined_sample_type(evlist); 1256 } 1257 1258 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1259 { 1260 struct perf_evsel *evsel; 1261 u64 branch_type = 0; 1262 1263 evlist__for_each_entry(evlist, evsel) 1264 branch_type |= evsel->attr.branch_sample_type; 1265 return branch_type; 1266 } 1267 1268 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1269 { 1270 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1271 u64 read_format = first->attr.read_format; 1272 u64 sample_type = first->attr.sample_type; 1273 1274 evlist__for_each_entry(evlist, pos) { 1275 if (read_format != pos->attr.read_format) 1276 return false; 1277 } 1278 1279 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1280 if ((sample_type & PERF_SAMPLE_READ) && 1281 !(read_format & PERF_FORMAT_ID)) { 1282 return false; 1283 } 1284 1285 return true; 1286 } 1287 1288 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1289 { 1290 struct perf_evsel *first = perf_evlist__first(evlist); 1291 return first->attr.read_format; 1292 } 1293 1294 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1295 { 1296 struct perf_evsel *first = perf_evlist__first(evlist); 1297 struct perf_sample *data; 1298 u64 sample_type; 1299 u16 size = 0; 1300 1301 if (!first->attr.sample_id_all) 1302 goto out; 1303 1304 sample_type = first->attr.sample_type; 1305 1306 if (sample_type & PERF_SAMPLE_TID) 1307 size += sizeof(data->tid) * 2; 1308 1309 if (sample_type & PERF_SAMPLE_TIME) 1310 size += sizeof(data->time); 1311 1312 if (sample_type & PERF_SAMPLE_ID) 1313 size += sizeof(data->id); 1314 1315 if (sample_type & PERF_SAMPLE_STREAM_ID) 1316 size += sizeof(data->stream_id); 1317 1318 if (sample_type & PERF_SAMPLE_CPU) 1319 size += sizeof(data->cpu) * 2; 1320 1321 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1322 size += sizeof(data->id); 1323 out: 1324 return size; 1325 } 1326 1327 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1328 { 1329 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1330 1331 evlist__for_each_entry_continue(evlist, pos) { 1332 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1333 return false; 1334 } 1335 1336 return true; 1337 } 1338 1339 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1340 { 1341 struct perf_evsel *first = perf_evlist__first(evlist); 1342 return first->attr.sample_id_all; 1343 } 1344 1345 void perf_evlist__set_selected(struct perf_evlist *evlist, 1346 struct perf_evsel *evsel) 1347 { 1348 evlist->selected = evsel; 1349 } 1350 1351 void perf_evlist__close(struct perf_evlist *evlist) 1352 { 1353 struct perf_evsel *evsel; 1354 1355 evlist__for_each_entry_reverse(evlist, evsel) 1356 perf_evsel__close(evsel); 1357 } 1358 1359 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1360 { 1361 struct cpu_map *cpus; 1362 struct thread_map *threads; 1363 int err = -ENOMEM; 1364 1365 /* 1366 * Try reading /sys/devices/system/cpu/online to get 1367 * an all cpus map. 1368 * 1369 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1370 * code needs an overhaul to properly forward the 1371 * error, and we may not want to do that fallback to a 1372 * default cpu identity map :-\ 1373 */ 1374 cpus = cpu_map__new(NULL); 1375 if (!cpus) 1376 goto out; 1377 1378 threads = thread_map__new_dummy(); 1379 if (!threads) 1380 goto out_put; 1381 1382 perf_evlist__set_maps(evlist, cpus, threads); 1383 out: 1384 return err; 1385 out_put: 1386 cpu_map__put(cpus); 1387 goto out; 1388 } 1389 1390 int perf_evlist__open(struct perf_evlist *evlist) 1391 { 1392 struct perf_evsel *evsel; 1393 int err; 1394 1395 /* 1396 * Default: one fd per CPU, all threads, aka systemwide 1397 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1398 */ 1399 if (evlist->threads == NULL && evlist->cpus == NULL) { 1400 err = perf_evlist__create_syswide_maps(evlist); 1401 if (err < 0) 1402 goto out_err; 1403 } 1404 1405 perf_evlist__update_id_pos(evlist); 1406 1407 evlist__for_each_entry(evlist, evsel) { 1408 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1409 if (err < 0) 1410 goto out_err; 1411 } 1412 1413 return 0; 1414 out_err: 1415 perf_evlist__close(evlist); 1416 errno = -err; 1417 return err; 1418 } 1419 1420 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1421 const char *argv[], bool pipe_output, 1422 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1423 { 1424 int child_ready_pipe[2], go_pipe[2]; 1425 char bf; 1426 1427 if (pipe(child_ready_pipe) < 0) { 1428 perror("failed to create 'ready' pipe"); 1429 return -1; 1430 } 1431 1432 if (pipe(go_pipe) < 0) { 1433 perror("failed to create 'go' pipe"); 1434 goto out_close_ready_pipe; 1435 } 1436 1437 evlist->workload.pid = fork(); 1438 if (evlist->workload.pid < 0) { 1439 perror("failed to fork"); 1440 goto out_close_pipes; 1441 } 1442 1443 if (!evlist->workload.pid) { 1444 int ret; 1445 1446 if (pipe_output) 1447 dup2(2, 1); 1448 1449 signal(SIGTERM, SIG_DFL); 1450 1451 close(child_ready_pipe[0]); 1452 close(go_pipe[1]); 1453 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1454 1455 /* 1456 * Tell the parent we're ready to go 1457 */ 1458 close(child_ready_pipe[1]); 1459 1460 /* 1461 * Wait until the parent tells us to go. 1462 */ 1463 ret = read(go_pipe[0], &bf, 1); 1464 /* 1465 * The parent will ask for the execvp() to be performed by 1466 * writing exactly one byte, in workload.cork_fd, usually via 1467 * perf_evlist__start_workload(). 1468 * 1469 * For cancelling the workload without actually running it, 1470 * the parent will just close workload.cork_fd, without writing 1471 * anything, i.e. read will return zero and we just exit() 1472 * here. 1473 */ 1474 if (ret != 1) { 1475 if (ret == -1) 1476 perror("unable to read pipe"); 1477 exit(ret); 1478 } 1479 1480 execvp(argv[0], (char **)argv); 1481 1482 if (exec_error) { 1483 union sigval val; 1484 1485 val.sival_int = errno; 1486 if (sigqueue(getppid(), SIGUSR1, val)) 1487 perror(argv[0]); 1488 } else 1489 perror(argv[0]); 1490 exit(-1); 1491 } 1492 1493 if (exec_error) { 1494 struct sigaction act = { 1495 .sa_flags = SA_SIGINFO, 1496 .sa_sigaction = exec_error, 1497 }; 1498 sigaction(SIGUSR1, &act, NULL); 1499 } 1500 1501 if (target__none(target)) { 1502 if (evlist->threads == NULL) { 1503 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1504 __func__, __LINE__); 1505 goto out_close_pipes; 1506 } 1507 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1508 } 1509 1510 close(child_ready_pipe[1]); 1511 close(go_pipe[0]); 1512 /* 1513 * wait for child to settle 1514 */ 1515 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1516 perror("unable to read pipe"); 1517 goto out_close_pipes; 1518 } 1519 1520 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1521 evlist->workload.cork_fd = go_pipe[1]; 1522 close(child_ready_pipe[0]); 1523 return 0; 1524 1525 out_close_pipes: 1526 close(go_pipe[0]); 1527 close(go_pipe[1]); 1528 out_close_ready_pipe: 1529 close(child_ready_pipe[0]); 1530 close(child_ready_pipe[1]); 1531 return -1; 1532 } 1533 1534 int perf_evlist__start_workload(struct perf_evlist *evlist) 1535 { 1536 if (evlist->workload.cork_fd > 0) { 1537 char bf = 0; 1538 int ret; 1539 /* 1540 * Remove the cork, let it rip! 1541 */ 1542 ret = write(evlist->workload.cork_fd, &bf, 1); 1543 if (ret < 0) 1544 perror("unable to write to pipe"); 1545 1546 close(evlist->workload.cork_fd); 1547 return ret; 1548 } 1549 1550 return 0; 1551 } 1552 1553 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1554 struct perf_sample *sample) 1555 { 1556 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1557 1558 if (!evsel) 1559 return -EFAULT; 1560 return perf_evsel__parse_sample(evsel, event, sample); 1561 } 1562 1563 int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, 1564 union perf_event *event, 1565 u64 *timestamp) 1566 { 1567 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1568 1569 if (!evsel) 1570 return -EFAULT; 1571 return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); 1572 } 1573 1574 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1575 { 1576 struct perf_evsel *evsel; 1577 size_t printed = 0; 1578 1579 evlist__for_each_entry(evlist, evsel) { 1580 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1581 perf_evsel__name(evsel)); 1582 } 1583 1584 return printed + fprintf(fp, "\n"); 1585 } 1586 1587 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1588 int err, char *buf, size_t size) 1589 { 1590 int printed, value; 1591 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1592 1593 switch (err) { 1594 case EACCES: 1595 case EPERM: 1596 printed = scnprintf(buf, size, 1597 "Error:\t%s.\n" 1598 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1599 1600 value = perf_event_paranoid(); 1601 1602 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1603 1604 if (value >= 2) { 1605 printed += scnprintf(buf + printed, size - printed, 1606 "For your workloads it needs to be <= 1\nHint:\t"); 1607 } 1608 printed += scnprintf(buf + printed, size - printed, 1609 "For system wide tracing it needs to be set to -1.\n"); 1610 1611 printed += scnprintf(buf + printed, size - printed, 1612 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1613 "Hint:\tThe current value is %d.", value); 1614 break; 1615 case EINVAL: { 1616 struct perf_evsel *first = perf_evlist__first(evlist); 1617 int max_freq; 1618 1619 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1620 goto out_default; 1621 1622 if (first->attr.sample_freq < (u64)max_freq) 1623 goto out_default; 1624 1625 printed = scnprintf(buf, size, 1626 "Error:\t%s.\n" 1627 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1628 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1629 emsg, max_freq, first->attr.sample_freq); 1630 break; 1631 } 1632 default: 1633 out_default: 1634 scnprintf(buf, size, "%s", emsg); 1635 break; 1636 } 1637 1638 return 0; 1639 } 1640 1641 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1642 { 1643 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1644 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1645 1646 switch (err) { 1647 case EPERM: 1648 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1649 printed += scnprintf(buf + printed, size - printed, 1650 "Error:\t%s.\n" 1651 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1652 "Hint:\tTried using %zd kB.\n", 1653 emsg, pages_max_per_user, pages_attempted); 1654 1655 if (pages_attempted >= pages_max_per_user) { 1656 printed += scnprintf(buf + printed, size - printed, 1657 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1658 pages_max_per_user + pages_attempted); 1659 } 1660 1661 printed += scnprintf(buf + printed, size - printed, 1662 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1663 break; 1664 default: 1665 scnprintf(buf, size, "%s", emsg); 1666 break; 1667 } 1668 1669 return 0; 1670 } 1671 1672 void perf_evlist__to_front(struct perf_evlist *evlist, 1673 struct perf_evsel *move_evsel) 1674 { 1675 struct perf_evsel *evsel, *n; 1676 LIST_HEAD(move); 1677 1678 if (move_evsel == perf_evlist__first(evlist)) 1679 return; 1680 1681 evlist__for_each_entry_safe(evlist, n, evsel) { 1682 if (evsel->leader == move_evsel->leader) 1683 list_move_tail(&evsel->node, &move); 1684 } 1685 1686 list_splice(&move, &evlist->entries); 1687 } 1688 1689 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1690 struct perf_evsel *tracking_evsel) 1691 { 1692 struct perf_evsel *evsel; 1693 1694 if (tracking_evsel->tracking) 1695 return; 1696 1697 evlist__for_each_entry(evlist, evsel) { 1698 if (evsel != tracking_evsel) 1699 evsel->tracking = false; 1700 } 1701 1702 tracking_evsel->tracking = true; 1703 } 1704 1705 struct perf_evsel * 1706 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1707 const char *str) 1708 { 1709 struct perf_evsel *evsel; 1710 1711 evlist__for_each_entry(evlist, evsel) { 1712 if (!evsel->name) 1713 continue; 1714 if (strcmp(str, evsel->name) == 0) 1715 return evsel; 1716 } 1717 1718 return NULL; 1719 } 1720 1721 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1722 enum bkw_mmap_state state) 1723 { 1724 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1725 enum action { 1726 NONE, 1727 PAUSE, 1728 RESUME, 1729 } action = NONE; 1730 1731 if (!evlist->overwrite_mmap) 1732 return; 1733 1734 switch (old_state) { 1735 case BKW_MMAP_NOTREADY: { 1736 if (state != BKW_MMAP_RUNNING) 1737 goto state_err; 1738 break; 1739 } 1740 case BKW_MMAP_RUNNING: { 1741 if (state != BKW_MMAP_DATA_PENDING) 1742 goto state_err; 1743 action = PAUSE; 1744 break; 1745 } 1746 case BKW_MMAP_DATA_PENDING: { 1747 if (state != BKW_MMAP_EMPTY) 1748 goto state_err; 1749 break; 1750 } 1751 case BKW_MMAP_EMPTY: { 1752 if (state != BKW_MMAP_RUNNING) 1753 goto state_err; 1754 action = RESUME; 1755 break; 1756 } 1757 default: 1758 WARN_ONCE(1, "Shouldn't get there\n"); 1759 } 1760 1761 evlist->bkw_mmap_state = state; 1762 1763 switch (action) { 1764 case PAUSE: 1765 perf_evlist__pause(evlist); 1766 break; 1767 case RESUME: 1768 perf_evlist__resume(evlist); 1769 break; 1770 case NONE: 1771 default: 1772 break; 1773 } 1774 1775 state_err: 1776 return; 1777 } 1778 1779 bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) 1780 { 1781 struct perf_evsel *evsel; 1782 1783 evlist__for_each_entry(evlist, evsel) { 1784 if (!evsel->attr.exclude_kernel) 1785 return false; 1786 } 1787 1788 return true; 1789 } 1790 1791 /* 1792 * Events in data file are not collect in groups, but we still want 1793 * the group display. Set the artificial group and set the leader's 1794 * forced_leader flag to notify the display code. 1795 */ 1796 void perf_evlist__force_leader(struct perf_evlist *evlist) 1797 { 1798 if (!evlist->nr_groups) { 1799 struct perf_evsel *leader = perf_evlist__first(evlist); 1800 1801 perf_evlist__set_leader(evlist); 1802 leader->forced_leader = true; 1803 } 1804 } 1805 1806 struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, 1807 struct perf_evsel *evsel) 1808 { 1809 struct perf_evsel *c2, *leader; 1810 bool is_open = true; 1811 1812 leader = evsel->leader; 1813 pr_debug("Weak group for %s/%d failed\n", 1814 leader->name, leader->nr_members); 1815 1816 /* 1817 * for_each_group_member doesn't work here because it doesn't 1818 * include the first entry. 1819 */ 1820 evlist__for_each_entry(evsel_list, c2) { 1821 if (c2 == evsel) 1822 is_open = false; 1823 if (c2->leader == leader) { 1824 if (is_open) 1825 perf_evsel__close(c2); 1826 c2->leader = c2; 1827 c2->nr_members = 0; 1828 } 1829 } 1830 return leader; 1831 } 1832 1833 int perf_evlist__add_sb_event(struct perf_evlist **evlist, 1834 struct perf_event_attr *attr, 1835 perf_evsel__sb_cb_t cb, 1836 void *data) 1837 { 1838 struct perf_evsel *evsel; 1839 bool new_evlist = (*evlist) == NULL; 1840 1841 if (*evlist == NULL) 1842 *evlist = perf_evlist__new(); 1843 if (*evlist == NULL) 1844 return -1; 1845 1846 if (!attr->sample_id_all) { 1847 pr_warning("enabling sample_id_all for all side band events\n"); 1848 attr->sample_id_all = 1; 1849 } 1850 1851 evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries); 1852 if (!evsel) 1853 goto out_err; 1854 1855 evsel->side_band.cb = cb; 1856 evsel->side_band.data = data; 1857 perf_evlist__add(*evlist, evsel); 1858 return 0; 1859 1860 out_err: 1861 if (new_evlist) { 1862 perf_evlist__delete(*evlist); 1863 *evlist = NULL; 1864 } 1865 return -1; 1866 } 1867 1868 static void *perf_evlist__poll_thread(void *arg) 1869 { 1870 struct perf_evlist *evlist = arg; 1871 bool draining = false; 1872 int i, done = 0; 1873 1874 while (!done) { 1875 bool got_data = false; 1876 1877 if (evlist->thread.done) 1878 draining = true; 1879 1880 if (!draining) 1881 perf_evlist__poll(evlist, 1000); 1882 1883 for (i = 0; i < evlist->nr_mmaps; i++) { 1884 struct perf_mmap *map = &evlist->mmap[i]; 1885 union perf_event *event; 1886 1887 if (perf_mmap__read_init(map)) 1888 continue; 1889 while ((event = perf_mmap__read_event(map)) != NULL) { 1890 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1891 1892 if (evsel && evsel->side_band.cb) 1893 evsel->side_band.cb(event, evsel->side_band.data); 1894 else 1895 pr_warning("cannot locate proper evsel for the side band event\n"); 1896 1897 perf_mmap__consume(map); 1898 got_data = true; 1899 } 1900 perf_mmap__read_done(map); 1901 } 1902 1903 if (draining && !got_data) 1904 break; 1905 } 1906 return NULL; 1907 } 1908 1909 int perf_evlist__start_sb_thread(struct perf_evlist *evlist, 1910 struct target *target) 1911 { 1912 struct perf_evsel *counter; 1913 1914 if (!evlist) 1915 return 0; 1916 1917 if (perf_evlist__create_maps(evlist, target)) 1918 goto out_delete_evlist; 1919 1920 evlist__for_each_entry(evlist, counter) { 1921 if (perf_evsel__open(counter, evlist->cpus, 1922 evlist->threads) < 0) 1923 goto out_delete_evlist; 1924 } 1925 1926 if (perf_evlist__mmap(evlist, UINT_MAX)) 1927 goto out_delete_evlist; 1928 1929 evlist__for_each_entry(evlist, counter) { 1930 if (perf_evsel__enable(counter)) 1931 goto out_delete_evlist; 1932 } 1933 1934 evlist->thread.done = 0; 1935 if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) 1936 goto out_delete_evlist; 1937 1938 return 0; 1939 1940 out_delete_evlist: 1941 perf_evlist__delete(evlist); 1942 evlist = NULL; 1943 return -1; 1944 } 1945 1946 void perf_evlist__stop_sb_thread(struct perf_evlist *evlist) 1947 { 1948 if (!evlist) 1949 return; 1950 evlist->thread.done = 1; 1951 pthread_join(evlist->thread.th, NULL); 1952 perf_evlist__delete(evlist); 1953 } 1954