/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}
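/*
 * Mark the first event on @list as the group leader of every event on it and
 * size the group: nr_members is derived from the idx distance between the
 * first and last entries, so it relies on the list members having consecutive
 * indexes.
 */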
void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
	struct perf_evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}
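/*
 * Helpers for perf_evlist__enable_event_idx(): enable a single event either on
 * all threads of a given cpu (per-cpu mmaps) or on all cpus of a given thread
 * (per-thread mmaps), via PERF_EVENT_IOC_ENABLE on the matching fds.
 */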
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}
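/*
 * The evlist->heads hash table maps sample IDs to struct perf_sample_id, which
 * in turn points back to the evsel that owns the ID. This is what
 * perf_evlist__id2sid()/perf_evlist__id2evsel() use to route events read from
 * the ring buffers back to the right evsel.
 */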
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
					    union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}
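/*
 * Pause/resume only affects the overwrite (backward) ring buffers: output to
 * them is stopped with PERF_EVENT_IOC_PAUSE_OUTPUT so that their contents can
 * be read stably, and re-enabled afterwards.
 */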
static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	if (evsel->attr.write_backward)
		return false;
	return true;
}
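/*
 * Map the fds of every evsel for one mmap slot (@idx): the first fd gets a new
 * ring buffer via perf_mmap__mmap(), the remaining ones are redirected to it
 * with PERF_EVENT_IOC_SET_OUTPUT. Events opened with write_backward get their
 * own read-only buffers in evlist->overwrite_mmap.
 */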
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct perf_evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid. Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
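/*
 * Parse a --mmap-pages style argument: either a size with a B/K/M/G suffix
 * (converted to pages) or a plain page count. Values that are not a power of
 * two are rounded up, and the result is rejected if it exceeds @max.
 */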
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If the per-event overwrite mode is not used, the user needs to signal event
 * consumption using perf_mmap__write_tail(). Using perf_evlist__mmap_read()
 * does this automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct cpu_map *cpus;
	struct thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, '-a' wins
	 * and '--per-thread' is overridden: target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false and thread_map__new_str doesn't call
	 * thread_map__new_all_cpus, which keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows both target->per_thread and
	 * target->system_wide to be true, meaning system-wide per-thread
	 * collection: thread_map__new_str then calls
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel cpu maps should always be
		 * the same.
		 */
		err = perf_evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

/*
 * Drive the backward (overwrite) ring buffer state machine:
 * NOTREADY -> RUNNING -> DATA_PENDING -> EMPTY -> RUNNING, pausing the buffers
 * on the RUNNING -> DATA_PENDING transition and resuming them on
 * EMPTY -> RUNNING. Any other transition is rejected.
 */
void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct perf_evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct perf_evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
						 struct perf_evsel *evsel)
{
	struct perf_evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}
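/*
 * Side-band events: perf_evlist__add_sb_event() adds an evsel whose samples
 * are not written to the data file but handed to a callback, and
 * perf_evlist__start_sb_thread() opens, mmaps and enables those events and
 * spawns a thread that polls the ring buffers and dispatches each event to the
 * owning evsel's callback.
 */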
int perf_evlist__add_sb_event(struct perf_evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct perf_evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = perf_evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	perf_evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		perf_evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct perf_evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct perf_evlist *evlist,
				 struct target *target)
{
	struct perf_evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (perf_evsel__open(counter, evlist->cpus,
				     evlist->threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (perf_evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	perf_evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct perf_evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	perf_evlist__delete(evlist);
}