/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

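/*
 * Illustrative sketch (not part of the original file): the typical lifetime
 * of an evlist built with the constructors above, roughly as a perf tool
 * would drive it.  'opts' stands in for a record_opts-like structure holding
 * the target and mmap settings; error handling is collapsed for brevity.
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *	if (perf_evlist__create_maps(evlist, &opts.target) < 0)
 *		goto out_delete;
 *	if (perf_evlist__open(evlist) < 0)
 *		goto out_delete;
 *	if (perf_evlist__mmap(evlist, opts.mmap_pages) < 0)
 *		goto out_delete;
 *	perf_evlist__enable(evlist);
 *	// ... consume events, then ...
 *	perf_evlist__disable(evlist);
 * out_delete:
 *	perf_evlist__delete(evlist);
 */
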
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.exclude_kernel	= 1,
		.precise_ip	= 3,
	};

	event_attr_init(&attr);

	/*
	 * Unnamed union member, not supported as struct member named
	 * initializer in older compilers such as gcc 4.4.7
	 */
	attr.sample_period = 1;

	while (attr.precise_ip != 0) {
		int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr.precise_ip;
	}

	pattr->precise_ip = attr.precise_ip;
}

int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
	struct perf_evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

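/*
 * Illustrative sketch (an assumption, not lifted from a particular tool):
 * how the pollfd helpers above are usually combined in an event loop.
 * Waiting is done on the fdarray filled by __perf_evlist__add_pollfd(), and
 * fds that went away (POLLERR/POLLHUP) are filtered out, which also drops
 * the perf_mmap reference via perf_evlist__munmap_filtered():
 *
 *	while (!done) {
 *		if (perf_evlist__poll(evlist, timeout_ms) <= 0)
 *			continue;
 *		// ... read the mmap'ed ring buffers ...
 *		perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP);
 *	}
 */
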
static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
					    union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

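/*
 * Reading aid for the id lookup above (descriptive only, no new behaviour):
 * for PERF_RECORD_SAMPLE events the ID sits id_pos u64 words from the start
 * of the sample array, while for other record types carrying sample_id_all
 * data the ID sits is_pos u64 words from the *end* of the event.  With
 * PERF_SAMPLE_IDENTIFIER, for instance, the ID is the first word of a sample
 * and the last word of a non-sample record, which is what makes the single
 * evlist-wide id_pos/is_pos copy set in perf_evlist__set_id_pos() work.
 */
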
static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	if (evsel->attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct perf_evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but lets not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1       },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

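/*
 * Worked example for the sizing helpers above (illustrative numbers): with
 * the usual kernel/perf_event_mlock_kb default of 516 kB and a 4 kB page
 * size, perf_event_mlock_kb_in_pages() computes (516 - 4) kB = 512 kB ->
 * 128 pages, already a power of two, and perf_evlist__mmap_size(UINT_MAX)
 * then returns (128 + 1) * 4096 bytes: 128 data pages plus the control page.
 */
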
/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct cpu_map *cpus;
	struct thread_map *threads;

	/*
	 * If '-a' and '--per-thread' are both specified to perf record,
	 * perf record overrides '--per-thread': target->per_thread = false
	 * and target->system_wide = true.
	 *
	 * If only '--per-thread' is specified to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false, and thread_map__new_str doesn't call
	 * thread_map__new_all_cpus.  That keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows both target->per_thread and
	 * target->system_wide to be true, meaning: collect system-wide
	 * per-thread data.  In that case thread_map__new_str calls
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

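/*
 * Quick reference for the target combinations handled in
 * perf_evlist__create_maps() above (restating its comment, not new rules):
 *
 *	perf record -a --per-thread -> per_thread = false, system_wide = true
 *	perf record --per-thread    -> per_thread = true,  system_wide = false
 *	perf stat --per-thread -a   -> per_thread = true,  system_wide = true,
 *	                               so all_threads is true and
 *	                               thread_map__new_str() enumerates every
 *	                               thread on the system.
 */
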
void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * CPU limit, so the evlist and evsel maps are always the same.
		 */
		err = perf_evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

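/*
 * Illustrative sketch (assumption, not from a specific caller): reporting an
 * open failure with perf_evlist__strerror_open(), defined further down in
 * this file.  perf_evlist__open() stores the positive error code in errno on
 * failure, which is what the strerror helper expects:
 *
 *	char errbuf[BUFSIZ];
 *
 *	if (perf_evlist__open(evlist) < 0) {
 *		perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 *		pr_err("%s\n", errbuf);
 *	}
 */
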
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display.  Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct perf_evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct perf_evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
						 struct perf_evsel *evsel)
{
	struct perf_evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct perf_evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct perf_evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = perf_evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	perf_evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		perf_evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct perf_evlist *evlist = arg;
	bool draining = false;
	int i;

	while (draining || !(evlist->thread.done)) {
		if (draining)
			draining = false;
		else if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
			}
			perf_mmap__read_done(map);
		}
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct perf_evlist *evlist,
				 struct target *target)
{
	struct perf_evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (perf_evsel__open(counter, evlist->cpus,
				     evlist->threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (perf_evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	perf_evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct perf_evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	perf_evlist__delete(evlist);
}
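
/*
 * Illustrative sketch of the side-band machinery above (the attribute,
 * callback and target below are assumptions; perf record uses this path to
 * receive things like PERF_RECORD_BPF_EVENT side-band records):
 *
 *	struct perf_evlist *sb_evlist = NULL;
 *	struct perf_event_attr attr = {
 *		.type          = PERF_TYPE_SOFTWARE,
 *		.config        = PERF_COUNT_SW_DUMMY,
 *		.sample_id_all = 1,
 *	};
 *
 *	if (perf_evlist__add_sb_event(&sb_evlist, &attr, my_sb_callback, NULL))
 *		return -1;
 *	if (perf_evlist__start_sb_thread(sb_evlist, &target))
 *		return -1;
 *	// ... run the main session ...
 *	perf_evlist__stop_sb_thread(sb_evlist);
 *
 * my_sb_callback is a hypothetical perf_evsel__sb_cb_t invoked from
 * perf_evlist__poll_thread() for each side-band event.
 */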