/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include "parse-options.h"

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}
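
/*
 * A minimal lifetime sketch: perf_evlist__new_default() gives an evlist
 * with a single "cycles" event, and since the evlist owns its evsels a
 * matching perf_evlist__delete() is enough to tear everything down:
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *	...
 *	perf_evlist__delete(evlist);
 */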

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	perf_event_attr__set_max_precise_ip(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%.*s",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_DISABLE, 0);
		}
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_ENABLE, 0);
		}
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

int perf_evlist__disable_event(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return 0;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_DISABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

int perf_evlist__enable_event(struct perf_evlist *evlist,
			      struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_ENABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}
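
/*
 * The hash filled in above is what later allows a sample to be routed back
 * to the evsel that produced it, e.g.:
 *
 *	struct perf_evsel *evsel = perf_evlist__id2evsel(evlist, sample->id);
 */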

static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
				  struct perf_evsel *evsel,
				  int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (evlist->overwrite) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the head, we got messed up.
		 *
		 * In either case, truncate and restart at head.
		 */
		int diff = head - old;
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * head points to a known good entry, start there.
			 */
			old = head;
		}
	}

	if (old != head) {
		size_t size;

		event = (union perf_event *)&data[old & md->mask];
		size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		old += size;
	}

	md->prev = old;

	return event;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}
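
/*
 * A minimal consumer-loop sketch: after perf_evlist__mmap() each ring
 * buffer is drained with perf_evlist__mmap_read() and every event is
 * acknowledged with perf_evlist__mmap_consume(), which also drops the
 * final reference once the corresponding fd got POLLHUP'ed:
 *
 *	union perf_event *event;
 *	int i;
 *
 *	for (i = 0; i < evlist->nr_mmaps; i++) {
 *		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 *			... process event, e.g. via perf_evlist__parse_sample() ...
 *			perf_evlist__mmap_consume(evlist, i);
 *		}
 *	}
 */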

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		int fd;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX) {
		int max;

		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
			/*
			 * Pick a once upon a time good value, i.e. things look
			 * strange since we can't read a sysctl value, but let's
			 * not die yet...
			 */
			max = 512;
		} else {
			max -= (page_size / 1024);
		}

		pages = (max * 1024) / page_size;
		if (!is_power_of_2(pages))
			pages = rounddown_pow_of_two(pages);
	} else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}
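
/*
 * A worked example for the two accepted -m/--mmap-pages forms (4K pages
 * assumed): "-m 100" is a page count and is rounded up to the next power
 * of two, i.e. 128 pages, while "-m 512K" is a size and maps to
 * 512K / 4K = 128 pages as well.  perf_evlist__mmap_size() then adds one
 * page for the perf_event_mmap_page header, so both end up as a 129-page
 * (516K) mapping.
 */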

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_consume() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpus;
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = threads;
	}

	perf_evlist__propagate_maps(evlist);
}
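
/*
 * Ownership sketch for the reference counting described above: the evlist
 * takes over the initial reference, so a caller that wants to go on using
 * the maps it passes in has to grab its own references first, e.g.:
 *
 *	struct cpu_map *cpus = cpu_map__new(NULL);
 *	struct thread_map *threads = thread_map__new_dummy();
 *
 *	perf_evlist__set_maps(evlist, cpu_map__get(cpus),
 *			      thread_map__get(threads));
 *	...
 *	cpu_map__put(cpus);
 *	thread_map__put(threads);
 */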

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel maps should always match.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}
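
/*
 * For reference, perf_evlist__set_filter_pids() with pids {1, 2, 3} builds
 * the tracepoint filter string
 *
 *	"common_pid != 1 && common_pid != 2 && common_pid != 3"
 *
 * and applies it to every evsel; this is how a tool can keep its own
 * threads out of a system-wide trace.
 */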

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);

	err = 0;
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}
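
/*
 * A rough end-to-end sketch of how the pieces fit together, here for a
 * forked workload (a populated struct target is assumed and error
 * handling is omitted):
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	perf_evlist__create_maps(evlist, &target);
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	perf_evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 *	... consume the ring buffers as sketched above ...
 *	perf_evlist__disable(evlist);
 *	perf_evlist__delete(evlist);
 *
 * The child forked in perf_evlist__prepare_workload() below stays blocked
 * on a "go" pipe and only exec()s the workload once
 * perf_evlist__start_workload() writes the single byte that pops the cork;
 * closing workload.cork_fd without writing cancels the workload instead.
 */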

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}