/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include "parse-options.h"

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use strdup() because free(evsel) assumes name is allocated */
	evsel->name = strdup("cycles");
	if (!evsel->name)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (evsel == NULL)
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_DISABLE, 0);
		}
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	int cpu, thread;
	struct perf_evsel *pos;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		evlist__for_each(evlist, pos) {
			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
				continue;
			nr_threads = perf_evlist__nr_threads(evlist, pos);
			for (thread = 0; thread < nr_threads; thread++)
				ioctl(FD(pos, cpu, thread),
				      PERF_EVENT_IOC_ENABLE, 0);
		}
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

int perf_evlist__disable_event(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return 0;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_DISABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

int perf_evlist__enable_event(struct perf_evlist *evlist,
			      struct perf_evsel *evsel)
{
	int cpu, thread, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		for (thread = 0; thread < nr_threads; thread++) {
			err = ioctl(FD(evsel, cpu, thread),
				    PERF_EVENT_IOC_ENABLE, 0);
			if (err)
				return err;
		}
	}
	return 0;
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
				  struct perf_evsel *evsel,
				  int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (evlist->overwrite) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the head, we got messed up.
		 *
		 * In either case, truncate and restart at head.
		 */
		int diff = head - old;
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * head points to a known good entry, start there.
			 */
			old = head;
		}
	}

	if (old != head) {
		size_t size;

		event = (union perf_event *)&data[old & md->mask];
		size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		old += size;
	}

	md->prev = old;

	return event;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		int fd;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid. Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX) {
		int max;

		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
			/*
			 * Pick a once upon a time good value, i.e. things look
			 * strange since we can't read a sysctl value, but let's not
			 * die yet...
			 */
			max = 512;
		} else {
			max -= (page_size / 1024);
		}

		pages = (max * 1024) / page_size;
		if (!is_power_of_2(pages))
			pages = rounddown_pow_of_two(pages);
	} else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpus;
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = threads;
	}

	perf_evlist__propagate_maps(evlist);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel maps should always match.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;	/* success: don't return the default -ENOMEM */
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}