/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

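/*
 * Probe the most precise sample IP the kernel/hardware supports: start
 * with precise_ip = 3 and keep decreasing it until sys_perf_event_open()
 * accepts the attribute, then copy the surviving value into the caller's
 * attr.
 */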
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
		.exclude_kernel = 1,
		.precise_ip = 3,
	};

	event_attr_init(&attr);

	/*
	 * Unnamed union member, not supported as struct member named
	 * initializer in older compilers such as gcc 4.4.7
	 */
	attr.sample_period = 1;

	while (attr.precise_ip != 0) {
		int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr.precise_ip;
	}

	pattr->precise_ip = attr.precise_ip;
}

int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
	struct perf_evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size = sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

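/*
 * Map a per-cpu/per-thread event fd to its sample ID: first try the
 * PERF_EVENT_IOC_ID ioctl, and on older kernels (ENOTTY) fall back to
 * reading the ID out of the counter's read() data, which only works
 * when the group read format is not in use.
 */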
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
					    union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	if (evsel->attr.write_backward)
		return false;
	return true;
}

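/*
 * mmap one ring buffer per (idx, map) pair and redirect every other
 * evsel's fd for the same idx into it via PERF_EVENT_IOC_SET_OUTPUT.
 * Backward (write_backward) events get their own read-only maps in
 * evlist->overwrite_mmap.
 */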
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct perf_evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

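/*
 * Compute how many ring-buffer data pages fit in the
 * kernel.perf_event_mlock_kb budget, rounded down to a power of two;
 * fall back to 512 kB when the sysctl cannot be read.
 */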
unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct cpu_map *cpus;
	struct thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, '-a'
	 * overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus.  That keeps perf record's
	 * current behavior.
	 *
	 * For perf stat, target->per_thread and target->system_wide may
	 * both be true, which means collecting system-wide per-thread
	 * data.  In that case thread_map__new_str calls
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * CPU limit, so the evsel and evlist maps should always be the
		 * same.
		 */
		err = perf_evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

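/*
 * Fork the workload but keep it "corked" on a pipe: the child signals
 * readiness over child_ready_pipe and then blocks reading go_pipe until
 * perf_evlist__start_workload() writes a byte to workload.cork_fd, at
 * which point it execvp()s argv.  Closing cork_fd without writing
 * cancels the workload.  If exec fails and an exec_error handler was
 * installed, the child notifies the parent with SIGUSR1 via sigqueue().
 */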
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}

void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get there\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in the data file are not collected in groups, but we still want
 * the group display.  Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct perf_evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct perf_evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
						 struct perf_evsel *evsel)
{
	struct perf_evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->nr_members = 0;
		}
	}
	return leader;
}