1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 10 #include <byteswap.h> 11 #include "asm/bug.h" 12 #include "evsel.h" 13 #include "evlist.h" 14 #include "util.h" 15 #include "cpumap.h" 16 #include "thread_map.h" 17 18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 19 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) 20 21 int __perf_evsel__sample_size(u64 sample_type) 22 { 23 u64 mask = sample_type & PERF_SAMPLE_MASK; 24 int size = 0; 25 int i; 26 27 for (i = 0; i < 64; i++) { 28 if (mask & (1ULL << i)) 29 size++; 30 } 31 32 size *= sizeof(u64); 33 34 return size; 35 } 36 37 static void hists__init(struct hists *hists) 38 { 39 memset(hists, 0, sizeof(*hists)); 40 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; 41 hists->entries_in = &hists->entries_in_array[0]; 42 hists->entries_collapsed = RB_ROOT; 43 hists->entries = RB_ROOT; 44 pthread_mutex_init(&hists->lock, NULL); 45 } 46 47 void perf_evsel__init(struct perf_evsel *evsel, 48 struct perf_event_attr *attr, int idx) 49 { 50 evsel->idx = idx; 51 evsel->attr = *attr; 52 INIT_LIST_HEAD(&evsel->node); 53 hists__init(&evsel->hists); 54 } 55 56 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 57 { 58 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 59 60 if (evsel != NULL) 61 perf_evsel__init(evsel, attr, idx); 62 63 return evsel; 64 } 65 66 void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) 67 { 68 struct perf_event_attr *attr = &evsel->attr; 69 int track = !evsel->idx; /* only the first counter needs these */ 70 71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 72 attr->inherit = !opts->no_inherit; 73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 74 PERF_FORMAT_TOTAL_TIME_RUNNING | 75 PERF_FORMAT_ID; 76 77 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 78 79 /* 80 * We default some events to a 1 default interval. But keep 81 * it a weak assumption overridable by the user. 82 */ 83 if (!attr->sample_period || (opts->user_freq != UINT_MAX && 84 opts->user_interval != ULLONG_MAX)) { 85 if (opts->freq) { 86 attr->sample_type |= PERF_SAMPLE_PERIOD; 87 attr->freq = 1; 88 attr->sample_freq = opts->freq; 89 } else { 90 attr->sample_period = opts->default_interval; 91 } 92 } 93 94 if (opts->no_samples) 95 attr->sample_freq = 0; 96 97 if (opts->inherit_stat) 98 attr->inherit_stat = 1; 99 100 if (opts->sample_address) { 101 attr->sample_type |= PERF_SAMPLE_ADDR; 102 attr->mmap_data = track; 103 } 104 105 if (opts->call_graph) 106 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 107 108 if (opts->system_wide) 109 attr->sample_type |= PERF_SAMPLE_CPU; 110 111 if (opts->period) 112 attr->sample_type |= PERF_SAMPLE_PERIOD; 113 114 if (opts->sample_id_all_avail && 115 (opts->sample_time || opts->system_wide || 116 !opts->no_inherit || opts->cpu_list)) 117 attr->sample_type |= PERF_SAMPLE_TIME; 118 119 if (opts->raw_samples) { 120 attr->sample_type |= PERF_SAMPLE_TIME; 121 attr->sample_type |= PERF_SAMPLE_RAW; 122 attr->sample_type |= PERF_SAMPLE_CPU; 123 } 124 125 if (opts->no_delay) { 126 attr->watermark = 0; 127 attr->wakeup_events = 1; 128 } 129 130 attr->mmap = track; 131 attr->comm = track; 132 133 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { 134 attr->disabled = 1; 135 attr->enable_on_exec = 1; 136 } 137 } 138 139 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 140 { 141 int cpu, thread; 142 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 143 144 if (evsel->fd) { 145 for (cpu = 0; cpu < ncpus; cpu++) { 146 for (thread = 0; thread < nthreads; thread++) { 147 FD(evsel, cpu, thread) = -1; 148 } 149 } 150 } 151 152 return evsel->fd != NULL ? 0 : -ENOMEM; 153 } 154 155 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) 156 { 157 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); 158 if (evsel->sample_id == NULL) 159 return -ENOMEM; 160 161 evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); 162 if (evsel->id == NULL) { 163 xyarray__delete(evsel->sample_id); 164 evsel->sample_id = NULL; 165 return -ENOMEM; 166 } 167 168 return 0; 169 } 170 171 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 172 { 173 evsel->counts = zalloc((sizeof(*evsel->counts) + 174 (ncpus * sizeof(struct perf_counts_values)))); 175 return evsel->counts != NULL ? 0 : -ENOMEM; 176 } 177 178 void perf_evsel__free_fd(struct perf_evsel *evsel) 179 { 180 xyarray__delete(evsel->fd); 181 evsel->fd = NULL; 182 } 183 184 void perf_evsel__free_id(struct perf_evsel *evsel) 185 { 186 xyarray__delete(evsel->sample_id); 187 evsel->sample_id = NULL; 188 free(evsel->id); 189 evsel->id = NULL; 190 } 191 192 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 193 { 194 int cpu, thread; 195 196 for (cpu = 0; cpu < ncpus; cpu++) 197 for (thread = 0; thread < nthreads; ++thread) { 198 close(FD(evsel, cpu, thread)); 199 FD(evsel, cpu, thread) = -1; 200 } 201 } 202 203 void perf_evsel__exit(struct perf_evsel *evsel) 204 { 205 assert(list_empty(&evsel->node)); 206 xyarray__delete(evsel->fd); 207 xyarray__delete(evsel->sample_id); 208 free(evsel->id); 209 } 210 211 void perf_evsel__delete(struct perf_evsel *evsel) 212 { 213 perf_evsel__exit(evsel); 214 close_cgroup(evsel->cgrp); 215 free(evsel->name); 216 free(evsel); 217 } 218 219 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 220 int cpu, int thread, bool scale) 221 { 222 struct perf_counts_values count; 223 size_t nv = scale ? 3 : 1; 224 225 if (FD(evsel, cpu, thread) < 0) 226 return -EINVAL; 227 228 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 229 return -ENOMEM; 230 231 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 232 return -errno; 233 234 if (scale) { 235 if (count.run == 0) 236 count.val = 0; 237 else if (count.run < count.ena) 238 count.val = (u64)((double)count.val * count.ena / count.run + 0.5); 239 } else 240 count.ena = count.run = 0; 241 242 evsel->counts->cpu[cpu] = count; 243 return 0; 244 } 245 246 int __perf_evsel__read(struct perf_evsel *evsel, 247 int ncpus, int nthreads, bool scale) 248 { 249 size_t nv = scale ? 3 : 1; 250 int cpu, thread; 251 struct perf_counts_values *aggr = &evsel->counts->aggr, count; 252 253 aggr->val = aggr->ena = aggr->run = 0; 254 255 for (cpu = 0; cpu < ncpus; cpu++) { 256 for (thread = 0; thread < nthreads; thread++) { 257 if (FD(evsel, cpu, thread) < 0) 258 continue; 259 260 if (readn(FD(evsel, cpu, thread), 261 &count, nv * sizeof(u64)) < 0) 262 return -errno; 263 264 aggr->val += count.val; 265 if (scale) { 266 aggr->ena += count.ena; 267 aggr->run += count.run; 268 } 269 } 270 } 271 272 evsel->counts->scaled = 0; 273 if (scale) { 274 if (aggr->run == 0) { 275 evsel->counts->scaled = -1; 276 aggr->val = 0; 277 return 0; 278 } 279 280 if (aggr->run < aggr->ena) { 281 evsel->counts->scaled = 1; 282 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); 283 } 284 } else 285 aggr->ena = aggr->run = 0; 286 287 return 0; 288 } 289 290 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 291 struct thread_map *threads, bool group, 292 struct xyarray *group_fds) 293 { 294 int cpu, thread; 295 unsigned long flags = 0; 296 int pid = -1, err; 297 298 if (evsel->fd == NULL && 299 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 300 return -ENOMEM; 301 302 if (evsel->cgrp) { 303 flags = PERF_FLAG_PID_CGROUP; 304 pid = evsel->cgrp->fd; 305 } 306 307 for (cpu = 0; cpu < cpus->nr; cpu++) { 308 int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1; 309 310 for (thread = 0; thread < threads->nr; thread++) { 311 312 if (!evsel->cgrp) 313 pid = threads->map[thread]; 314 315 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 316 pid, 317 cpus->map[cpu], 318 group_fd, flags); 319 if (FD(evsel, cpu, thread) < 0) { 320 err = -errno; 321 goto out_close; 322 } 323 324 if (group && group_fd == -1) 325 group_fd = FD(evsel, cpu, thread); 326 } 327 } 328 329 return 0; 330 331 out_close: 332 do { 333 while (--thread >= 0) { 334 close(FD(evsel, cpu, thread)); 335 FD(evsel, cpu, thread) = -1; 336 } 337 thread = threads->nr; 338 } while (--cpu >= 0); 339 return err; 340 } 341 342 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads) 343 { 344 if (evsel->fd == NULL) 345 return; 346 347 perf_evsel__close_fd(evsel, ncpus, nthreads); 348 perf_evsel__free_fd(evsel); 349 evsel->fd = NULL; 350 } 351 352 static struct { 353 struct cpu_map map; 354 int cpus[1]; 355 } empty_cpu_map = { 356 .map.nr = 1, 357 .cpus = { -1, }, 358 }; 359 360 static struct { 361 struct thread_map map; 362 int threads[1]; 363 } empty_thread_map = { 364 .map.nr = 1, 365 .threads = { -1, }, 366 }; 367 368 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 369 struct thread_map *threads, bool group, 370 struct xyarray *group_fd) 371 { 372 if (cpus == NULL) { 373 /* Work around old compiler warnings about strict aliasing */ 374 cpus = &empty_cpu_map.map; 375 } 376 377 if (threads == NULL) 378 threads = &empty_thread_map.map; 379 380 return __perf_evsel__open(evsel, cpus, threads, group, group_fd); 381 } 382 383 int perf_evsel__open_per_cpu(struct perf_evsel *evsel, 384 struct cpu_map *cpus, bool group, 385 struct xyarray *group_fd) 386 { 387 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, 388 group_fd); 389 } 390 391 int perf_evsel__open_per_thread(struct perf_evsel *evsel, 392 struct thread_map *threads, bool group, 393 struct xyarray *group_fd) 394 { 395 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, 396 group_fd); 397 } 398 399 static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 400 struct perf_sample *sample) 401 { 402 const u64 *array = event->sample.array; 403 404 array += ((event->header.size - 405 sizeof(event->header)) / sizeof(u64)) - 1; 406 407 if (type & PERF_SAMPLE_CPU) { 408 u32 *p = (u32 *)array; 409 sample->cpu = *p; 410 array--; 411 } 412 413 if (type & PERF_SAMPLE_STREAM_ID) { 414 sample->stream_id = *array; 415 array--; 416 } 417 418 if (type & PERF_SAMPLE_ID) { 419 sample->id = *array; 420 array--; 421 } 422 423 if (type & PERF_SAMPLE_TIME) { 424 sample->time = *array; 425 array--; 426 } 427 428 if (type & PERF_SAMPLE_TID) { 429 u32 *p = (u32 *)array; 430 sample->pid = p[0]; 431 sample->tid = p[1]; 432 } 433 434 return 0; 435 } 436 437 static bool sample_overlap(const union perf_event *event, 438 const void *offset, u64 size) 439 { 440 const void *base = event; 441 442 if (offset + size > base + event->header.size) 443 return true; 444 445 return false; 446 } 447 448 int perf_event__parse_sample(const union perf_event *event, u64 type, 449 int sample_size, bool sample_id_all, 450 struct perf_sample *data, bool swapped) 451 { 452 const u64 *array; 453 454 /* 455 * used for cross-endian analysis. See git commit 65014ab3 456 * for why this goofiness is needed. 457 */ 458 union { 459 u64 val64; 460 u32 val32[2]; 461 } u; 462 463 memset(data, 0, sizeof(*data)); 464 data->cpu = data->pid = data->tid = -1; 465 data->stream_id = data->id = data->time = -1ULL; 466 data->period = 1; 467 468 if (event->header.type != PERF_RECORD_SAMPLE) { 469 if (!sample_id_all) 470 return 0; 471 return perf_event__parse_id_sample(event, type, data); 472 } 473 474 array = event->sample.array; 475 476 if (sample_size + sizeof(event->header) > event->header.size) 477 return -EFAULT; 478 479 if (type & PERF_SAMPLE_IP) { 480 data->ip = event->ip.ip; 481 array++; 482 } 483 484 if (type & PERF_SAMPLE_TID) { 485 u.val64 = *array; 486 if (swapped) { 487 /* undo swap of u64, then swap on individual u32s */ 488 u.val64 = bswap_64(u.val64); 489 u.val32[0] = bswap_32(u.val32[0]); 490 u.val32[1] = bswap_32(u.val32[1]); 491 } 492 493 data->pid = u.val32[0]; 494 data->tid = u.val32[1]; 495 array++; 496 } 497 498 if (type & PERF_SAMPLE_TIME) { 499 data->time = *array; 500 array++; 501 } 502 503 data->addr = 0; 504 if (type & PERF_SAMPLE_ADDR) { 505 data->addr = *array; 506 array++; 507 } 508 509 data->id = -1ULL; 510 if (type & PERF_SAMPLE_ID) { 511 data->id = *array; 512 array++; 513 } 514 515 if (type & PERF_SAMPLE_STREAM_ID) { 516 data->stream_id = *array; 517 array++; 518 } 519 520 if (type & PERF_SAMPLE_CPU) { 521 522 u.val64 = *array; 523 if (swapped) { 524 /* undo swap of u64, then swap on individual u32s */ 525 u.val64 = bswap_64(u.val64); 526 u.val32[0] = bswap_32(u.val32[0]); 527 } 528 529 data->cpu = u.val32[0]; 530 array++; 531 } 532 533 if (type & PERF_SAMPLE_PERIOD) { 534 data->period = *array; 535 array++; 536 } 537 538 if (type & PERF_SAMPLE_READ) { 539 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); 540 return -1; 541 } 542 543 if (type & PERF_SAMPLE_CALLCHAIN) { 544 if (sample_overlap(event, array, sizeof(data->callchain->nr))) 545 return -EFAULT; 546 547 data->callchain = (struct ip_callchain *)array; 548 549 if (sample_overlap(event, array, data->callchain->nr)) 550 return -EFAULT; 551 552 array += 1 + data->callchain->nr; 553 } 554 555 if (type & PERF_SAMPLE_RAW) { 556 const u64 *pdata; 557 558 u.val64 = *array; 559 if (WARN_ONCE(swapped, 560 "Endianness of raw data not corrected!\n")) { 561 /* undo swap of u64, then swap on individual u32s */ 562 u.val64 = bswap_64(u.val64); 563 u.val32[0] = bswap_32(u.val32[0]); 564 u.val32[1] = bswap_32(u.val32[1]); 565 } 566 567 if (sample_overlap(event, array, sizeof(u32))) 568 return -EFAULT; 569 570 data->raw_size = u.val32[0]; 571 pdata = (void *) array + sizeof(u32); 572 573 if (sample_overlap(event, pdata, data->raw_size)) 574 return -EFAULT; 575 576 data->raw_data = (void *) pdata; 577 } 578 579 return 0; 580 } 581 582 int perf_event__synthesize_sample(union perf_event *event, u64 type, 583 const struct perf_sample *sample, 584 bool swapped) 585 { 586 u64 *array; 587 588 /* 589 * used for cross-endian analysis. See git commit 65014ab3 590 * for why this goofiness is needed. 591 */ 592 union { 593 u64 val64; 594 u32 val32[2]; 595 } u; 596 597 array = event->sample.array; 598 599 if (type & PERF_SAMPLE_IP) { 600 event->ip.ip = sample->ip; 601 array++; 602 } 603 604 if (type & PERF_SAMPLE_TID) { 605 u.val32[0] = sample->pid; 606 u.val32[1] = sample->tid; 607 if (swapped) { 608 /* 609 * Inverse of what is done in perf_event__parse_sample 610 */ 611 u.val32[0] = bswap_32(u.val32[0]); 612 u.val32[1] = bswap_32(u.val32[1]); 613 u.val64 = bswap_64(u.val64); 614 } 615 616 *array = u.val64; 617 array++; 618 } 619 620 if (type & PERF_SAMPLE_TIME) { 621 *array = sample->time; 622 array++; 623 } 624 625 if (type & PERF_SAMPLE_ADDR) { 626 *array = sample->addr; 627 array++; 628 } 629 630 if (type & PERF_SAMPLE_ID) { 631 *array = sample->id; 632 array++; 633 } 634 635 if (type & PERF_SAMPLE_STREAM_ID) { 636 *array = sample->stream_id; 637 array++; 638 } 639 640 if (type & PERF_SAMPLE_CPU) { 641 u.val32[0] = sample->cpu; 642 if (swapped) { 643 /* 644 * Inverse of what is done in perf_event__parse_sample 645 */ 646 u.val32[0] = bswap_32(u.val32[0]); 647 u.val64 = bswap_64(u.val64); 648 } 649 *array = u.val64; 650 array++; 651 } 652 653 if (type & PERF_SAMPLE_PERIOD) { 654 *array = sample->period; 655 array++; 656 } 657 658 return 0; 659 } 660