#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
#include "util.h"
#include "cpumap.h"

static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err(" (try 'perf record' first)");
		pr_err("\n");
		return -errno;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_type(self->evlist)) {
		pr_err("non matching sample_type");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
		pr_err("non matching sample_id_all");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_evlist__sample_type(self->evlist);
	self->sample_size = __perf_evsel__sample_size(self->sample_type);
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
	self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist);
	self->host_machine.id_hdr_size = self->id_hdr_size;
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_tool *tool)
{
	size_t len = filename ? strlen(filename) : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__update_sample_type(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (tool && tool->ordering_requires_timestamps &&
	    tool->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		tool->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void machine__delete_dead_threads(struct machine *machine)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_dead_threads(struct perf_session *session)
{
	machine__delete_dead_threads(&session->host_machine);
}

static void machine__delete_threads(struct machine *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *session)
{
	machine__delete_threads(&session->host_machine);
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void machine__remove_thread(struct machine *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some
	 * hist_entry instances, so just move it to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return 1;

	return 0;
}

int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
			       struct thread *thread,
			       struct ip_callchain *chain,
			       struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&evsel->hists.callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip;
		struct addr_location al;

		if (callchain_param.order == ORDER_CALLEE)
			ip = chain->ips[i];
		else
			ip = chain->ips[chain->nr - i - 1];

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
					   MAP__FUNCTION, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&evsel->hists.callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

static int process_event_synth_tracing_data_stub(union perf_event *event __used,
						 struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_synth_attr_stub(union perf_event *event __used,
					 struct perf_evlist **pevlist __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(struct perf_tool *tool __used,
				     union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct machine *machine __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(struct perf_tool *tool __used,
			      union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct machine *machine __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(struct perf_tool *tool __used,
				       union perf_event *event __used,
				       struct perf_session *perf_session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_type_stub(struct perf_tool *tool __used,
				   union perf_event *event __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event,
				  struct perf_session *session);

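/*
 * Install no-op stubs for any callback the tool left NULL, so the event
 * dispatch code never has to check individual handlers.  Note that ->lost
 * defaults to the real perf_event__process_lost, and ->finished_round
 * depends on whether the tool asked for time-ordered samples.
 */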
static void perf_tool__fill_defaults(struct perf_tool *tool)
{
	if (tool->sample == NULL)
		tool->sample = process_event_sample_stub;
	if (tool->mmap == NULL)
		tool->mmap = process_event_stub;
	if (tool->comm == NULL)
		tool->comm = process_event_stub;
	if (tool->fork == NULL)
		tool->fork = process_event_stub;
	if (tool->exit == NULL)
		tool->exit = process_event_stub;
	if (tool->lost == NULL)
		tool->lost = perf_event__process_lost;
	if (tool->read == NULL)
		tool->read = process_event_sample_stub;
	if (tool->throttle == NULL)
		tool->throttle = process_event_stub;
	if (tool->unthrottle == NULL)
		tool->unthrottle = process_event_stub;
	if (tool->attr == NULL)
		tool->attr = process_event_synth_attr_stub;
	if (tool->event_type == NULL)
		tool->event_type = process_event_type_stub;
	if (tool->tracing_data == NULL)
		tool->tracing_data = process_event_synth_tracing_data_stub;
	if (tool->build_id == NULL)
		tool->build_id = process_finished_round_stub;
	if (tool->finished_round == NULL) {
		if (tool->ordered_samples)
			tool->finished_round = process_finished_round;
		else
			tool->finished_round = process_finished_round_stub;
	}
}

void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid = bswap_32(event->mmap.pid);
	event->mmap.tid = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid = bswap_32(event->fork.pid);
	event->fork.tid = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid = bswap_32(event->read.pid);
	event->read.tid = bswap_32(event->read.tid);
	event->read.value = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id = bswap_64(event->read.id);
}

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type = bswap_32(attr->type);
	attr->size = bswap_32(attr->size);
	attr->config = bswap_64(attr->config);
	attr->sample_period = bswap_64(attr->sample_period);
	attr->sample_type = bswap_64(attr->sample_type);
	attr->read_format = bswap_64(attr->read_format);
	attr->wakeup_events = bswap_32(attr->wakeup_events);
	attr->bp_type = bswap_32(attr->bp_type);
	attr->bp_addr = bswap_64(attr->bp_addr);
	attr->bp_len = bswap_64(attr->bp_len);
}

static void perf_event__hdr_attr_swap(union perf_event *event)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

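/*
 * Byte-swap handlers for the payload of each record type, used when the
 * perf.data file was written on a host of the opposite endianness.  The
 * event header itself is swapped separately; a NULL entry means the
 * payload is not swapped here.
 */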
typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_tool *tool,
				      u64 file_offset);

static void flush_sample_queue(struct perf_session *s,
			       struct perf_tool *tool)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	unsigned idx = 0, progress_next = os->nr_samples / 16;
	int ret;

	if (!tool->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_session__parse_sample(s, iter->event, &sample);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else
			perf_session_deliver_event(s, iter->event, &sample, tool,
						   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
		if (++idx >= progress_next) {
			progress_next += os->nr_samples / 16;
			ui_progress__update(idx, os->nr_samples,
					    "Processing time ordered events...");
		}
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}

	os->nr_samples = 0;
}

/*
 * When perf record finishes a pass on every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush all
 * events with timestamps below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush all events below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush all events below timestamp 7
 *      etc...
 */
static int process_finished_round(struct perf_tool *tool,
				  union perf_event *event __used,
				  struct perf_session *session)
{
	flush_sample_queue(session, tool);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	++os->nr_samples;
	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		/*
		 * Slot 0 of a fresh buffer carries the list node that keeps
		 * the allocation on the to_free list, so hand out entries
		 * starting at slot 1.
		 */
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period, sample->addr);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

static struct machine *
	perf_session__find_machine_for_cpumode(struct perf_session *session,
					       union perf_event *event)
{
	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;

	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest)
		return perf_session__find_machine(session, event->ip.pid);

	return perf_session__find_host_machine(session);
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_tool *tool,
				      u64 file_offset)
{
	struct perf_evsel *evsel;
	struct machine *machine;

	dump_event(session, event, file_offset, sample);

	evsel = perf_evlist__id2evsel(session->evlist, sample->id);
	if (evsel != NULL && event->header.type != PERF_RECORD_SAMPLE) {
		/*
		 * XXX We're leaving PERF_RECORD_SAMPLE unaccounted here
		 * because the tools right now may apply filters, discarding
		 * some of the samples. For consistency, in the future we
		 * should have something like nr_filtered_samples and remove
		 * the sample->period from total_sample_period, etc. KISS for
		 * now though.
		 *
		 * Also testing against NULL allows us to handle files without
		 * attr.sample_id_all and/or without PERF_SAMPLE_ID. In the
		 * future it'll probably be a good idea to restrict event
		 * processing via perf_session to files with both set.
		 */
		hists__inc_nr_events(&evsel->hists, event->header.type);
	}

	machine = perf_session__find_machine_for_cpumode(session, event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return tool->sample(tool, event, sample, evsel, machine);
	case PERF_RECORD_MMAP:
		return tool->mmap(tool, event, sample, machine);
	case PERF_RECORD_COMM:
		return tool->comm(tool, event, sample, machine);
	case PERF_RECORD_FORK:
		return tool->fork(tool, event, sample, machine);
	case PERF_RECORD_EXIT:
		return tool->exit(tool, event, sample, machine);
	case PERF_RECORD_LOST:
		if (tool->lost == perf_event__process_lost)
			session->hists.stats.total_lost += event->lost.lost;
		return tool->lost(tool, event, sample, machine);
	case PERF_RECORD_READ:
		return tool->read(tool, event, sample, evsel, machine);
	case PERF_RECORD_THROTTLE:
		return tool->throttle(tool, event, sample, machine);
	case PERF_RECORD_UNTHROTTLE:
		return tool->unthrottle(tool, event, sample, machine);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_tool *tool, u64 file_offset)
{
	int err;

	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		err = tool->attr(event, &session->evlist);
		if (err == 0)
			perf_session__update_sample_type(session);
		return err;
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return tool->event_type(tool, event);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return tool->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return tool->build_id(tool, event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return tool->finished_round(tool, event, session);
	default:
		return -EINVAL;
	}
}

static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_tool *tool,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, tool, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_session__parse_sample(session, event, &sample);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (tool->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, tool,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
{
	return machine__findnew_thread(&session->host_machine, pid);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_tool *tool)
{
	if (tool->lost == perf_event__process_lost &&
	    session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
		ui__warning("Processed %d events and lost %d chunks!\n\n"
			    "Check IO/CPU overload!\n\n",
			    session->hists.stats.nr_events[0],
			    session->hists.stats.nr_events[PERF_RECORD_LOST]);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_tool *tool)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_tool__fill_defaults(tool);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, tool, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, tool);
	perf_session_free_sample_buffers(self);
	return err;
}

static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

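/*
 * Read the on-disk data in mmap_window sized slices.  fetch_mmaped_event()
 * returns NULL when the next record would cross the end of the current
 * slice, in which case the buffer is remapped starting at the page that
 * contains that record, so no event ever straddles two mappings.
 */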
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_tool *tool)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	size_t page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_tool__fill_defaults(tool);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, tool, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(file_pos, file_size,
				    "Processing events...");
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, tool);
out_err:
	perf_session__warn_about_errors(session, tool);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_tool *tool)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, tool);
	else
		err = __perf_session__process_pipe_events(self, tool);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
				     const char *symbol_name, u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
{
	/*
	 * FIXME: Here we have to actually print all the machines in this
	 * session, not just the host...
	 */
	return machine__fprintf(&session->host_machine, fp);
}

void perf_session__remove_thread(struct perf_session *session,
				 struct thread *th)
{
	/*
	 * FIXME: This one makes no sense, we need to remove the thread from
	 * the machine it belongs to, perf_session can have many machines, so
	 * doing it always on ->host_machine is wrong. Fix when auditing all
	 * the 'perf kvm' code.
	 */
	machine__remove_thread(&session->host_machine, th);
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
						   unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
			  struct machine *machine, struct perf_evsel *evsel,
			  int print_sym, int print_dso)
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, machine, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
		      event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {

		if (machine__resolve_callchain(machine, evsel, al.thread,
					       sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

			printf("\t%16" PRIx64, node->ip);
			if (print_sym) {
				if (node->sym && node->sym->name)
					symname = node->sym->name;
				else
					symname = "";

				printf(" %s", symname);
			}
			if (print_dso) {
				if (node->map && node->map->dso && node->map->dso->name)
					dsoname = node->map->dso->name;
				else
					dsoname = "";

				printf(" (%s)", dsoname);
			}
			printf("\n");

			callchain_cursor_advance(cursor);
		}

	} else {
		printf("%16" PRIx64, sample->ip);
		if (print_sym) {
			if (al.sym && al.sym->name)
				symname = al.sym->name;
			else
				symname = "";

			printf(" %s", symname);
		}

		if (print_dso) {
			if (al.map && al.map->dso && al.map->dso->name)
				dsoname = al.map->dso->name;
			else
				dsoname = "";

			printf(" (%s)", dsoname);
		}
	}
}

int perf_session__cpu_bitmap(struct perf_session *session,
			     const char *cpu_list, unsigned long *cpu_bitmap)
{
	int i;
	struct cpu_map *map;

	for (i = 0; i < PERF_TYPE_MAX; ++i) {
		struct perf_evsel *evsel;

		evsel = perf_session__find_first_evtype(session, i);
		if (!evsel)
			continue;

		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
			pr_err("File does not contain CPU events. "
			       "Remove -c option to proceed.\n");
			return -1;
		}
	}

	map = cpu_map__new(cpu_list);
	if (map == NULL) {
		pr_err("Invalid cpu_list\n");
		return -1;
	}

	for (i = 0; i < map->nr; i++) {
		int cpu = map->map[i];

		if (cpu >= MAX_NR_CPUS) {
" 1390 "Consider raising MAX_NR_CPUS\n", cpu); 1391 return -1; 1392 } 1393 1394 set_bit(cpu, cpu_bitmap); 1395 } 1396 1397 return 0; 1398 } 1399 1400 void perf_session__fprintf_info(struct perf_session *session, FILE *fp, 1401 bool full) 1402 { 1403 struct stat st; 1404 int ret; 1405 1406 if (session == NULL || fp == NULL) 1407 return; 1408 1409 ret = fstat(session->fd, &st); 1410 if (ret == -1) 1411 return; 1412 1413 fprintf(fp, "# ========\n"); 1414 fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); 1415 perf_header__fprintf_info(session, fp, full); 1416 fprintf(fp, "# ========\n#\n"); 1417 } 1418