/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct perf_evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	bool mispred_all;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;

	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;

	struct itrace_synth_opts synth_opts;

	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;

	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;

	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;

	bool synth_needs_swap;

	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;

	unsigned long num_events;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
};
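/*
 * Hex-dump a buffer of raw Intel PT data: one line per packet showing the
 * offset, the raw bytes and, where the packet decodes cleanly, its
 * description.  Only used when dump_trace is set (raw dump mode).
 */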
static void
intel_pt_dump(struct intel_pt *pt __maybe_unused,
	      unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	printf(".\n");
	intel_pt_dump(pt, buf, len);
}

static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
					struct auxtrace_queue *queue,
					struct auxtrace_buffer *buffer)
{
	if (queue->cpu == -1 && buffer->cpu != -1)
		ptq->cpu = buffer->cpu;

	ptq->pid = buffer->pid;
	ptq->tid = buffer->tid;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid != -1) {
		if (ptq->pid != -1)
			ptq->thread = machine__findnew_thread(ptq->pt->machine,
							      ptq->pid,
							      ptq->tid);
		else
			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
							   ptq->tid);
	}
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;

	if (ptq->stop) {
		b->len = 0;
		return 0;
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	ptq->buffer = buffer;

	if (!buffer->data) {
		int fd = perf_data_file__fd(ptq->pt->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
	} else {
		b->len = buffer->size;
		b->buf = buffer->data;
	}
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
						      !buffer->consecutive)) {
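		/*
		 * First buffer, sampling mode, or a non-consecutive snapshot
		 * buffer: the data does not follow on from what was decoded
		 * before, so report it as a new trace segment with its own
		 * trace number.
		 */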
280 b->consecutive = false; 281 b->trace_nr = buffer->buffer_nr + 1; 282 } else { 283 b->consecutive = true; 284 } 285 286 if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || 287 ptq->tid != buffer->tid)) 288 intel_pt_use_buffer_pid_tid(ptq, queue, buffer); 289 290 if (ptq->step_through_buffers) 291 ptq->stop = true; 292 293 if (!b->len) 294 return intel_pt_get_trace(b, data); 295 296 return 0; 297 } 298 299 struct intel_pt_cache_entry { 300 struct auxtrace_cache_entry entry; 301 u64 insn_cnt; 302 u64 byte_cnt; 303 enum intel_pt_insn_op op; 304 enum intel_pt_insn_branch branch; 305 int length; 306 int32_t rel; 307 }; 308 309 static int intel_pt_config_div(const char *var, const char *value, void *data) 310 { 311 int *d = data; 312 long val; 313 314 if (!strcmp(var, "intel-pt.cache-divisor")) { 315 val = strtol(value, NULL, 0); 316 if (val > 0 && val <= INT_MAX) 317 *d = val; 318 } 319 320 return 0; 321 } 322 323 static int intel_pt_cache_divisor(void) 324 { 325 static int d; 326 327 if (d) 328 return d; 329 330 perf_config(intel_pt_config_div, &d); 331 332 if (!d) 333 d = 64; 334 335 return d; 336 } 337 338 static unsigned int intel_pt_cache_size(struct dso *dso, 339 struct machine *machine) 340 { 341 off_t size; 342 343 size = dso__data_size(dso, machine); 344 size /= intel_pt_cache_divisor(); 345 if (size < 1000) 346 return 10; 347 if (size > (1 << 21)) 348 return 21; 349 return 32 - __builtin_clz(size); 350 } 351 352 static struct auxtrace_cache *intel_pt_cache(struct dso *dso, 353 struct machine *machine) 354 { 355 struct auxtrace_cache *c; 356 unsigned int bits; 357 358 if (dso->auxtrace_cache) 359 return dso->auxtrace_cache; 360 361 bits = intel_pt_cache_size(dso, machine); 362 363 /* Ignoring cache creation failure */ 364 c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); 365 366 dso->auxtrace_cache = c; 367 368 return c; 369 } 370 371 static int intel_pt_cache_add(struct dso *dso, struct machine *machine, 372 u64 offset, u64 insn_cnt, u64 byte_cnt, 373 struct intel_pt_insn *intel_pt_insn) 374 { 375 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 376 struct intel_pt_cache_entry *e; 377 int err; 378 379 if (!c) 380 return -ENOMEM; 381 382 e = auxtrace_cache__alloc_entry(c); 383 if (!e) 384 return -ENOMEM; 385 386 e->insn_cnt = insn_cnt; 387 e->byte_cnt = byte_cnt; 388 e->op = intel_pt_insn->op; 389 e->branch = intel_pt_insn->branch; 390 e->length = intel_pt_insn->length; 391 e->rel = intel_pt_insn->rel; 392 393 err = auxtrace_cache__add(c, offset, &e->entry); 394 if (err) 395 auxtrace_cache__free_entry(c, e); 396 397 return err; 398 } 399 400 static struct intel_pt_cache_entry * 401 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) 402 { 403 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 404 405 if (!c) 406 return NULL; 407 408 return auxtrace_cache__lookup(dso->auxtrace_cache, offset); 409 } 410 411 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, 412 uint64_t *insn_cnt_ptr, uint64_t *ip, 413 uint64_t to_ip, uint64_t max_insn_cnt, 414 void *data) 415 { 416 struct intel_pt_queue *ptq = data; 417 struct machine *machine = ptq->pt->machine; 418 struct thread *thread; 419 struct addr_location al; 420 unsigned char buf[1024]; 421 size_t bufsz; 422 ssize_t len; 423 int x86_64; 424 u8 cpumode; 425 u64 offset, start_offset, start_ip; 426 u64 insn_cnt = 0; 427 bool one_map = true; 428 429 if (to_ip && *ip == to_ip) 430 goto out_no_cache; 431 432 bufsz = intel_pt_insn_max_size(); 433 434 if (*ip 
>= ptq->pt->kernel_start) 435 cpumode = PERF_RECORD_MISC_KERNEL; 436 else 437 cpumode = PERF_RECORD_MISC_USER; 438 439 thread = ptq->thread; 440 if (!thread) { 441 if (cpumode != PERF_RECORD_MISC_KERNEL) 442 return -EINVAL; 443 thread = ptq->pt->unknown_thread; 444 } 445 446 while (1) { 447 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); 448 if (!al.map || !al.map->dso) 449 return -EINVAL; 450 451 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 452 dso__data_status_seen(al.map->dso, 453 DSO_DATA_STATUS_SEEN_ITRACE)) 454 return -ENOENT; 455 456 offset = al.map->map_ip(al.map, *ip); 457 458 if (!to_ip && one_map) { 459 struct intel_pt_cache_entry *e; 460 461 e = intel_pt_cache_lookup(al.map->dso, machine, offset); 462 if (e && 463 (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { 464 *insn_cnt_ptr = e->insn_cnt; 465 *ip += e->byte_cnt; 466 intel_pt_insn->op = e->op; 467 intel_pt_insn->branch = e->branch; 468 intel_pt_insn->length = e->length; 469 intel_pt_insn->rel = e->rel; 470 intel_pt_log_insn_no_data(intel_pt_insn, *ip); 471 return 0; 472 } 473 } 474 475 start_offset = offset; 476 start_ip = *ip; 477 478 /* Load maps to ensure dso->is_64_bit has been updated */ 479 map__load(al.map, machine->symbol_filter); 480 481 x86_64 = al.map->dso->is_64_bit; 482 483 while (1) { 484 len = dso__data_read_offset(al.map->dso, machine, 485 offset, buf, bufsz); 486 if (len <= 0) 487 return -EINVAL; 488 489 if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) 490 return -EINVAL; 491 492 intel_pt_log_insn(intel_pt_insn, *ip); 493 494 insn_cnt += 1; 495 496 if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) 497 goto out; 498 499 if (max_insn_cnt && insn_cnt >= max_insn_cnt) 500 goto out_no_cache; 501 502 *ip += intel_pt_insn->length; 503 504 if (to_ip && *ip == to_ip) 505 goto out_no_cache; 506 507 if (*ip >= al.map->end) 508 break; 509 510 offset += intel_pt_insn->length; 511 } 512 one_map = false; 513 } 514 out: 515 *insn_cnt_ptr = insn_cnt; 516 517 if (!one_map) 518 goto out_no_cache; 519 520 /* 521 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate 522 * entries. 
523 */ 524 if (to_ip) { 525 struct intel_pt_cache_entry *e; 526 527 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); 528 if (e) 529 return 0; 530 } 531 532 /* Ignore cache errors */ 533 intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, 534 *ip - start_ip, intel_pt_insn); 535 536 return 0; 537 538 out_no_cache: 539 *insn_cnt_ptr = insn_cnt; 540 return 0; 541 } 542 543 static bool intel_pt_get_config(struct intel_pt *pt, 544 struct perf_event_attr *attr, u64 *config) 545 { 546 if (attr->type == pt->pmu_type) { 547 if (config) 548 *config = attr->config; 549 return true; 550 } 551 552 return false; 553 } 554 555 static bool intel_pt_exclude_kernel(struct intel_pt *pt) 556 { 557 struct perf_evsel *evsel; 558 559 evlist__for_each(pt->session->evlist, evsel) { 560 if (intel_pt_get_config(pt, &evsel->attr, NULL) && 561 !evsel->attr.exclude_kernel) 562 return false; 563 } 564 return true; 565 } 566 567 static bool intel_pt_return_compression(struct intel_pt *pt) 568 { 569 struct perf_evsel *evsel; 570 u64 config; 571 572 if (!pt->noretcomp_bit) 573 return true; 574 575 evlist__for_each(pt->session->evlist, evsel) { 576 if (intel_pt_get_config(pt, &evsel->attr, &config) && 577 (config & pt->noretcomp_bit)) 578 return false; 579 } 580 return true; 581 } 582 583 static unsigned int intel_pt_mtc_period(struct intel_pt *pt) 584 { 585 struct perf_evsel *evsel; 586 unsigned int shift; 587 u64 config; 588 589 if (!pt->mtc_freq_bits) 590 return 0; 591 592 for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) 593 config >>= 1; 594 595 evlist__for_each(pt->session->evlist, evsel) { 596 if (intel_pt_get_config(pt, &evsel->attr, &config)) 597 return (config & pt->mtc_freq_bits) >> shift; 598 } 599 return 0; 600 } 601 602 static bool intel_pt_timeless_decoding(struct intel_pt *pt) 603 { 604 struct perf_evsel *evsel; 605 bool timeless_decoding = true; 606 u64 config; 607 608 if (!pt->tsc_bit || !pt->cap_user_time_zero) 609 return true; 610 611 evlist__for_each(pt->session->evlist, evsel) { 612 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) 613 return true; 614 if (intel_pt_get_config(pt, &evsel->attr, &config)) { 615 if (config & pt->tsc_bit) 616 timeless_decoding = false; 617 else 618 return true; 619 } 620 } 621 return timeless_decoding; 622 } 623 624 static bool intel_pt_tracing_kernel(struct intel_pt *pt) 625 { 626 struct perf_evsel *evsel; 627 628 evlist__for_each(pt->session->evlist, evsel) { 629 if (intel_pt_get_config(pt, &evsel->attr, NULL) && 630 !evsel->attr.exclude_kernel) 631 return true; 632 } 633 return false; 634 } 635 636 static bool intel_pt_have_tsc(struct intel_pt *pt) 637 { 638 struct perf_evsel *evsel; 639 bool have_tsc = false; 640 u64 config; 641 642 if (!pt->tsc_bit) 643 return false; 644 645 evlist__for_each(pt->session->evlist, evsel) { 646 if (intel_pt_get_config(pt, &evsel->attr, &config)) { 647 if (config & pt->tsc_bit) 648 have_tsc = true; 649 else 650 return false; 651 } 652 } 653 return have_tsc; 654 } 655 656 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) 657 { 658 u64 quot, rem; 659 660 quot = ns / pt->tc.time_mult; 661 rem = ns % pt->tc.time_mult; 662 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / 663 pt->tc.time_mult; 664 } 665 666 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, 667 unsigned int queue_nr) 668 { 669 struct intel_pt_params params = { .get_trace = 0, }; 670 struct intel_pt_queue *ptq; 671 672 ptq = zalloc(sizeof(struct intel_pt_queue)); 673 if (!ptq) 674 
return NULL; 675 676 if (pt->synth_opts.callchain) { 677 size_t sz = sizeof(struct ip_callchain); 678 679 sz += pt->synth_opts.callchain_sz * sizeof(u64); 680 ptq->chain = zalloc(sz); 681 if (!ptq->chain) 682 goto out_free; 683 } 684 685 if (pt->synth_opts.last_branch) { 686 size_t sz = sizeof(struct branch_stack); 687 688 sz += pt->synth_opts.last_branch_sz * 689 sizeof(struct branch_entry); 690 ptq->last_branch = zalloc(sz); 691 if (!ptq->last_branch) 692 goto out_free; 693 ptq->last_branch_rb = zalloc(sz); 694 if (!ptq->last_branch_rb) 695 goto out_free; 696 } 697 698 ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 699 if (!ptq->event_buf) 700 goto out_free; 701 702 ptq->pt = pt; 703 ptq->queue_nr = queue_nr; 704 ptq->exclude_kernel = intel_pt_exclude_kernel(pt); 705 ptq->pid = -1; 706 ptq->tid = -1; 707 ptq->cpu = -1; 708 ptq->next_tid = -1; 709 710 params.get_trace = intel_pt_get_trace; 711 params.walk_insn = intel_pt_walk_next_insn; 712 params.data = ptq; 713 params.return_compression = intel_pt_return_compression(pt); 714 params.max_non_turbo_ratio = pt->max_non_turbo_ratio; 715 params.mtc_period = intel_pt_mtc_period(pt); 716 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; 717 params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; 718 719 if (pt->synth_opts.instructions) { 720 if (pt->synth_opts.period) { 721 switch (pt->synth_opts.period_type) { 722 case PERF_ITRACE_PERIOD_INSTRUCTIONS: 723 params.period_type = 724 INTEL_PT_PERIOD_INSTRUCTIONS; 725 params.period = pt->synth_opts.period; 726 break; 727 case PERF_ITRACE_PERIOD_TICKS: 728 params.period_type = INTEL_PT_PERIOD_TICKS; 729 params.period = pt->synth_opts.period; 730 break; 731 case PERF_ITRACE_PERIOD_NANOSECS: 732 params.period_type = INTEL_PT_PERIOD_TICKS; 733 params.period = intel_pt_ns_to_ticks(pt, 734 pt->synth_opts.period); 735 break; 736 default: 737 break; 738 } 739 } 740 741 if (!params.period) { 742 params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; 743 params.period = 1; 744 } 745 } 746 747 ptq->decoder = intel_pt_decoder_new(¶ms); 748 if (!ptq->decoder) 749 goto out_free; 750 751 return ptq; 752 753 out_free: 754 zfree(&ptq->event_buf); 755 zfree(&ptq->last_branch); 756 zfree(&ptq->last_branch_rb); 757 zfree(&ptq->chain); 758 free(ptq); 759 return NULL; 760 } 761 762 static void intel_pt_free_queue(void *priv) 763 { 764 struct intel_pt_queue *ptq = priv; 765 766 if (!ptq) 767 return; 768 thread__zput(ptq->thread); 769 intel_pt_decoder_free(ptq->decoder); 770 zfree(&ptq->event_buf); 771 zfree(&ptq->last_branch); 772 zfree(&ptq->last_branch_rb); 773 zfree(&ptq->chain); 774 free(ptq); 775 } 776 777 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, 778 struct auxtrace_queue *queue) 779 { 780 struct intel_pt_queue *ptq = queue->priv; 781 782 if (queue->tid == -1 || pt->have_sched_switch) { 783 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); 784 thread__zput(ptq->thread); 785 } 786 787 if (!ptq->thread && ptq->tid != -1) 788 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); 789 790 if (ptq->thread) { 791 ptq->pid = ptq->thread->pid_; 792 if (queue->cpu == -1) 793 ptq->cpu = ptq->thread->cpu; 794 } 795 } 796 797 static void intel_pt_sample_flags(struct intel_pt_queue *ptq) 798 { 799 if (ptq->state->flags & INTEL_PT_ABORT_TX) { 800 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; 801 } else if (ptq->state->flags & INTEL_PT_ASYNC) { 802 if (ptq->state->to_ip) 803 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | 804 PERF_IP_FLAG_ASYNC | 805 PERF_IP_FLAG_INTERRUPT; 806 else 807 
ptq->flags = PERF_IP_FLAG_BRANCH | 808 PERF_IP_FLAG_TRACE_END; 809 ptq->insn_len = 0; 810 } else { 811 if (ptq->state->from_ip) 812 ptq->flags = intel_pt_insn_type(ptq->state->insn_op); 813 else 814 ptq->flags = PERF_IP_FLAG_BRANCH | 815 PERF_IP_FLAG_TRACE_BEGIN; 816 if (ptq->state->flags & INTEL_PT_IN_TX) 817 ptq->flags |= PERF_IP_FLAG_IN_TX; 818 ptq->insn_len = ptq->state->insn_len; 819 } 820 } 821 822 static int intel_pt_setup_queue(struct intel_pt *pt, 823 struct auxtrace_queue *queue, 824 unsigned int queue_nr) 825 { 826 struct intel_pt_queue *ptq = queue->priv; 827 828 if (list_empty(&queue->head)) 829 return 0; 830 831 if (!ptq) { 832 ptq = intel_pt_alloc_queue(pt, queue_nr); 833 if (!ptq) 834 return -ENOMEM; 835 queue->priv = ptq; 836 837 if (queue->cpu != -1) 838 ptq->cpu = queue->cpu; 839 ptq->tid = queue->tid; 840 841 if (pt->sampling_mode) { 842 if (pt->timeless_decoding) 843 ptq->step_through_buffers = true; 844 if (pt->timeless_decoding || !pt->have_sched_switch) 845 ptq->use_buffer_pid_tid = true; 846 } 847 } 848 849 if (!ptq->on_heap && 850 (!pt->sync_switch || 851 ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { 852 const struct intel_pt_state *state; 853 int ret; 854 855 if (pt->timeless_decoding) 856 return 0; 857 858 intel_pt_log("queue %u getting timestamp\n", queue_nr); 859 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 860 queue_nr, ptq->cpu, ptq->pid, ptq->tid); 861 while (1) { 862 state = intel_pt_decode(ptq->decoder); 863 if (state->err) { 864 if (state->err == INTEL_PT_ERR_NODATA) { 865 intel_pt_log("queue %u has no timestamp\n", 866 queue_nr); 867 return 0; 868 } 869 continue; 870 } 871 if (state->timestamp) 872 break; 873 } 874 875 ptq->timestamp = state->timestamp; 876 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", 877 queue_nr, ptq->timestamp); 878 ptq->state = state; 879 ptq->have_sample = true; 880 intel_pt_sample_flags(ptq); 881 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); 882 if (ret) 883 return ret; 884 ptq->on_heap = true; 885 } 886 887 return 0; 888 } 889 890 static int intel_pt_setup_queues(struct intel_pt *pt) 891 { 892 unsigned int i; 893 int ret; 894 895 for (i = 0; i < pt->queues.nr_queues; i++) { 896 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); 897 if (ret) 898 return ret; 899 } 900 return 0; 901 } 902 903 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) 904 { 905 struct branch_stack *bs_src = ptq->last_branch_rb; 906 struct branch_stack *bs_dst = ptq->last_branch; 907 size_t nr = 0; 908 909 bs_dst->nr = bs_src->nr; 910 911 if (!bs_src->nr) 912 return; 913 914 nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; 915 memcpy(&bs_dst->entries[0], 916 &bs_src->entries[ptq->last_branch_pos], 917 sizeof(struct branch_entry) * nr); 918 919 if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { 920 memcpy(&bs_dst->entries[nr], 921 &bs_src->entries[0], 922 sizeof(struct branch_entry) * ptq->last_branch_pos); 923 } 924 } 925 926 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) 927 { 928 ptq->last_branch_pos = 0; 929 ptq->last_branch_rb->nr = 0; 930 } 931 932 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) 933 { 934 const struct intel_pt_state *state = ptq->state; 935 struct branch_stack *bs = ptq->last_branch_rb; 936 struct branch_entry *be; 937 938 if (!ptq->last_branch_pos) 939 ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; 940 941 ptq->last_branch_pos -= 1; 942 943 be = 
&bs->entries[ptq->last_branch_pos]; 944 be->from = state->from_ip; 945 be->to = state->to_ip; 946 be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); 947 be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); 948 /* No support for mispredict */ 949 be->flags.mispred = ptq->pt->mispred_all; 950 951 if (bs->nr < ptq->pt->synth_opts.last_branch_sz) 952 bs->nr += 1; 953 } 954 955 static int intel_pt_inject_event(union perf_event *event, 956 struct perf_sample *sample, u64 type, 957 bool swapped) 958 { 959 event->header.size = perf_event__sample_event_size(sample, type, 0); 960 return perf_event__synthesize_sample(event, type, 0, sample, swapped); 961 } 962 963 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) 964 { 965 int ret; 966 struct intel_pt *pt = ptq->pt; 967 union perf_event *event = ptq->event_buf; 968 struct perf_sample sample = { .ip = 0, }; 969 struct dummy_branch_stack { 970 u64 nr; 971 struct branch_entry entries; 972 } dummy_bs; 973 974 if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) 975 return 0; 976 977 if (pt->synth_opts.initial_skip && 978 pt->num_events++ < pt->synth_opts.initial_skip) 979 return 0; 980 981 event->sample.header.type = PERF_RECORD_SAMPLE; 982 event->sample.header.misc = PERF_RECORD_MISC_USER; 983 event->sample.header.size = sizeof(struct perf_event_header); 984 985 if (!pt->timeless_decoding) 986 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 987 988 sample.cpumode = PERF_RECORD_MISC_USER; 989 sample.ip = ptq->state->from_ip; 990 sample.pid = ptq->pid; 991 sample.tid = ptq->tid; 992 sample.addr = ptq->state->to_ip; 993 sample.id = ptq->pt->branches_id; 994 sample.stream_id = ptq->pt->branches_id; 995 sample.period = 1; 996 sample.cpu = ptq->cpu; 997 sample.flags = ptq->flags; 998 sample.insn_len = ptq->insn_len; 999 1000 /* 1001 * perf report cannot handle events without a branch stack when using 1002 * SORT_MODE__BRANCH so make a dummy one. 
1003 */ 1004 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { 1005 dummy_bs = (struct dummy_branch_stack){ 1006 .nr = 1, 1007 .entries = { 1008 .from = sample.ip, 1009 .to = sample.addr, 1010 }, 1011 }; 1012 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1013 } 1014 1015 if (pt->synth_opts.inject) { 1016 ret = intel_pt_inject_event(event, &sample, 1017 pt->branches_sample_type, 1018 pt->synth_needs_swap); 1019 if (ret) 1020 return ret; 1021 } 1022 1023 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1024 if (ret) 1025 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", 1026 ret); 1027 1028 return ret; 1029 } 1030 1031 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) 1032 { 1033 int ret; 1034 struct intel_pt *pt = ptq->pt; 1035 union perf_event *event = ptq->event_buf; 1036 struct perf_sample sample = { .ip = 0, }; 1037 1038 if (pt->synth_opts.initial_skip && 1039 pt->num_events++ < pt->synth_opts.initial_skip) 1040 return 0; 1041 1042 event->sample.header.type = PERF_RECORD_SAMPLE; 1043 event->sample.header.misc = PERF_RECORD_MISC_USER; 1044 event->sample.header.size = sizeof(struct perf_event_header); 1045 1046 if (!pt->timeless_decoding) 1047 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1048 1049 sample.cpumode = PERF_RECORD_MISC_USER; 1050 sample.ip = ptq->state->from_ip; 1051 sample.pid = ptq->pid; 1052 sample.tid = ptq->tid; 1053 sample.addr = ptq->state->to_ip; 1054 sample.id = ptq->pt->instructions_id; 1055 sample.stream_id = ptq->pt->instructions_id; 1056 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; 1057 sample.cpu = ptq->cpu; 1058 sample.flags = ptq->flags; 1059 sample.insn_len = ptq->insn_len; 1060 1061 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1062 1063 if (pt->synth_opts.callchain) { 1064 thread_stack__sample(ptq->thread, ptq->chain, 1065 pt->synth_opts.callchain_sz, sample.ip); 1066 sample.callchain = ptq->chain; 1067 } 1068 1069 if (pt->synth_opts.last_branch) { 1070 intel_pt_copy_last_branch_rb(ptq); 1071 sample.branch_stack = ptq->last_branch; 1072 } 1073 1074 if (pt->synth_opts.inject) { 1075 ret = intel_pt_inject_event(event, &sample, 1076 pt->instructions_sample_type, 1077 pt->synth_needs_swap); 1078 if (ret) 1079 return ret; 1080 } 1081 1082 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1083 if (ret) 1084 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", 1085 ret); 1086 1087 if (pt->synth_opts.last_branch) 1088 intel_pt_reset_last_branch_rb(ptq); 1089 1090 return ret; 1091 } 1092 1093 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) 1094 { 1095 int ret; 1096 struct intel_pt *pt = ptq->pt; 1097 union perf_event *event = ptq->event_buf; 1098 struct perf_sample sample = { .ip = 0, }; 1099 1100 if (pt->synth_opts.initial_skip && 1101 pt->num_events++ < pt->synth_opts.initial_skip) 1102 return 0; 1103 1104 event->sample.header.type = PERF_RECORD_SAMPLE; 1105 event->sample.header.misc = PERF_RECORD_MISC_USER; 1106 event->sample.header.size = sizeof(struct perf_event_header); 1107 1108 if (!pt->timeless_decoding) 1109 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1110 1111 sample.cpumode = PERF_RECORD_MISC_USER; 1112 sample.ip = ptq->state->from_ip; 1113 sample.pid = ptq->pid; 1114 sample.tid = ptq->tid; 1115 sample.addr = ptq->state->to_ip; 1116 sample.id = ptq->pt->transactions_id; 1117 sample.stream_id = ptq->pt->transactions_id; 1118 sample.period = 1; 
1119 sample.cpu = ptq->cpu; 1120 sample.flags = ptq->flags; 1121 sample.insn_len = ptq->insn_len; 1122 1123 if (pt->synth_opts.callchain) { 1124 thread_stack__sample(ptq->thread, ptq->chain, 1125 pt->synth_opts.callchain_sz, sample.ip); 1126 sample.callchain = ptq->chain; 1127 } 1128 1129 if (pt->synth_opts.last_branch) { 1130 intel_pt_copy_last_branch_rb(ptq); 1131 sample.branch_stack = ptq->last_branch; 1132 } 1133 1134 if (pt->synth_opts.inject) { 1135 ret = intel_pt_inject_event(event, &sample, 1136 pt->transactions_sample_type, 1137 pt->synth_needs_swap); 1138 if (ret) 1139 return ret; 1140 } 1141 1142 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1143 if (ret) 1144 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", 1145 ret); 1146 1147 if (pt->synth_opts.last_branch) 1148 intel_pt_reset_last_branch_rb(ptq); 1149 1150 return ret; 1151 } 1152 1153 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1154 pid_t pid, pid_t tid, u64 ip) 1155 { 1156 union perf_event event; 1157 char msg[MAX_AUXTRACE_ERROR_MSG]; 1158 int err; 1159 1160 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 1161 1162 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 1163 code, cpu, pid, tid, ip, msg); 1164 1165 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 1166 if (err) 1167 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", 1168 err); 1169 1170 return err; 1171 } 1172 1173 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) 1174 { 1175 struct auxtrace_queue *queue; 1176 pid_t tid = ptq->next_tid; 1177 int err; 1178 1179 if (tid == -1) 1180 return 0; 1181 1182 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); 1183 1184 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); 1185 1186 queue = &pt->queues.queue_array[ptq->queue_nr]; 1187 intel_pt_set_pid_tid_cpu(pt, queue); 1188 1189 ptq->next_tid = -1; 1190 1191 return err; 1192 } 1193 1194 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) 1195 { 1196 struct intel_pt *pt = ptq->pt; 1197 1198 return ip == pt->switch_ip && 1199 (ptq->flags & PERF_IP_FLAG_BRANCH) && 1200 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | 1201 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); 1202 } 1203 1204 static int intel_pt_sample(struct intel_pt_queue *ptq) 1205 { 1206 const struct intel_pt_state *state = ptq->state; 1207 struct intel_pt *pt = ptq->pt; 1208 int err; 1209 1210 if (!ptq->have_sample) 1211 return 0; 1212 1213 ptq->have_sample = false; 1214 1215 if (pt->sample_instructions && 1216 (state->type & INTEL_PT_INSTRUCTION) && 1217 (!pt->synth_opts.initial_skip || 1218 pt->num_events++ >= pt->synth_opts.initial_skip)) { 1219 err = intel_pt_synth_instruction_sample(ptq); 1220 if (err) 1221 return err; 1222 } 1223 1224 if (pt->sample_transactions && 1225 (state->type & INTEL_PT_TRANSACTION) && 1226 (!pt->synth_opts.initial_skip || 1227 pt->num_events++ >= pt->synth_opts.initial_skip)) { 1228 err = intel_pt_synth_transaction_sample(ptq); 1229 if (err) 1230 return err; 1231 } 1232 1233 if (!(state->type & INTEL_PT_BRANCH)) 1234 return 0; 1235 1236 if (pt->synth_opts.callchain) 1237 thread_stack__event(ptq->thread, ptq->flags, state->from_ip, 1238 state->to_ip, ptq->insn_len, 1239 state->trace_nr); 1240 else 1241 thread_stack__set_trace_nr(ptq->thread, state->trace_nr); 1242 1243 if (pt->sample_branches) { 1244 err = intel_pt_synth_branch_sample(ptq); 1245 
if (err) 1246 return err; 1247 } 1248 1249 if (pt->synth_opts.last_branch) 1250 intel_pt_update_last_branch_rb(ptq); 1251 1252 if (!pt->sync_switch) 1253 return 0; 1254 1255 if (intel_pt_is_switch_ip(ptq, state->to_ip)) { 1256 switch (ptq->switch_state) { 1257 case INTEL_PT_SS_UNKNOWN: 1258 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 1259 err = intel_pt_next_tid(pt, ptq); 1260 if (err) 1261 return err; 1262 ptq->switch_state = INTEL_PT_SS_TRACING; 1263 break; 1264 default: 1265 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; 1266 return 1; 1267 } 1268 } else if (!state->to_ip) { 1269 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 1270 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { 1271 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 1272 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && 1273 state->to_ip == pt->ptss_ip && 1274 (ptq->flags & PERF_IP_FLAG_CALL)) { 1275 ptq->switch_state = INTEL_PT_SS_TRACING; 1276 } 1277 1278 return 0; 1279 } 1280 1281 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) 1282 { 1283 struct machine *machine = pt->machine; 1284 struct map *map; 1285 struct symbol *sym, *start; 1286 u64 ip, switch_ip = 0; 1287 const char *ptss; 1288 1289 if (ptss_ip) 1290 *ptss_ip = 0; 1291 1292 map = machine__kernel_map(machine); 1293 if (!map) 1294 return 0; 1295 1296 if (map__load(map, machine->symbol_filter)) 1297 return 0; 1298 1299 start = dso__first_symbol(map->dso, MAP__FUNCTION); 1300 1301 for (sym = start; sym; sym = dso__next_symbol(sym)) { 1302 if (sym->binding == STB_GLOBAL && 1303 !strcmp(sym->name, "__switch_to")) { 1304 ip = map->unmap_ip(map, sym->start); 1305 if (ip >= map->start && ip < map->end) { 1306 switch_ip = ip; 1307 break; 1308 } 1309 } 1310 } 1311 1312 if (!switch_ip || !ptss_ip) 1313 return 0; 1314 1315 if (pt->have_sched_switch == 1) 1316 ptss = "perf_trace_sched_switch"; 1317 else 1318 ptss = "__perf_event_task_sched_out"; 1319 1320 for (sym = start; sym; sym = dso__next_symbol(sym)) { 1321 if (!strcmp(sym->name, ptss)) { 1322 ip = map->unmap_ip(map, sym->start); 1323 if (ip >= map->start && ip < map->end) { 1324 *ptss_ip = ip; 1325 break; 1326 } 1327 } 1328 } 1329 1330 return switch_ip; 1331 } 1332 1333 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 1334 { 1335 const struct intel_pt_state *state = ptq->state; 1336 struct intel_pt *pt = ptq->pt; 1337 int err; 1338 1339 if (!pt->kernel_start) { 1340 pt->kernel_start = machine__kernel_start(pt->machine); 1341 if (pt->per_cpu_mmaps && 1342 (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && 1343 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && 1344 !pt->sampling_mode) { 1345 pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); 1346 if (pt->switch_ip) { 1347 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", 1348 pt->switch_ip, pt->ptss_ip); 1349 pt->sync_switch = true; 1350 } 1351 } 1352 } 1353 1354 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 1355 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 1356 while (1) { 1357 err = intel_pt_sample(ptq); 1358 if (err) 1359 return err; 1360 1361 state = intel_pt_decode(ptq->decoder); 1362 if (state->err) { 1363 if (state->err == INTEL_PT_ERR_NODATA) 1364 return 1; 1365 if (pt->sync_switch && 1366 state->from_ip >= pt->kernel_start) { 1367 pt->sync_switch = false; 1368 intel_pt_next_tid(pt, ptq); 1369 } 1370 if (pt->synth_opts.errors) { 1371 err = intel_pt_synth_error(pt, state->err, 1372 ptq->cpu, ptq->pid, 1373 ptq->tid, 1374 state->from_ip); 1375 if (err) 1376 return err; 
			}
			continue;
		}

		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		if (pt->est_tsc &&
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (pt->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;
		}

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;
			return 0;
		}
	}
	return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);
	}
	return 0;
}

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)
			return 0;

		if (pt->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];
		ptq = queue->priv;

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,
			     timestamp);

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);

		if (ret < 0) {
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			ptq->on_heap = false;
		}
	}

	return 0;
}

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			ptq->time = time_;
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);
		}
	}
	return 0;
}

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0);
}
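/*
 * Find the decode queue for a given CPU.  With per-cpu mmaps the queue index
 * usually matches the cpu number, so start at queue_array[cpu] (clamped to
 * the last queue), search downwards from there, then scan the remaining
 * queues for a matching cpu.
 */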
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
	unsigned i, j;

	if (cpu < 0 || !pt->queues.nr_queues)
		return NULL;

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;
	else
		i = cpu;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;
	}

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;
	}

	return NULL;
}

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
				u64 timestamp)
{
	struct intel_pt_queue *ptq;
	int err;

	if (!pt->sync_switch)
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq)
		return 1;

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
		ptq->next_tid = -1;
		break;
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
		return 0;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
							  &pt->tc);
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
						 ptq->timestamp);
			if (err)
				return err;
			ptq->on_heap = true;
		}
		ptq->switch_state = INTEL_PT_SS_TRACING;
		break;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		ptq->next_tid = tid;
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
		break;
	default:
		break;
	}

	return 1;
}

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct perf_evsel *evsel;
	pid_t tid;
	int cpu, ret;

	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
	if (evsel != pt->switch_evsel)
		return 0;

	tid = perf_evsel__intval(evsel, sample, "next_pid");
	cpu = sample->cpu;

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
							      &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, -1, tid);
}

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return 0;
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
								   &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if
(!pt->per_cpu_mmaps) 1646 return 0; 1647 1648 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 1649 sample->cpu, event->itrace_start.pid, 1650 event->itrace_start.tid, sample->time, 1651 perf_time_to_tsc(sample->time, &pt->tc)); 1652 1653 return machine__set_current_tid(pt->machine, sample->cpu, 1654 event->itrace_start.pid, 1655 event->itrace_start.tid); 1656 } 1657 1658 static int intel_pt_process_event(struct perf_session *session, 1659 union perf_event *event, 1660 struct perf_sample *sample, 1661 struct perf_tool *tool) 1662 { 1663 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1664 auxtrace); 1665 u64 timestamp; 1666 int err = 0; 1667 1668 if (dump_trace) 1669 return 0; 1670 1671 if (!tool->ordered_events) { 1672 pr_err("Intel Processor Trace requires ordered events\n"); 1673 return -EINVAL; 1674 } 1675 1676 if (sample->time && sample->time != (u64)-1) 1677 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 1678 else 1679 timestamp = 0; 1680 1681 if (timestamp || pt->timeless_decoding) { 1682 err = intel_pt_update_queues(pt); 1683 if (err) 1684 return err; 1685 } 1686 1687 if (pt->timeless_decoding) { 1688 if (event->header.type == PERF_RECORD_EXIT) { 1689 err = intel_pt_process_timeless_queues(pt, 1690 event->fork.tid, 1691 sample->time); 1692 } 1693 } else if (timestamp) { 1694 err = intel_pt_process_queues(pt, timestamp); 1695 } 1696 if (err) 1697 return err; 1698 1699 if (event->header.type == PERF_RECORD_AUX && 1700 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && 1701 pt->synth_opts.errors) { 1702 err = intel_pt_lost(pt, sample); 1703 if (err) 1704 return err; 1705 } 1706 1707 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) 1708 err = intel_pt_process_switch(pt, sample); 1709 else if (event->header.type == PERF_RECORD_ITRACE_START) 1710 err = intel_pt_process_itrace_start(pt, event, sample); 1711 else if (event->header.type == PERF_RECORD_SWITCH || 1712 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) 1713 err = intel_pt_context_switch(pt, event, sample); 1714 1715 intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", 1716 perf_event__name(event->header.type), event->header.type, 1717 sample->cpu, sample->time, timestamp); 1718 1719 return err; 1720 } 1721 1722 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) 1723 { 1724 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1725 auxtrace); 1726 int ret; 1727 1728 if (dump_trace) 1729 return 0; 1730 1731 if (!tool->ordered_events) 1732 return -EINVAL; 1733 1734 ret = intel_pt_update_queues(pt); 1735 if (ret < 0) 1736 return ret; 1737 1738 if (pt->timeless_decoding) 1739 return intel_pt_process_timeless_queues(pt, -1, 1740 MAX_TIMESTAMP - 1); 1741 1742 return intel_pt_process_queues(pt, MAX_TIMESTAMP); 1743 } 1744 1745 static void intel_pt_free_events(struct perf_session *session) 1746 { 1747 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1748 auxtrace); 1749 struct auxtrace_queues *queues = &pt->queues; 1750 unsigned int i; 1751 1752 for (i = 0; i < queues->nr_queues; i++) { 1753 intel_pt_free_queue(queues->queue_array[i].priv); 1754 queues->queue_array[i].priv = NULL; 1755 } 1756 intel_pt_log_disable(); 1757 auxtrace_queues__free(queues); 1758 } 1759 1760 static void intel_pt_free(struct perf_session *session) 1761 { 1762 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1763 auxtrace); 1764 1765 auxtrace_heap__free(&pt->heap); 1766 
intel_pt_free_events(session); 1767 session->auxtrace = NULL; 1768 thread__put(pt->unknown_thread); 1769 free(pt); 1770 } 1771 1772 static int intel_pt_process_auxtrace_event(struct perf_session *session, 1773 union perf_event *event, 1774 struct perf_tool *tool __maybe_unused) 1775 { 1776 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1777 auxtrace); 1778 1779 if (pt->sampling_mode) 1780 return 0; 1781 1782 if (!pt->data_queued) { 1783 struct auxtrace_buffer *buffer; 1784 off_t data_offset; 1785 int fd = perf_data_file__fd(session->file); 1786 int err; 1787 1788 if (perf_data_file__is_pipe(session->file)) { 1789 data_offset = 0; 1790 } else { 1791 data_offset = lseek(fd, 0, SEEK_CUR); 1792 if (data_offset == -1) 1793 return -errno; 1794 } 1795 1796 err = auxtrace_queues__add_event(&pt->queues, session, event, 1797 data_offset, &buffer); 1798 if (err) 1799 return err; 1800 1801 /* Dump here now we have copied a piped trace out of the pipe */ 1802 if (dump_trace) { 1803 if (auxtrace_buffer__get_data(buffer, fd)) { 1804 intel_pt_dump_event(pt, buffer->data, 1805 buffer->size); 1806 auxtrace_buffer__put_data(buffer); 1807 } 1808 } 1809 } 1810 1811 return 0; 1812 } 1813 1814 struct intel_pt_synth { 1815 struct perf_tool dummy_tool; 1816 struct perf_session *session; 1817 }; 1818 1819 static int intel_pt_event_synth(struct perf_tool *tool, 1820 union perf_event *event, 1821 struct perf_sample *sample __maybe_unused, 1822 struct machine *machine __maybe_unused) 1823 { 1824 struct intel_pt_synth *intel_pt_synth = 1825 container_of(tool, struct intel_pt_synth, dummy_tool); 1826 1827 return perf_session__deliver_synth_event(intel_pt_synth->session, event, 1828 NULL); 1829 } 1830 1831 static int intel_pt_synth_event(struct perf_session *session, 1832 struct perf_event_attr *attr, u64 id) 1833 { 1834 struct intel_pt_synth intel_pt_synth; 1835 1836 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); 1837 intel_pt_synth.session = session; 1838 1839 return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, 1840 &id, intel_pt_event_synth); 1841 } 1842 1843 static int intel_pt_synth_events(struct intel_pt *pt, 1844 struct perf_session *session) 1845 { 1846 struct perf_evlist *evlist = session->evlist; 1847 struct perf_evsel *evsel; 1848 struct perf_event_attr attr; 1849 bool found = false; 1850 u64 id; 1851 int err; 1852 1853 evlist__for_each(evlist, evsel) { 1854 if (evsel->attr.type == pt->pmu_type && evsel->ids) { 1855 found = true; 1856 break; 1857 } 1858 } 1859 1860 if (!found) { 1861 pr_debug("There are no selected events with Intel Processor Trace data\n"); 1862 return 0; 1863 } 1864 1865 memset(&attr, 0, sizeof(struct perf_event_attr)); 1866 attr.size = sizeof(struct perf_event_attr); 1867 attr.type = PERF_TYPE_HARDWARE; 1868 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; 1869 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1870 PERF_SAMPLE_PERIOD; 1871 if (pt->timeless_decoding) 1872 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1873 else 1874 attr.sample_type |= PERF_SAMPLE_TIME; 1875 if (!pt->per_cpu_mmaps) 1876 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; 1877 attr.exclude_user = evsel->attr.exclude_user; 1878 attr.exclude_kernel = evsel->attr.exclude_kernel; 1879 attr.exclude_hv = evsel->attr.exclude_hv; 1880 attr.exclude_host = evsel->attr.exclude_host; 1881 attr.exclude_guest = evsel->attr.exclude_guest; 1882 attr.sample_id_all = evsel->attr.sample_id_all; 1883 attr.read_format = evsel->attr.read_format; 1884 1885 id = 
evsel->id[0] + 1000000000; 1886 if (!id) 1887 id = 1; 1888 1889 if (pt->synth_opts.instructions) { 1890 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1891 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) 1892 attr.sample_period = 1893 intel_pt_ns_to_ticks(pt, pt->synth_opts.period); 1894 else 1895 attr.sample_period = pt->synth_opts.period; 1896 pt->instructions_sample_period = attr.sample_period; 1897 if (pt->synth_opts.callchain) 1898 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 1899 if (pt->synth_opts.last_branch) 1900 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1901 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1902 id, (u64)attr.sample_type); 1903 err = intel_pt_synth_event(session, &attr, id); 1904 if (err) { 1905 pr_err("%s: failed to synthesize 'instructions' event type\n", 1906 __func__); 1907 return err; 1908 } 1909 pt->sample_instructions = true; 1910 pt->instructions_sample_type = attr.sample_type; 1911 pt->instructions_id = id; 1912 id += 1; 1913 } 1914 1915 if (pt->synth_opts.transactions) { 1916 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1917 attr.sample_period = 1; 1918 if (pt->synth_opts.callchain) 1919 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 1920 if (pt->synth_opts.last_branch) 1921 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1922 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1923 id, (u64)attr.sample_type); 1924 err = intel_pt_synth_event(session, &attr, id); 1925 if (err) { 1926 pr_err("%s: failed to synthesize 'transactions' event type\n", 1927 __func__); 1928 return err; 1929 } 1930 pt->sample_transactions = true; 1931 pt->transactions_id = id; 1932 id += 1; 1933 evlist__for_each(evlist, evsel) { 1934 if (evsel->id && evsel->id[0] == pt->transactions_id) { 1935 if (evsel->name) 1936 zfree(&evsel->name); 1937 evsel->name = strdup("transactions"); 1938 break; 1939 } 1940 } 1941 } 1942 1943 if (pt->synth_opts.branches) { 1944 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1945 attr.sample_period = 1; 1946 attr.sample_type |= PERF_SAMPLE_ADDR; 1947 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; 1948 attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; 1949 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1950 id, (u64)attr.sample_type); 1951 err = intel_pt_synth_event(session, &attr, id); 1952 if (err) { 1953 pr_err("%s: failed to synthesize 'branches' event type\n", 1954 __func__); 1955 return err; 1956 } 1957 pt->sample_branches = true; 1958 pt->branches_sample_type = attr.sample_type; 1959 pt->branches_id = id; 1960 } 1961 1962 pt->synth_needs_swap = evsel->needs_swap; 1963 1964 return 0; 1965 } 1966 1967 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) 1968 { 1969 struct perf_evsel *evsel; 1970 1971 evlist__for_each_reverse(evlist, evsel) { 1972 const char *name = perf_evsel__name(evsel); 1973 1974 if (!strcmp(name, "sched:sched_switch")) 1975 return evsel; 1976 } 1977 1978 return NULL; 1979 } 1980 1981 static bool intel_pt_find_switch(struct perf_evlist *evlist) 1982 { 1983 struct perf_evsel *evsel; 1984 1985 evlist__for_each(evlist, evsel) { 1986 if (evsel->attr.context_switch) 1987 return true; 1988 } 1989 1990 return false; 1991 } 1992 1993 static int intel_pt_perf_config(const char *var, const char *value, void *data) 1994 { 1995 struct intel_pt *pt = data; 1996 1997 if (!strcmp(var, "intel-pt.mispred-all")) 1998 pt->mispred_all = perf_config_bool(var, value); 1999 2000 return 0; 
}

static const char * const intel_pt_info_fmts[] = {
	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
};

static void intel_pt_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}

int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
	struct intel_pt *pt;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	pt = zalloc(sizeof(struct intel_pt));
	if (!pt)
		return -ENOMEM;

	perf_config(intel_pt_perf_config, pt);

	err = auxtrace_queues__init(&pt->queues);
	if (err)
		goto err_free;

	intel_pt_log_set_name(INTEL_PT_PMU_NAME);

	pt->session = session;
	pt->machine = &session->machines.host; /* No kvm support */
	pt->auxtrace_type = auxtrace_info->type;
	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
			    INTEL_PT_PER_CPU_MMAPS);

	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
					(sizeof(u64) * INTEL_PT_CYC_BIT)) {
		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
				    INTEL_PT_CYC_BIT);
	}

	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
	pt->have_tsc = intel_pt_have_tsc(pt);
	pt->sampling_mode = false;
	pt->est_tsc = !pt->timeless_decoding;

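	/*
	 * Placeholder thread (arbitrary pid/tid 999999999) used by
	 * intel_pt_walk_next_insn() when a decoded kernel address cannot be
	 * attributed to a known thread.
	 */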
	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Since this thread will not be kept in any rbtree nor in a list,
	 * initialize its list node so that at thread__put() the current
	 * thread lifetime assumption is kept and we don't segfault at
	 * list_del_init().
	 */
	INIT_LIST_HEAD(&pt->unknown_thread->node);

	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		goto err_delete_thread;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		pt->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&pt->synth_opts);
		if (use_browser != -1) {
			pt->synth_opts.branches = false;
			pt->synth_opts.callchain = true;
		}
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
	}

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
		}
	}

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	thread__zput(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	free(pt);
	return err;
}