1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * intel_pt.c: Intel Processor Trace support 4 * Copyright (c) 2013-2015, Intel Corporation. 5 */ 6 7 #include <inttypes.h> 8 #include <stdio.h> 9 #include <stdbool.h> 10 #include <errno.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/types.h> 14 #include <linux/zalloc.h> 15 16 #include "session.h" 17 #include "machine.h" 18 #include "memswap.h" 19 #include "sort.h" 20 #include "tool.h" 21 #include "event.h" 22 #include "evlist.h" 23 #include "evsel.h" 24 #include "map.h" 25 #include "color.h" 26 #include "thread.h" 27 #include "thread-stack.h" 28 #include "symbol.h" 29 #include "callchain.h" 30 #include "dso.h" 31 #include "debug.h" 32 #include "auxtrace.h" 33 #include "tsc.h" 34 #include "intel-pt.h" 35 #include "config.h" 36 #include "util/perf_api_probe.h" 37 #include "util/synthetic-events.h" 38 #include "time-utils.h" 39 40 #include "../arch/x86/include/uapi/asm/perf_regs.h" 41 42 #include "intel-pt-decoder/intel-pt-log.h" 43 #include "intel-pt-decoder/intel-pt-decoder.h" 44 #include "intel-pt-decoder/intel-pt-insn-decoder.h" 45 #include "intel-pt-decoder/intel-pt-pkt-decoder.h" 46 47 #define MAX_TIMESTAMP (~0ULL) 48 49 #define INTEL_PT_CFG_PASS_THRU BIT_ULL(0) 50 #define INTEL_PT_CFG_PWR_EVT_EN BIT_ULL(4) 51 #define INTEL_PT_CFG_BRANCH_EN BIT_ULL(13) 52 #define INTEL_PT_CFG_EVT_EN BIT_ULL(31) 53 #define INTEL_PT_CFG_TNT_DIS BIT_ULL(55) 54 55 struct range { 56 u64 start; 57 u64 end; 58 }; 59 60 struct intel_pt { 61 struct auxtrace auxtrace; 62 struct auxtrace_queues queues; 63 struct auxtrace_heap heap; 64 u32 auxtrace_type; 65 struct perf_session *session; 66 struct machine *machine; 67 struct evsel *switch_evsel; 68 struct thread *unknown_thread; 69 bool timeless_decoding; 70 bool sampling_mode; 71 bool snapshot_mode; 72 bool per_cpu_mmaps; 73 bool have_tsc; 74 bool data_queued; 75 bool est_tsc; 76 bool sync_switch; 77 bool sync_switch_not_supported; 78 bool mispred_all; 79 bool use_thread_stack; 80 bool callstack; 81 bool cap_event_trace; 82 bool have_guest_sideband; 83 unsigned int br_stack_sz; 84 unsigned int br_stack_sz_plus; 85 int have_sched_switch; 86 u32 pmu_type; 87 u64 kernel_start; 88 u64 switch_ip; 89 u64 ptss_ip; 90 u64 first_timestamp; 91 92 struct perf_tsc_conversion tc; 93 bool cap_user_time_zero; 94 95 struct itrace_synth_opts synth_opts; 96 97 bool sample_instructions; 98 u64 instructions_sample_type; 99 u64 instructions_id; 100 101 bool sample_branches; 102 u32 branches_filter; 103 u64 branches_sample_type; 104 u64 branches_id; 105 106 bool sample_transactions; 107 u64 transactions_sample_type; 108 u64 transactions_id; 109 110 bool sample_ptwrites; 111 u64 ptwrites_sample_type; 112 u64 ptwrites_id; 113 114 bool sample_pwr_events; 115 u64 pwr_events_sample_type; 116 u64 mwait_id; 117 u64 pwre_id; 118 u64 exstop_id; 119 u64 pwrx_id; 120 u64 cbr_id; 121 u64 psb_id; 122 123 bool single_pebs; 124 bool sample_pebs; 125 struct evsel *pebs_evsel; 126 127 u64 evt_sample_type; 128 u64 evt_id; 129 130 u64 iflag_chg_sample_type; 131 u64 iflag_chg_id; 132 133 u64 tsc_bit; 134 u64 mtc_bit; 135 u64 mtc_freq_bits; 136 u32 tsc_ctc_ratio_n; 137 u32 tsc_ctc_ratio_d; 138 u64 cyc_bit; 139 u64 noretcomp_bit; 140 unsigned max_non_turbo_ratio; 141 unsigned cbr2khz; 142 int max_loops; 143 144 unsigned long num_events; 145 146 char *filter; 147 struct addr_filters filts; 148 149 struct range *time_ranges; 150 unsigned int range_cnt; 151 152 struct ip_callchain *chain; 153 struct branch_stack *br_stack; 154 
155 u64 dflt_tsc_offset; 156 struct rb_root vmcs_info; 157 }; 158 159 enum switch_state { 160 INTEL_PT_SS_NOT_TRACING, 161 INTEL_PT_SS_UNKNOWN, 162 INTEL_PT_SS_TRACING, 163 INTEL_PT_SS_EXPECTING_SWITCH_EVENT, 164 INTEL_PT_SS_EXPECTING_SWITCH_IP, 165 }; 166 167 /* applicable_counters is 64-bits */ 168 #define INTEL_PT_MAX_PEBS 64 169 170 struct intel_pt_pebs_event { 171 struct evsel *evsel; 172 u64 id; 173 }; 174 175 struct intel_pt_queue { 176 struct intel_pt *pt; 177 unsigned int queue_nr; 178 struct auxtrace_buffer *buffer; 179 struct auxtrace_buffer *old_buffer; 180 void *decoder; 181 const struct intel_pt_state *state; 182 struct ip_callchain *chain; 183 struct branch_stack *last_branch; 184 union perf_event *event_buf; 185 bool on_heap; 186 bool stop; 187 bool step_through_buffers; 188 bool use_buffer_pid_tid; 189 bool sync_switch; 190 bool sample_ipc; 191 pid_t pid, tid; 192 int cpu; 193 int switch_state; 194 pid_t next_tid; 195 struct thread *thread; 196 struct machine *guest_machine; 197 struct thread *guest_thread; 198 struct thread *unknown_guest_thread; 199 pid_t guest_machine_pid; 200 pid_t guest_pid; 201 pid_t guest_tid; 202 int vcpu; 203 bool exclude_kernel; 204 bool have_sample; 205 u64 time; 206 u64 timestamp; 207 u64 sel_timestamp; 208 bool sel_start; 209 unsigned int sel_idx; 210 u32 flags; 211 u16 insn_len; 212 u64 last_insn_cnt; 213 u64 ipc_insn_cnt; 214 u64 ipc_cyc_cnt; 215 u64 last_in_insn_cnt; 216 u64 last_in_cyc_cnt; 217 u64 last_br_insn_cnt; 218 u64 last_br_cyc_cnt; 219 unsigned int cbr_seen; 220 char insn[INTEL_PT_INSN_BUF_SZ]; 221 struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; 222 }; 223 224 static void intel_pt_dump(struct intel_pt *pt __maybe_unused, 225 unsigned char *buf, size_t len) 226 { 227 struct intel_pt_pkt packet; 228 size_t pos = 0; 229 int ret, pkt_len, i; 230 char desc[INTEL_PT_PKT_DESC_MAX]; 231 const char *color = PERF_COLOR_BLUE; 232 enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX; 233 234 color_fprintf(stdout, color, 235 ". ... 
Intel Processor Trace data: size %zu bytes\n", 236 len); 237 238 while (len) { 239 ret = intel_pt_get_packet(buf, len, &packet, &ctx); 240 if (ret > 0) 241 pkt_len = ret; 242 else 243 pkt_len = 1; 244 printf("."); 245 color_fprintf(stdout, color, " %08x: ", pos); 246 for (i = 0; i < pkt_len; i++) 247 color_fprintf(stdout, color, " %02x", buf[i]); 248 for (; i < 16; i++) 249 color_fprintf(stdout, color, " "); 250 if (ret > 0) { 251 ret = intel_pt_pkt_desc(&packet, desc, 252 INTEL_PT_PKT_DESC_MAX); 253 if (ret > 0) 254 color_fprintf(stdout, color, " %s\n", desc); 255 } else { 256 color_fprintf(stdout, color, " Bad packet!\n"); 257 } 258 pos += pkt_len; 259 buf += pkt_len; 260 len -= pkt_len; 261 } 262 } 263 264 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, 265 size_t len) 266 { 267 printf(".\n"); 268 intel_pt_dump(pt, buf, len); 269 } 270 271 static void intel_pt_log_event(union perf_event *event) 272 { 273 FILE *f = intel_pt_log_fp(); 274 275 if (!intel_pt_enable_logging || !f) 276 return; 277 278 perf_event__fprintf(event, NULL, f); 279 } 280 281 static void intel_pt_dump_sample(struct perf_session *session, 282 struct perf_sample *sample) 283 { 284 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 285 auxtrace); 286 287 printf("\n"); 288 intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); 289 } 290 291 static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) 292 { 293 struct perf_time_interval *range = pt->synth_opts.ptime_range; 294 int n = pt->synth_opts.range_num; 295 296 if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) 297 return true; 298 299 if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) 300 return false; 301 302 /* perf_time__ranges_skip_sample does not work if time is zero */ 303 if (!tm) 304 tm = 1; 305 306 return !n || !perf_time__ranges_skip_sample(range, n, tm); 307 } 308 309 static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root, 310 u64 vmcs, 311 u64 dflt_tsc_offset) 312 { 313 struct rb_node **p = &rb_root->rb_node; 314 struct rb_node *parent = NULL; 315 struct intel_pt_vmcs_info *v; 316 317 while (*p) { 318 parent = *p; 319 v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node); 320 321 if (v->vmcs == vmcs) 322 return v; 323 324 if (vmcs < v->vmcs) 325 p = &(*p)->rb_left; 326 else 327 p = &(*p)->rb_right; 328 } 329 330 v = zalloc(sizeof(*v)); 331 if (v) { 332 v->vmcs = vmcs; 333 v->tsc_offset = dflt_tsc_offset; 334 v->reliable = dflt_tsc_offset; 335 336 rb_link_node(&v->rb_node, parent, p); 337 rb_insert_color(&v->rb_node, rb_root); 338 } 339 340 return v; 341 } 342 343 static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs) 344 { 345 struct intel_pt_queue *ptq = data; 346 struct intel_pt *pt = ptq->pt; 347 348 if (!vmcs && !pt->dflt_tsc_offset) 349 return NULL; 350 351 return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset); 352 } 353 354 static void intel_pt_free_vmcs_info(struct intel_pt *pt) 355 { 356 struct intel_pt_vmcs_info *v; 357 struct rb_node *n; 358 359 n = rb_first(&pt->vmcs_info); 360 while (n) { 361 v = rb_entry(n, struct intel_pt_vmcs_info, rb_node); 362 n = rb_next(n); 363 rb_erase(&v->rb_node, &pt->vmcs_info); 364 free(v); 365 } 366 } 367 368 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, 369 struct auxtrace_buffer *b) 370 { 371 bool consecutive = false; 372 void *start; 373 374 start = intel_pt_find_overlap(a->data, a->size, b->data, 
b->size, 375 pt->have_tsc, &consecutive, 376 pt->synth_opts.vm_time_correlation); 377 if (!start) 378 return -EINVAL; 379 /* 380 * In the case of vm_time_correlation, the overlap might contain TSC 381 * packets that will not be fixed, and that will then no longer work for 382 * overlap detection. Avoid that by zeroing out the overlap. 383 */ 384 if (pt->synth_opts.vm_time_correlation) 385 memset(b->data, 0, start - b->data); 386 b->use_size = b->data + b->size - start; 387 b->use_data = start; 388 if (b->use_size && consecutive) 389 b->consecutive = true; 390 return 0; 391 } 392 393 static int intel_pt_get_buffer(struct intel_pt_queue *ptq, 394 struct auxtrace_buffer *buffer, 395 struct auxtrace_buffer *old_buffer, 396 struct intel_pt_buffer *b) 397 { 398 bool might_overlap; 399 400 if (!buffer->data) { 401 int fd = perf_data__fd(ptq->pt->session->data); 402 403 buffer->data = auxtrace_buffer__get_data(buffer, fd); 404 if (!buffer->data) 405 return -ENOMEM; 406 } 407 408 might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode; 409 if (might_overlap && !buffer->consecutive && old_buffer && 410 intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) 411 return -ENOMEM; 412 413 if (buffer->use_data) { 414 b->len = buffer->use_size; 415 b->buf = buffer->use_data; 416 } else { 417 b->len = buffer->size; 418 b->buf = buffer->data; 419 } 420 b->ref_timestamp = buffer->reference; 421 422 if (!old_buffer || (might_overlap && !buffer->consecutive)) { 423 b->consecutive = false; 424 b->trace_nr = buffer->buffer_nr + 1; 425 } else { 426 b->consecutive = true; 427 } 428 429 return 0; 430 } 431 432 /* Do not drop buffers with references - refer intel_pt_get_trace() */ 433 static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq, 434 struct auxtrace_buffer *buffer) 435 { 436 if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer) 437 return; 438 439 auxtrace_buffer__drop_data(buffer); 440 } 441 442 /* Must be serialized with respect to intel_pt_get_trace() */ 443 static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb, 444 void *cb_data) 445 { 446 struct intel_pt_queue *ptq = data; 447 struct auxtrace_buffer *buffer = ptq->buffer; 448 struct auxtrace_buffer *old_buffer = ptq->old_buffer; 449 struct auxtrace_queue *queue; 450 int err = 0; 451 452 queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 453 454 while (1) { 455 struct intel_pt_buffer b = { .len = 0 }; 456 457 buffer = auxtrace_buffer__next(queue, buffer); 458 if (!buffer) 459 break; 460 461 err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b); 462 if (err) 463 break; 464 465 if (b.len) { 466 intel_pt_lookahead_drop_buffer(ptq, old_buffer); 467 old_buffer = buffer; 468 } else { 469 intel_pt_lookahead_drop_buffer(ptq, buffer); 470 continue; 471 } 472 473 err = cb(&b, cb_data); 474 if (err) 475 break; 476 } 477 478 if (buffer != old_buffer) 479 intel_pt_lookahead_drop_buffer(ptq, buffer); 480 intel_pt_lookahead_drop_buffer(ptq, old_buffer); 481 482 return err; 483 } 484 485 /* 486 * This function assumes data is processed sequentially only. 
487 * Must be serialized with respect to intel_pt_lookahead() 488 */ 489 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) 490 { 491 struct intel_pt_queue *ptq = data; 492 struct auxtrace_buffer *buffer = ptq->buffer; 493 struct auxtrace_buffer *old_buffer = ptq->old_buffer; 494 struct auxtrace_queue *queue; 495 int err; 496 497 if (ptq->stop) { 498 b->len = 0; 499 return 0; 500 } 501 502 queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 503 504 buffer = auxtrace_buffer__next(queue, buffer); 505 if (!buffer) { 506 if (old_buffer) 507 auxtrace_buffer__drop_data(old_buffer); 508 b->len = 0; 509 return 0; 510 } 511 512 ptq->buffer = buffer; 513 514 err = intel_pt_get_buffer(ptq, buffer, old_buffer, b); 515 if (err) 516 return err; 517 518 if (ptq->step_through_buffers) 519 ptq->stop = true; 520 521 if (b->len) { 522 if (old_buffer) 523 auxtrace_buffer__drop_data(old_buffer); 524 ptq->old_buffer = buffer; 525 } else { 526 auxtrace_buffer__drop_data(buffer); 527 return intel_pt_get_trace(b, data); 528 } 529 530 return 0; 531 } 532 533 struct intel_pt_cache_entry { 534 struct auxtrace_cache_entry entry; 535 u64 insn_cnt; 536 u64 byte_cnt; 537 enum intel_pt_insn_op op; 538 enum intel_pt_insn_branch branch; 539 bool emulated_ptwrite; 540 int length; 541 int32_t rel; 542 char insn[INTEL_PT_INSN_BUF_SZ]; 543 }; 544 545 static int intel_pt_config_div(const char *var, const char *value, void *data) 546 { 547 int *d = data; 548 long val; 549 550 if (!strcmp(var, "intel-pt.cache-divisor")) { 551 val = strtol(value, NULL, 0); 552 if (val > 0 && val <= INT_MAX) 553 *d = val; 554 } 555 556 return 0; 557 } 558 559 static int intel_pt_cache_divisor(void) 560 { 561 static int d; 562 563 if (d) 564 return d; 565 566 perf_config(intel_pt_config_div, &d); 567 568 if (!d) 569 d = 64; 570 571 return d; 572 } 573 574 static unsigned int intel_pt_cache_size(struct dso *dso, 575 struct machine *machine) 576 { 577 off_t size; 578 579 size = dso__data_size(dso, machine); 580 size /= intel_pt_cache_divisor(); 581 if (size < 1000) 582 return 10; 583 if (size > (1 << 21)) 584 return 21; 585 return 32 - __builtin_clz(size); 586 } 587 588 static struct auxtrace_cache *intel_pt_cache(struct dso *dso, 589 struct machine *machine) 590 { 591 struct auxtrace_cache *c; 592 unsigned int bits; 593 594 if (dso->auxtrace_cache) 595 return dso->auxtrace_cache; 596 597 bits = intel_pt_cache_size(dso, machine); 598 599 /* Ignoring cache creation failure */ 600 c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); 601 602 dso->auxtrace_cache = c; 603 604 return c; 605 } 606 607 static int intel_pt_cache_add(struct dso *dso, struct machine *machine, 608 u64 offset, u64 insn_cnt, u64 byte_cnt, 609 struct intel_pt_insn *intel_pt_insn) 610 { 611 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 612 struct intel_pt_cache_entry *e; 613 int err; 614 615 if (!c) 616 return -ENOMEM; 617 618 e = auxtrace_cache__alloc_entry(c); 619 if (!e) 620 return -ENOMEM; 621 622 e->insn_cnt = insn_cnt; 623 e->byte_cnt = byte_cnt; 624 e->op = intel_pt_insn->op; 625 e->branch = intel_pt_insn->branch; 626 e->emulated_ptwrite = intel_pt_insn->emulated_ptwrite; 627 e->length = intel_pt_insn->length; 628 e->rel = intel_pt_insn->rel; 629 memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ); 630 631 err = auxtrace_cache__add(c, offset, &e->entry); 632 if (err) 633 auxtrace_cache__free_entry(c, e); 634 635 return err; 636 } 637 638 static struct intel_pt_cache_entry * 639 intel_pt_cache_lookup(struct dso *dso, struct 
machine *machine, u64 offset) 640 { 641 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 642 643 if (!c) 644 return NULL; 645 646 return auxtrace_cache__lookup(dso->auxtrace_cache, offset); 647 } 648 649 static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, 650 u64 offset) 651 { 652 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 653 654 if (!c) 655 return; 656 657 auxtrace_cache__remove(dso->auxtrace_cache, offset); 658 } 659 660 static inline bool intel_pt_guest_kernel_ip(uint64_t ip) 661 { 662 /* Assumes 64-bit kernel */ 663 return ip & (1ULL << 63); 664 } 665 666 static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr) 667 { 668 if (nr) { 669 return intel_pt_guest_kernel_ip(ip) ? 670 PERF_RECORD_MISC_GUEST_KERNEL : 671 PERF_RECORD_MISC_GUEST_USER; 672 } 673 674 return ip >= ptq->pt->kernel_start ? 675 PERF_RECORD_MISC_KERNEL : 676 PERF_RECORD_MISC_USER; 677 } 678 679 static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip) 680 { 681 /* No support for non-zero CS base */ 682 if (from_ip) 683 return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr); 684 return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr); 685 } 686 687 static int intel_pt_get_guest(struct intel_pt_queue *ptq) 688 { 689 struct machines *machines = &ptq->pt->session->machines; 690 struct machine *machine; 691 pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid; 692 693 if (ptq->guest_machine && pid == ptq->guest_machine->pid) 694 return 0; 695 696 ptq->guest_machine = NULL; 697 thread__zput(ptq->unknown_guest_thread); 698 699 if (symbol_conf.guest_code) { 700 thread__zput(ptq->guest_thread); 701 ptq->guest_thread = machines__findnew_guest_code(machines, pid); 702 } 703 704 machine = machines__find_guest(machines, pid); 705 if (!machine) 706 return -1; 707 708 ptq->unknown_guest_thread = machine__idle_thread(machine); 709 if (!ptq->unknown_guest_thread) 710 return -1; 711 712 ptq->guest_machine = machine; 713 714 return 0; 715 } 716 717 static inline bool intel_pt_jmp_16(struct intel_pt_insn *intel_pt_insn) 718 { 719 return intel_pt_insn->rel == 16 && intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL; 720 } 721 722 #define PTWRITE_MAGIC "\x0f\x0bperf,ptwrite " 723 #define PTWRITE_MAGIC_LEN 16 724 725 static bool intel_pt_emulated_ptwrite(struct dso *dso, struct machine *machine, u64 offset) 726 { 727 unsigned char buf[PTWRITE_MAGIC_LEN]; 728 ssize_t len; 729 730 len = dso__data_read_offset(dso, machine, offset, buf, PTWRITE_MAGIC_LEN); 731 if (len == PTWRITE_MAGIC_LEN && !memcmp(buf, PTWRITE_MAGIC, PTWRITE_MAGIC_LEN)) { 732 intel_pt_log("Emulated ptwrite signature found\n"); 733 return true; 734 } 735 intel_pt_log("Emulated ptwrite signature not found\n"); 736 return false; 737 } 738 739 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, 740 uint64_t *insn_cnt_ptr, uint64_t *ip, 741 uint64_t to_ip, uint64_t max_insn_cnt, 742 void *data) 743 { 744 struct intel_pt_queue *ptq = data; 745 struct machine *machine = ptq->pt->machine; 746 struct thread *thread; 747 struct addr_location al; 748 unsigned char buf[INTEL_PT_INSN_BUF_SZ]; 749 ssize_t len; 750 int x86_64; 751 u8 cpumode; 752 u64 offset, start_offset, start_ip; 753 u64 insn_cnt = 0; 754 bool one_map = true; 755 bool nr; 756 757 intel_pt_insn->length = 0; 758 759 if (to_ip && *ip == to_ip) 760 goto out_no_cache; 761 762 nr = ptq->state->to_nr; 763 cpumode = intel_pt_nr_cpumode(ptq, *ip, nr); 764 765 if (nr) { 766 if 
(ptq->pt->have_guest_sideband) { 767 if (!ptq->guest_machine || ptq->guest_machine_pid != ptq->pid) { 768 intel_pt_log("ERROR: guest sideband but no guest machine\n"); 769 return -EINVAL; 770 } 771 } else if ((!symbol_conf.guest_code && cpumode != PERF_RECORD_MISC_GUEST_KERNEL) || 772 intel_pt_get_guest(ptq)) { 773 intel_pt_log("ERROR: no guest machine\n"); 774 return -EINVAL; 775 } 776 machine = ptq->guest_machine; 777 thread = ptq->guest_thread; 778 if (!thread) { 779 if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL) { 780 intel_pt_log("ERROR: no guest thread\n"); 781 return -EINVAL; 782 } 783 thread = ptq->unknown_guest_thread; 784 } 785 } else { 786 thread = ptq->thread; 787 if (!thread) { 788 if (cpumode != PERF_RECORD_MISC_KERNEL) { 789 intel_pt_log("ERROR: no thread\n"); 790 return -EINVAL; 791 } 792 thread = ptq->pt->unknown_thread; 793 } 794 } 795 796 while (1) { 797 if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) { 798 if (al.map) 799 intel_pt_log("ERROR: thread has no dso for %#" PRIx64 "\n", *ip); 800 else 801 intel_pt_log("ERROR: thread has no map for %#" PRIx64 "\n", *ip); 802 return -EINVAL; 803 } 804 805 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 806 dso__data_status_seen(al.map->dso, 807 DSO_DATA_STATUS_SEEN_ITRACE)) 808 return -ENOENT; 809 810 offset = al.map->map_ip(al.map, *ip); 811 812 if (!to_ip && one_map) { 813 struct intel_pt_cache_entry *e; 814 815 e = intel_pt_cache_lookup(al.map->dso, machine, offset); 816 if (e && 817 (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { 818 *insn_cnt_ptr = e->insn_cnt; 819 *ip += e->byte_cnt; 820 intel_pt_insn->op = e->op; 821 intel_pt_insn->branch = e->branch; 822 intel_pt_insn->emulated_ptwrite = e->emulated_ptwrite; 823 intel_pt_insn->length = e->length; 824 intel_pt_insn->rel = e->rel; 825 memcpy(intel_pt_insn->buf, e->insn, 826 INTEL_PT_INSN_BUF_SZ); 827 intel_pt_log_insn_no_data(intel_pt_insn, *ip); 828 return 0; 829 } 830 } 831 832 start_offset = offset; 833 start_ip = *ip; 834 835 /* Load maps to ensure dso->is_64_bit has been updated */ 836 map__load(al.map); 837 838 x86_64 = al.map->dso->is_64_bit; 839 840 while (1) { 841 len = dso__data_read_offset(al.map->dso, machine, 842 offset, buf, 843 INTEL_PT_INSN_BUF_SZ); 844 if (len <= 0) { 845 intel_pt_log("ERROR: failed to read at %" PRIu64 " ", offset); 846 if (intel_pt_enable_logging) 847 dso__fprintf(al.map->dso, intel_pt_log_fp()); 848 return -EINVAL; 849 } 850 851 if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) 852 return -EINVAL; 853 854 intel_pt_log_insn(intel_pt_insn, *ip); 855 856 insn_cnt += 1; 857 858 if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) { 859 bool eptw; 860 u64 offs; 861 862 if (!intel_pt_jmp_16(intel_pt_insn)) 863 goto out; 864 /* Check for emulated ptwrite */ 865 offs = offset + intel_pt_insn->length; 866 eptw = intel_pt_emulated_ptwrite(al.map->dso, machine, offs); 867 intel_pt_insn->emulated_ptwrite = eptw; 868 goto out; 869 } 870 871 if (max_insn_cnt && insn_cnt >= max_insn_cnt) 872 goto out_no_cache; 873 874 *ip += intel_pt_insn->length; 875 876 if (to_ip && *ip == to_ip) { 877 intel_pt_insn->length = 0; 878 goto out_no_cache; 879 } 880 881 if (*ip >= al.map->end) 882 break; 883 884 offset += intel_pt_insn->length; 885 } 886 one_map = false; 887 } 888 out: 889 *insn_cnt_ptr = insn_cnt; 890 891 if (!one_map) 892 goto out_no_cache; 893 894 /* 895 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate 896 * entries. 
897 */ 898 if (to_ip) { 899 struct intel_pt_cache_entry *e; 900 901 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); 902 if (e) 903 return 0; 904 } 905 906 /* Ignore cache errors */ 907 intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, 908 *ip - start_ip, intel_pt_insn); 909 910 return 0; 911 912 out_no_cache: 913 *insn_cnt_ptr = insn_cnt; 914 return 0; 915 } 916 917 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip, 918 uint64_t offset, const char *filename) 919 { 920 struct addr_filter *filt; 921 bool have_filter = false; 922 bool hit_tracestop = false; 923 bool hit_filter = false; 924 925 list_for_each_entry(filt, &pt->filts.head, list) { 926 if (filt->start) 927 have_filter = true; 928 929 if ((filename && !filt->filename) || 930 (!filename && filt->filename) || 931 (filename && strcmp(filename, filt->filename))) 932 continue; 933 934 if (!(offset >= filt->addr && offset < filt->addr + filt->size)) 935 continue; 936 937 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n", 938 ip, offset, filename ? filename : "[kernel]", 939 filt->start ? "filter" : "stop", 940 filt->addr, filt->size); 941 942 if (filt->start) 943 hit_filter = true; 944 else 945 hit_tracestop = true; 946 } 947 948 if (!hit_tracestop && !hit_filter) 949 intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n", 950 ip, offset, filename ? filename : "[kernel]"); 951 952 return hit_tracestop || (have_filter && !hit_filter); 953 } 954 955 static int __intel_pt_pgd_ip(uint64_t ip, void *data) 956 { 957 struct intel_pt_queue *ptq = data; 958 struct thread *thread; 959 struct addr_location al; 960 u8 cpumode; 961 u64 offset; 962 963 if (ptq->state->to_nr) { 964 if (intel_pt_guest_kernel_ip(ip)) 965 return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); 966 /* No support for decoding guest user space */ 967 return -EINVAL; 968 } else if (ip >= ptq->pt->kernel_start) { 969 return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL); 970 } 971 972 cpumode = PERF_RECORD_MISC_USER; 973 974 thread = ptq->thread; 975 if (!thread) 976 return -EINVAL; 977 978 if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) 979 return -EINVAL; 980 981 offset = al.map->map_ip(al.map, ip); 982 983 return intel_pt_match_pgd_ip(ptq->pt, ip, offset, 984 al.map->dso->long_name); 985 } 986 987 static bool intel_pt_pgd_ip(uint64_t ip, void *data) 988 { 989 return __intel_pt_pgd_ip(ip, data) > 0; 990 } 991 992 static bool intel_pt_get_config(struct intel_pt *pt, 993 struct perf_event_attr *attr, u64 *config) 994 { 995 if (attr->type == pt->pmu_type) { 996 if (config) 997 *config = attr->config; 998 return true; 999 } 1000 1001 return false; 1002 } 1003 1004 static bool intel_pt_exclude_kernel(struct intel_pt *pt) 1005 { 1006 struct evsel *evsel; 1007 1008 evlist__for_each_entry(pt->session->evlist, evsel) { 1009 if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && 1010 !evsel->core.attr.exclude_kernel) 1011 return false; 1012 } 1013 return true; 1014 } 1015 1016 static bool intel_pt_return_compression(struct intel_pt *pt) 1017 { 1018 struct evsel *evsel; 1019 u64 config; 1020 1021 if (!pt->noretcomp_bit) 1022 return true; 1023 1024 evlist__for_each_entry(pt->session->evlist, evsel) { 1025 if (intel_pt_get_config(pt, &evsel->core.attr, &config) && 1026 (config & pt->noretcomp_bit)) 1027 return false; 1028 } 1029 return true; 1030 } 1031 1032 static bool intel_pt_branch_enable(struct intel_pt *pt) 1033 { 1034 struct 
evsel *evsel; 1035 u64 config; 1036 1037 evlist__for_each_entry(pt->session->evlist, evsel) { 1038 if (intel_pt_get_config(pt, &evsel->core.attr, &config) && 1039 (config & INTEL_PT_CFG_PASS_THRU) && 1040 !(config & INTEL_PT_CFG_BRANCH_EN)) 1041 return false; 1042 } 1043 return true; 1044 } 1045 1046 static bool intel_pt_disabled_tnt(struct intel_pt *pt) 1047 { 1048 struct evsel *evsel; 1049 u64 config; 1050 1051 evlist__for_each_entry(pt->session->evlist, evsel) { 1052 if (intel_pt_get_config(pt, &evsel->core.attr, &config) && 1053 config & INTEL_PT_CFG_TNT_DIS) 1054 return true; 1055 } 1056 return false; 1057 } 1058 1059 static unsigned int intel_pt_mtc_period(struct intel_pt *pt) 1060 { 1061 struct evsel *evsel; 1062 unsigned int shift; 1063 u64 config; 1064 1065 if (!pt->mtc_freq_bits) 1066 return 0; 1067 1068 for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) 1069 config >>= 1; 1070 1071 evlist__for_each_entry(pt->session->evlist, evsel) { 1072 if (intel_pt_get_config(pt, &evsel->core.attr, &config)) 1073 return (config & pt->mtc_freq_bits) >> shift; 1074 } 1075 return 0; 1076 } 1077 1078 static bool intel_pt_timeless_decoding(struct intel_pt *pt) 1079 { 1080 struct evsel *evsel; 1081 bool timeless_decoding = true; 1082 u64 config; 1083 1084 if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding) 1085 return true; 1086 1087 evlist__for_each_entry(pt->session->evlist, evsel) { 1088 if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 1089 return true; 1090 if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { 1091 if (config & pt->tsc_bit) 1092 timeless_decoding = false; 1093 else 1094 return true; 1095 } 1096 } 1097 return timeless_decoding; 1098 } 1099 1100 static bool intel_pt_tracing_kernel(struct intel_pt *pt) 1101 { 1102 struct evsel *evsel; 1103 1104 evlist__for_each_entry(pt->session->evlist, evsel) { 1105 if (intel_pt_get_config(pt, &evsel->core.attr, NULL) && 1106 !evsel->core.attr.exclude_kernel) 1107 return true; 1108 } 1109 return false; 1110 } 1111 1112 static bool intel_pt_have_tsc(struct intel_pt *pt) 1113 { 1114 struct evsel *evsel; 1115 bool have_tsc = false; 1116 u64 config; 1117 1118 if (!pt->tsc_bit) 1119 return false; 1120 1121 evlist__for_each_entry(pt->session->evlist, evsel) { 1122 if (intel_pt_get_config(pt, &evsel->core.attr, &config)) { 1123 if (config & pt->tsc_bit) 1124 have_tsc = true; 1125 else 1126 return false; 1127 } 1128 } 1129 return have_tsc; 1130 } 1131 1132 static bool intel_pt_have_mtc(struct intel_pt *pt) 1133 { 1134 struct evsel *evsel; 1135 u64 config; 1136 1137 evlist__for_each_entry(pt->session->evlist, evsel) { 1138 if (intel_pt_get_config(pt, &evsel->core.attr, &config) && 1139 (config & pt->mtc_bit)) 1140 return true; 1141 } 1142 return false; 1143 } 1144 1145 static bool intel_pt_sampling_mode(struct intel_pt *pt) 1146 { 1147 struct evsel *evsel; 1148 1149 evlist__for_each_entry(pt->session->evlist, evsel) { 1150 if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) && 1151 evsel->core.attr.aux_sample_size) 1152 return true; 1153 } 1154 return false; 1155 } 1156 1157 static u64 intel_pt_ctl(struct intel_pt *pt) 1158 { 1159 struct evsel *evsel; 1160 u64 config; 1161 1162 evlist__for_each_entry(pt->session->evlist, evsel) { 1163 if (intel_pt_get_config(pt, &evsel->core.attr, &config)) 1164 return config; 1165 } 1166 return 0; 1167 } 1168 1169 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) 1170 { 1171 u64 quot, rem; 1172 1173 quot = ns / pt->tc.time_mult; 1174 rem = ns % 
pt->tc.time_mult; 1175 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / 1176 pt->tc.time_mult; 1177 } 1178 1179 static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) 1180 { 1181 size_t sz = sizeof(struct ip_callchain); 1182 1183 /* Add 1 to callchain_sz for callchain context */ 1184 sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); 1185 return zalloc(sz); 1186 } 1187 1188 static int intel_pt_callchain_init(struct intel_pt *pt) 1189 { 1190 struct evsel *evsel; 1191 1192 evlist__for_each_entry(pt->session->evlist, evsel) { 1193 if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) 1194 evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; 1195 } 1196 1197 pt->chain = intel_pt_alloc_chain(pt); 1198 if (!pt->chain) 1199 return -ENOMEM; 1200 1201 return 0; 1202 } 1203 1204 static void intel_pt_add_callchain(struct intel_pt *pt, 1205 struct perf_sample *sample) 1206 { 1207 struct thread *thread = machine__findnew_thread(pt->machine, 1208 sample->pid, 1209 sample->tid); 1210 1211 thread_stack__sample_late(thread, sample->cpu, pt->chain, 1212 pt->synth_opts.callchain_sz + 1, sample->ip, 1213 pt->kernel_start); 1214 1215 sample->callchain = pt->chain; 1216 } 1217 1218 static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt) 1219 { 1220 size_t sz = sizeof(struct branch_stack); 1221 1222 sz += entry_cnt * sizeof(struct branch_entry); 1223 return zalloc(sz); 1224 } 1225 1226 static int intel_pt_br_stack_init(struct intel_pt *pt) 1227 { 1228 struct evsel *evsel; 1229 1230 evlist__for_each_entry(pt->session->evlist, evsel) { 1231 if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) 1232 evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; 1233 } 1234 1235 pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz); 1236 if (!pt->br_stack) 1237 return -ENOMEM; 1238 1239 return 0; 1240 } 1241 1242 static void intel_pt_add_br_stack(struct intel_pt *pt, 1243 struct perf_sample *sample) 1244 { 1245 struct thread *thread = machine__findnew_thread(pt->machine, 1246 sample->pid, 1247 sample->tid); 1248 1249 thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, 1250 pt->br_stack_sz, sample->ip, 1251 pt->kernel_start); 1252 1253 sample->branch_stack = pt->br_stack; 1254 } 1255 1256 /* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */ 1257 #define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U) 1258 1259 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, 1260 unsigned int queue_nr) 1261 { 1262 struct intel_pt_params params = { .get_trace = 0, }; 1263 struct perf_env *env = pt->machine->env; 1264 struct intel_pt_queue *ptq; 1265 1266 ptq = zalloc(sizeof(struct intel_pt_queue)); 1267 if (!ptq) 1268 return NULL; 1269 1270 if (pt->synth_opts.callchain) { 1271 ptq->chain = intel_pt_alloc_chain(pt); 1272 if (!ptq->chain) 1273 goto out_free; 1274 } 1275 1276 if (pt->synth_opts.last_branch || pt->synth_opts.other_events) { 1277 unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz); 1278 1279 ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt); 1280 if (!ptq->last_branch) 1281 goto out_free; 1282 } 1283 1284 ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 1285 if (!ptq->event_buf) 1286 goto out_free; 1287 1288 ptq->pt = pt; 1289 ptq->queue_nr = queue_nr; 1290 ptq->exclude_kernel = intel_pt_exclude_kernel(pt); 1291 ptq->pid = -1; 1292 ptq->tid = -1; 1293 ptq->cpu = -1; 1294 ptq->next_tid = -1; 1295 1296 params.get_trace = intel_pt_get_trace; 1297 params.walk_insn = intel_pt_walk_next_insn; 1298 params.lookahead = 
intel_pt_lookahead; 1299 params.findnew_vmcs_info = intel_pt_findnew_vmcs_info; 1300 params.data = ptq; 1301 params.return_compression = intel_pt_return_compression(pt); 1302 params.branch_enable = intel_pt_branch_enable(pt); 1303 params.ctl = intel_pt_ctl(pt); 1304 params.max_non_turbo_ratio = pt->max_non_turbo_ratio; 1305 params.mtc_period = intel_pt_mtc_period(pt); 1306 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; 1307 params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; 1308 params.quick = pt->synth_opts.quick; 1309 params.vm_time_correlation = pt->synth_opts.vm_time_correlation; 1310 params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; 1311 params.first_timestamp = pt->first_timestamp; 1312 params.max_loops = pt->max_loops; 1313 1314 /* Cannot walk code without TNT, so force 'quick' mode */ 1315 if (params.branch_enable && intel_pt_disabled_tnt(pt) && !params.quick) 1316 params.quick = 1; 1317 1318 if (pt->filts.cnt > 0) 1319 params.pgd_ip = intel_pt_pgd_ip; 1320 1321 if (pt->synth_opts.instructions) { 1322 if (pt->synth_opts.period) { 1323 switch (pt->synth_opts.period_type) { 1324 case PERF_ITRACE_PERIOD_INSTRUCTIONS: 1325 params.period_type = 1326 INTEL_PT_PERIOD_INSTRUCTIONS; 1327 params.period = pt->synth_opts.period; 1328 break; 1329 case PERF_ITRACE_PERIOD_TICKS: 1330 params.period_type = INTEL_PT_PERIOD_TICKS; 1331 params.period = pt->synth_opts.period; 1332 break; 1333 case PERF_ITRACE_PERIOD_NANOSECS: 1334 params.period_type = INTEL_PT_PERIOD_TICKS; 1335 params.period = intel_pt_ns_to_ticks(pt, 1336 pt->synth_opts.period); 1337 break; 1338 default: 1339 break; 1340 } 1341 } 1342 1343 if (!params.period) { 1344 params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; 1345 params.period = 1; 1346 } 1347 } 1348 1349 if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18)) 1350 params.flags |= INTEL_PT_FUP_WITH_NLIP; 1351 1352 ptq->decoder = intel_pt_decoder_new(¶ms); 1353 if (!ptq->decoder) 1354 goto out_free; 1355 1356 return ptq; 1357 1358 out_free: 1359 zfree(&ptq->event_buf); 1360 zfree(&ptq->last_branch); 1361 zfree(&ptq->chain); 1362 free(ptq); 1363 return NULL; 1364 } 1365 1366 static void intel_pt_free_queue(void *priv) 1367 { 1368 struct intel_pt_queue *ptq = priv; 1369 1370 if (!ptq) 1371 return; 1372 thread__zput(ptq->thread); 1373 thread__zput(ptq->guest_thread); 1374 thread__zput(ptq->unknown_guest_thread); 1375 intel_pt_decoder_free(ptq->decoder); 1376 zfree(&ptq->event_buf); 1377 zfree(&ptq->last_branch); 1378 zfree(&ptq->chain); 1379 free(ptq); 1380 } 1381 1382 static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp) 1383 { 1384 unsigned int i; 1385 1386 pt->first_timestamp = timestamp; 1387 1388 for (i = 0; i < pt->queues.nr_queues; i++) { 1389 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 1390 struct intel_pt_queue *ptq = queue->priv; 1391 1392 if (ptq && ptq->decoder) 1393 intel_pt_set_first_timestamp(ptq->decoder, timestamp); 1394 } 1395 } 1396 1397 static int intel_pt_get_guest_from_sideband(struct intel_pt_queue *ptq) 1398 { 1399 struct machines *machines = &ptq->pt->session->machines; 1400 struct machine *machine; 1401 pid_t machine_pid = ptq->pid; 1402 pid_t tid; 1403 int vcpu; 1404 1405 if (machine_pid <= 0) 1406 return 0; /* Not a guest machine */ 1407 1408 machine = machines__find(machines, machine_pid); 1409 if (!machine) 1410 return 0; /* Not a guest machine */ 1411 1412 if (ptq->guest_machine != machine) { 1413 ptq->guest_machine = NULL; 1414 thread__zput(ptq->guest_thread); 1415 
thread__zput(ptq->unknown_guest_thread); 1416 1417 ptq->unknown_guest_thread = machine__find_thread(machine, 0, 0); 1418 if (!ptq->unknown_guest_thread) 1419 return -1; 1420 ptq->guest_machine = machine; 1421 } 1422 1423 vcpu = ptq->thread ? ptq->thread->guest_cpu : -1; 1424 if (vcpu < 0) 1425 return -1; 1426 1427 tid = machine__get_current_tid(machine, vcpu); 1428 1429 if (ptq->guest_thread && ptq->guest_thread->tid != tid) 1430 thread__zput(ptq->guest_thread); 1431 1432 if (!ptq->guest_thread) { 1433 ptq->guest_thread = machine__find_thread(machine, -1, tid); 1434 if (!ptq->guest_thread) 1435 return -1; 1436 } 1437 1438 ptq->guest_machine_pid = machine_pid; 1439 ptq->guest_pid = ptq->guest_thread->pid_; 1440 ptq->guest_tid = tid; 1441 ptq->vcpu = vcpu; 1442 1443 return 0; 1444 } 1445 1446 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, 1447 struct auxtrace_queue *queue) 1448 { 1449 struct intel_pt_queue *ptq = queue->priv; 1450 1451 if (queue->tid == -1 || pt->have_sched_switch) { 1452 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); 1453 if (ptq->tid == -1) 1454 ptq->pid = -1; 1455 thread__zput(ptq->thread); 1456 } 1457 1458 if (!ptq->thread && ptq->tid != -1) 1459 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); 1460 1461 if (ptq->thread) { 1462 ptq->pid = ptq->thread->pid_; 1463 if (queue->cpu == -1) 1464 ptq->cpu = ptq->thread->cpu; 1465 } 1466 1467 if (pt->have_guest_sideband && intel_pt_get_guest_from_sideband(ptq)) { 1468 ptq->guest_machine_pid = 0; 1469 ptq->guest_pid = -1; 1470 ptq->guest_tid = -1; 1471 ptq->vcpu = -1; 1472 } 1473 } 1474 1475 static void intel_pt_sample_flags(struct intel_pt_queue *ptq) 1476 { 1477 struct intel_pt *pt = ptq->pt; 1478 1479 ptq->insn_len = 0; 1480 if (ptq->state->flags & INTEL_PT_ABORT_TX) { 1481 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; 1482 } else if (ptq->state->flags & INTEL_PT_ASYNC) { 1483 if (!ptq->state->to_ip) 1484 ptq->flags = PERF_IP_FLAG_BRANCH | 1485 PERF_IP_FLAG_TRACE_END; 1486 else if (ptq->state->from_nr && !ptq->state->to_nr) 1487 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | 1488 PERF_IP_FLAG_VMEXIT; 1489 else 1490 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | 1491 PERF_IP_FLAG_ASYNC | 1492 PERF_IP_FLAG_INTERRUPT; 1493 } else { 1494 if (ptq->state->from_ip) 1495 ptq->flags = intel_pt_insn_type(ptq->state->insn_op); 1496 else 1497 ptq->flags = PERF_IP_FLAG_BRANCH | 1498 PERF_IP_FLAG_TRACE_BEGIN; 1499 if (ptq->state->flags & INTEL_PT_IN_TX) 1500 ptq->flags |= PERF_IP_FLAG_IN_TX; 1501 ptq->insn_len = ptq->state->insn_len; 1502 memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); 1503 } 1504 1505 if (ptq->state->type & INTEL_PT_TRACE_BEGIN) 1506 ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; 1507 if (ptq->state->type & INTEL_PT_TRACE_END) 1508 ptq->flags |= PERF_IP_FLAG_TRACE_END; 1509 1510 if (pt->cap_event_trace) { 1511 if (ptq->state->type & INTEL_PT_IFLAG_CHG) { 1512 if (!ptq->state->from_iflag) 1513 ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; 1514 if (ptq->state->from_iflag != ptq->state->to_iflag) 1515 ptq->flags |= PERF_IP_FLAG_INTR_TOGGLE; 1516 } else if (!ptq->state->to_iflag) { 1517 ptq->flags |= PERF_IP_FLAG_INTR_DISABLE; 1518 } 1519 } 1520 } 1521 1522 static void intel_pt_setup_time_range(struct intel_pt *pt, 1523 struct intel_pt_queue *ptq) 1524 { 1525 if (!pt->range_cnt) 1526 return; 1527 1528 ptq->sel_timestamp = pt->time_ranges[0].start; 1529 ptq->sel_idx = 0; 1530 1531 if (ptq->sel_timestamp) { 1532 ptq->sel_start = true; 1533 } else { 1534 
ptq->sel_timestamp = pt->time_ranges[0].end; 1535 ptq->sel_start = false; 1536 } 1537 } 1538 1539 static int intel_pt_setup_queue(struct intel_pt *pt, 1540 struct auxtrace_queue *queue, 1541 unsigned int queue_nr) 1542 { 1543 struct intel_pt_queue *ptq = queue->priv; 1544 1545 if (list_empty(&queue->head)) 1546 return 0; 1547 1548 if (!ptq) { 1549 ptq = intel_pt_alloc_queue(pt, queue_nr); 1550 if (!ptq) 1551 return -ENOMEM; 1552 queue->priv = ptq; 1553 1554 if (queue->cpu != -1) 1555 ptq->cpu = queue->cpu; 1556 ptq->tid = queue->tid; 1557 1558 ptq->cbr_seen = UINT_MAX; 1559 1560 if (pt->sampling_mode && !pt->snapshot_mode && 1561 pt->timeless_decoding) 1562 ptq->step_through_buffers = true; 1563 1564 ptq->sync_switch = pt->sync_switch; 1565 1566 intel_pt_setup_time_range(pt, ptq); 1567 } 1568 1569 if (!ptq->on_heap && 1570 (!ptq->sync_switch || 1571 ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { 1572 const struct intel_pt_state *state; 1573 int ret; 1574 1575 if (pt->timeless_decoding) 1576 return 0; 1577 1578 intel_pt_log("queue %u getting timestamp\n", queue_nr); 1579 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 1580 queue_nr, ptq->cpu, ptq->pid, ptq->tid); 1581 1582 if (ptq->sel_start && ptq->sel_timestamp) { 1583 ret = intel_pt_fast_forward(ptq->decoder, 1584 ptq->sel_timestamp); 1585 if (ret) 1586 return ret; 1587 } 1588 1589 while (1) { 1590 state = intel_pt_decode(ptq->decoder); 1591 if (state->err) { 1592 if (state->err == INTEL_PT_ERR_NODATA) { 1593 intel_pt_log("queue %u has no timestamp\n", 1594 queue_nr); 1595 return 0; 1596 } 1597 continue; 1598 } 1599 if (state->timestamp) 1600 break; 1601 } 1602 1603 ptq->timestamp = state->timestamp; 1604 intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", 1605 queue_nr, ptq->timestamp); 1606 ptq->state = state; 1607 ptq->have_sample = true; 1608 if (ptq->sel_start && ptq->sel_timestamp && 1609 ptq->timestamp < ptq->sel_timestamp) 1610 ptq->have_sample = false; 1611 intel_pt_sample_flags(ptq); 1612 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); 1613 if (ret) 1614 return ret; 1615 ptq->on_heap = true; 1616 } 1617 1618 return 0; 1619 } 1620 1621 static int intel_pt_setup_queues(struct intel_pt *pt) 1622 { 1623 unsigned int i; 1624 int ret; 1625 1626 for (i = 0; i < pt->queues.nr_queues; i++) { 1627 ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); 1628 if (ret) 1629 return ret; 1630 } 1631 return 0; 1632 } 1633 1634 static inline bool intel_pt_skip_event(struct intel_pt *pt) 1635 { 1636 return pt->synth_opts.initial_skip && 1637 pt->num_events++ < pt->synth_opts.initial_skip; 1638 } 1639 1640 /* 1641 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen. 1642 * Also ensure CBR is first non-skipped event by allowing for 4 more samples 1643 * from this decoder state. 
1644 */ 1645 static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt) 1646 { 1647 return pt->synth_opts.initial_skip && 1648 pt->num_events + 4 < pt->synth_opts.initial_skip; 1649 } 1650 1651 static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq, 1652 union perf_event *event, 1653 struct perf_sample *sample) 1654 { 1655 event->sample.header.type = PERF_RECORD_SAMPLE; 1656 event->sample.header.size = sizeof(struct perf_event_header); 1657 1658 sample->pid = ptq->pid; 1659 sample->tid = ptq->tid; 1660 1661 if (ptq->pt->have_guest_sideband) { 1662 if ((ptq->state->from_ip && ptq->state->from_nr) || 1663 (ptq->state->to_ip && ptq->state->to_nr)) { 1664 sample->pid = ptq->guest_pid; 1665 sample->tid = ptq->guest_tid; 1666 sample->machine_pid = ptq->guest_machine_pid; 1667 sample->vcpu = ptq->vcpu; 1668 } 1669 } 1670 1671 sample->cpu = ptq->cpu; 1672 sample->insn_len = ptq->insn_len; 1673 memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); 1674 } 1675 1676 static void intel_pt_prep_b_sample(struct intel_pt *pt, 1677 struct intel_pt_queue *ptq, 1678 union perf_event *event, 1679 struct perf_sample *sample) 1680 { 1681 intel_pt_prep_a_sample(ptq, event, sample); 1682 1683 if (!pt->timeless_decoding) 1684 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1685 1686 sample->ip = ptq->state->from_ip; 1687 sample->addr = ptq->state->to_ip; 1688 sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr); 1689 sample->period = 1; 1690 sample->flags = ptq->flags; 1691 1692 event->sample.header.misc = sample->cpumode; 1693 } 1694 1695 static int intel_pt_inject_event(union perf_event *event, 1696 struct perf_sample *sample, u64 type) 1697 { 1698 event->header.size = perf_event__sample_event_size(sample, type, 0); 1699 return perf_event__synthesize_sample(event, type, 0, sample); 1700 } 1701 1702 static inline int intel_pt_opt_inject(struct intel_pt *pt, 1703 union perf_event *event, 1704 struct perf_sample *sample, u64 type) 1705 { 1706 if (!pt->synth_opts.inject) 1707 return 0; 1708 1709 return intel_pt_inject_event(event, sample, type); 1710 } 1711 1712 static int intel_pt_deliver_synth_event(struct intel_pt *pt, 1713 union perf_event *event, 1714 struct perf_sample *sample, u64 type) 1715 { 1716 int ret; 1717 1718 ret = intel_pt_opt_inject(pt, event, sample, type); 1719 if (ret) 1720 return ret; 1721 1722 ret = perf_session__deliver_synth_event(pt->session, event, sample); 1723 if (ret) 1724 pr_err("Intel PT: failed to deliver event, error %d\n", ret); 1725 1726 return ret; 1727 } 1728 1729 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) 1730 { 1731 struct intel_pt *pt = ptq->pt; 1732 union perf_event *event = ptq->event_buf; 1733 struct perf_sample sample = { .ip = 0, }; 1734 struct dummy_branch_stack { 1735 u64 nr; 1736 u64 hw_idx; 1737 struct branch_entry entries; 1738 } dummy_bs; 1739 1740 if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) 1741 return 0; 1742 1743 if (intel_pt_skip_event(pt)) 1744 return 0; 1745 1746 intel_pt_prep_b_sample(pt, ptq, event, &sample); 1747 1748 sample.id = ptq->pt->branches_id; 1749 sample.stream_id = ptq->pt->branches_id; 1750 1751 /* 1752 * perf report cannot handle events without a branch stack when using 1753 * SORT_MODE__BRANCH so make a dummy one. 
1754 */ 1755 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { 1756 dummy_bs = (struct dummy_branch_stack){ 1757 .nr = 1, 1758 .hw_idx = -1ULL, 1759 .entries = { 1760 .from = sample.ip, 1761 .to = sample.addr, 1762 }, 1763 }; 1764 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1765 } 1766 1767 if (ptq->sample_ipc) 1768 sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; 1769 if (sample.cyc_cnt) { 1770 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; 1771 ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; 1772 ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; 1773 } 1774 1775 return intel_pt_deliver_synth_event(pt, event, &sample, 1776 pt->branches_sample_type); 1777 } 1778 1779 static void intel_pt_prep_sample(struct intel_pt *pt, 1780 struct intel_pt_queue *ptq, 1781 union perf_event *event, 1782 struct perf_sample *sample) 1783 { 1784 intel_pt_prep_b_sample(pt, ptq, event, sample); 1785 1786 if (pt->synth_opts.callchain) { 1787 thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, 1788 pt->synth_opts.callchain_sz + 1, 1789 sample->ip, pt->kernel_start); 1790 sample->callchain = ptq->chain; 1791 } 1792 1793 if (pt->synth_opts.last_branch) { 1794 thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, 1795 pt->br_stack_sz); 1796 sample->branch_stack = ptq->last_branch; 1797 } 1798 } 1799 1800 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) 1801 { 1802 struct intel_pt *pt = ptq->pt; 1803 union perf_event *event = ptq->event_buf; 1804 struct perf_sample sample = { .ip = 0, }; 1805 1806 if (intel_pt_skip_event(pt)) 1807 return 0; 1808 1809 intel_pt_prep_sample(pt, ptq, event, &sample); 1810 1811 sample.id = ptq->pt->instructions_id; 1812 sample.stream_id = ptq->pt->instructions_id; 1813 if (pt->synth_opts.quick) 1814 sample.period = 1; 1815 else 1816 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; 1817 1818 if (ptq->sample_ipc) 1819 sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; 1820 if (sample.cyc_cnt) { 1821 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; 1822 ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; 1823 ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt; 1824 } 1825 1826 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1827 1828 return intel_pt_deliver_synth_event(pt, event, &sample, 1829 pt->instructions_sample_type); 1830 } 1831 1832 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) 1833 { 1834 struct intel_pt *pt = ptq->pt; 1835 union perf_event *event = ptq->event_buf; 1836 struct perf_sample sample = { .ip = 0, }; 1837 1838 if (intel_pt_skip_event(pt)) 1839 return 0; 1840 1841 intel_pt_prep_sample(pt, ptq, event, &sample); 1842 1843 sample.id = ptq->pt->transactions_id; 1844 sample.stream_id = ptq->pt->transactions_id; 1845 1846 return intel_pt_deliver_synth_event(pt, event, &sample, 1847 pt->transactions_sample_type); 1848 } 1849 1850 static void intel_pt_prep_p_sample(struct intel_pt *pt, 1851 struct intel_pt_queue *ptq, 1852 union perf_event *event, 1853 struct perf_sample *sample) 1854 { 1855 intel_pt_prep_sample(pt, ptq, event, sample); 1856 1857 /* 1858 * Zero IP is used to mean "trace start" but that is not the case for 1859 * power or PTWRITE events with no IP, so clear the flags. 
1860 */ 1861 if (!sample->ip) 1862 sample->flags = 0; 1863 } 1864 1865 static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) 1866 { 1867 struct intel_pt *pt = ptq->pt; 1868 union perf_event *event = ptq->event_buf; 1869 struct perf_sample sample = { .ip = 0, }; 1870 struct perf_synth_intel_ptwrite raw; 1871 1872 if (intel_pt_skip_event(pt)) 1873 return 0; 1874 1875 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1876 1877 sample.id = ptq->pt->ptwrites_id; 1878 sample.stream_id = ptq->pt->ptwrites_id; 1879 1880 raw.flags = 0; 1881 raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); 1882 raw.payload = cpu_to_le64(ptq->state->ptw_payload); 1883 1884 sample.raw_size = perf_synth__raw_size(raw); 1885 sample.raw_data = perf_synth__raw_data(&raw); 1886 1887 return intel_pt_deliver_synth_event(pt, event, &sample, 1888 pt->ptwrites_sample_type); 1889 } 1890 1891 static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) 1892 { 1893 struct intel_pt *pt = ptq->pt; 1894 union perf_event *event = ptq->event_buf; 1895 struct perf_sample sample = { .ip = 0, }; 1896 struct perf_synth_intel_cbr raw; 1897 u32 flags; 1898 1899 if (intel_pt_skip_cbr_event(pt)) 1900 return 0; 1901 1902 ptq->cbr_seen = ptq->state->cbr; 1903 1904 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1905 1906 sample.id = ptq->pt->cbr_id; 1907 sample.stream_id = ptq->pt->cbr_id; 1908 1909 flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16); 1910 raw.flags = cpu_to_le32(flags); 1911 raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz); 1912 raw.reserved3 = 0; 1913 1914 sample.raw_size = perf_synth__raw_size(raw); 1915 sample.raw_data = perf_synth__raw_data(&raw); 1916 1917 return intel_pt_deliver_synth_event(pt, event, &sample, 1918 pt->pwr_events_sample_type); 1919 } 1920 1921 static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) 1922 { 1923 struct intel_pt *pt = ptq->pt; 1924 union perf_event *event = ptq->event_buf; 1925 struct perf_sample sample = { .ip = 0, }; 1926 struct perf_synth_intel_psb raw; 1927 1928 if (intel_pt_skip_event(pt)) 1929 return 0; 1930 1931 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1932 1933 sample.id = ptq->pt->psb_id; 1934 sample.stream_id = ptq->pt->psb_id; 1935 sample.flags = 0; 1936 1937 raw.reserved = 0; 1938 raw.offset = ptq->state->psb_offset; 1939 1940 sample.raw_size = perf_synth__raw_size(raw); 1941 sample.raw_data = perf_synth__raw_data(&raw); 1942 1943 return intel_pt_deliver_synth_event(pt, event, &sample, 1944 pt->pwr_events_sample_type); 1945 } 1946 1947 static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) 1948 { 1949 struct intel_pt *pt = ptq->pt; 1950 union perf_event *event = ptq->event_buf; 1951 struct perf_sample sample = { .ip = 0, }; 1952 struct perf_synth_intel_mwait raw; 1953 1954 if (intel_pt_skip_event(pt)) 1955 return 0; 1956 1957 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1958 1959 sample.id = ptq->pt->mwait_id; 1960 sample.stream_id = ptq->pt->mwait_id; 1961 1962 raw.reserved = 0; 1963 raw.payload = cpu_to_le64(ptq->state->mwait_payload); 1964 1965 sample.raw_size = perf_synth__raw_size(raw); 1966 sample.raw_data = perf_synth__raw_data(&raw); 1967 1968 return intel_pt_deliver_synth_event(pt, event, &sample, 1969 pt->pwr_events_sample_type); 1970 } 1971 1972 static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) 1973 { 1974 struct intel_pt *pt = ptq->pt; 1975 union perf_event *event = ptq->event_buf; 1976 struct perf_sample sample = { .ip = 0, }; 1977 struct perf_synth_intel_pwre raw; 1978 1979 if 
(intel_pt_skip_event(pt)) 1980 return 0; 1981 1982 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1983 1984 sample.id = ptq->pt->pwre_id; 1985 sample.stream_id = ptq->pt->pwre_id; 1986 1987 raw.reserved = 0; 1988 raw.payload = cpu_to_le64(ptq->state->pwre_payload); 1989 1990 sample.raw_size = perf_synth__raw_size(raw); 1991 sample.raw_data = perf_synth__raw_data(&raw); 1992 1993 return intel_pt_deliver_synth_event(pt, event, &sample, 1994 pt->pwr_events_sample_type); 1995 } 1996 1997 static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) 1998 { 1999 struct intel_pt *pt = ptq->pt; 2000 union perf_event *event = ptq->event_buf; 2001 struct perf_sample sample = { .ip = 0, }; 2002 struct perf_synth_intel_exstop raw; 2003 2004 if (intel_pt_skip_event(pt)) 2005 return 0; 2006 2007 intel_pt_prep_p_sample(pt, ptq, event, &sample); 2008 2009 sample.id = ptq->pt->exstop_id; 2010 sample.stream_id = ptq->pt->exstop_id; 2011 2012 raw.flags = 0; 2013 raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); 2014 2015 sample.raw_size = perf_synth__raw_size(raw); 2016 sample.raw_data = perf_synth__raw_data(&raw); 2017 2018 return intel_pt_deliver_synth_event(pt, event, &sample, 2019 pt->pwr_events_sample_type); 2020 } 2021 2022 static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) 2023 { 2024 struct intel_pt *pt = ptq->pt; 2025 union perf_event *event = ptq->event_buf; 2026 struct perf_sample sample = { .ip = 0, }; 2027 struct perf_synth_intel_pwrx raw; 2028 2029 if (intel_pt_skip_event(pt)) 2030 return 0; 2031 2032 intel_pt_prep_p_sample(pt, ptq, event, &sample); 2033 2034 sample.id = ptq->pt->pwrx_id; 2035 sample.stream_id = ptq->pt->pwrx_id; 2036 2037 raw.reserved = 0; 2038 raw.payload = cpu_to_le64(ptq->state->pwrx_payload); 2039 2040 sample.raw_size = perf_synth__raw_size(raw); 2041 sample.raw_data = perf_synth__raw_data(&raw); 2042 2043 return intel_pt_deliver_synth_event(pt, event, &sample, 2044 pt->pwr_events_sample_type); 2045 } 2046 2047 /* 2048 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer 2049 * intel_pt_add_gp_regs(). 2050 */ 2051 static const int pebs_gp_regs[] = { 2052 [PERF_REG_X86_FLAGS] = 1, 2053 [PERF_REG_X86_IP] = 2, 2054 [PERF_REG_X86_AX] = 3, 2055 [PERF_REG_X86_CX] = 4, 2056 [PERF_REG_X86_DX] = 5, 2057 [PERF_REG_X86_BX] = 6, 2058 [PERF_REG_X86_SP] = 7, 2059 [PERF_REG_X86_BP] = 8, 2060 [PERF_REG_X86_SI] = 9, 2061 [PERF_REG_X86_DI] = 10, 2062 [PERF_REG_X86_R8] = 11, 2063 [PERF_REG_X86_R9] = 12, 2064 [PERF_REG_X86_R10] = 13, 2065 [PERF_REG_X86_R11] = 14, 2066 [PERF_REG_X86_R12] = 15, 2067 [PERF_REG_X86_R13] = 16, 2068 [PERF_REG_X86_R14] = 17, 2069 [PERF_REG_X86_R15] = 18, 2070 }; 2071 2072 static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos, 2073 const struct intel_pt_blk_items *items, 2074 u64 regs_mask) 2075 { 2076 const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS]; 2077 u32 mask = items->mask[INTEL_PT_GP_REGS_POS]; 2078 u32 bit; 2079 int i; 2080 2081 for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) { 2082 /* Get the PEBS gp_regs array index */ 2083 int n = pebs_gp_regs[i] - 1; 2084 2085 if (n < 0) 2086 continue; 2087 /* 2088 * Add only registers that were requested (i.e. 'regs_mask') and 2089 * that were provided (i.e. 'mask'), and update the resulting 2090 * mask (i.e. 'intr_regs->mask') accordingly. 
2091 */ 2092 if (mask & 1 << n && regs_mask & bit) { 2093 intr_regs->mask |= bit; 2094 *pos++ = gp_regs[n]; 2095 } 2096 } 2097 2098 return pos; 2099 } 2100 2101 #ifndef PERF_REG_X86_XMM0 2102 #define PERF_REG_X86_XMM0 32 2103 #endif 2104 2105 static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos, 2106 const struct intel_pt_blk_items *items, 2107 u64 regs_mask) 2108 { 2109 u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0); 2110 const u64 *xmm = items->xmm; 2111 2112 /* 2113 * If there are any XMM registers, then there should be all of them. 2114 * Nevertheless, follow the logic to add only registers that were 2115 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'), 2116 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly. 2117 */ 2118 intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0; 2119 2120 for (; mask; mask >>= 1, xmm++) { 2121 if (mask & 1) 2122 *pos++ = *xmm; 2123 } 2124 } 2125 2126 #define LBR_INFO_MISPRED (1ULL << 63) 2127 #define LBR_INFO_IN_TX (1ULL << 62) 2128 #define LBR_INFO_ABORT (1ULL << 61) 2129 #define LBR_INFO_CYCLES 0xffff 2130 2131 /* Refer kernel's intel_pmu_store_pebs_lbrs() */ 2132 static u64 intel_pt_lbr_flags(u64 info) 2133 { 2134 union { 2135 struct branch_flags flags; 2136 u64 result; 2137 } u; 2138 2139 u.result = 0; 2140 u.flags.mispred = !!(info & LBR_INFO_MISPRED); 2141 u.flags.predicted = !(info & LBR_INFO_MISPRED); 2142 u.flags.in_tx = !!(info & LBR_INFO_IN_TX); 2143 u.flags.abort = !!(info & LBR_INFO_ABORT); 2144 u.flags.cycles = info & LBR_INFO_CYCLES; 2145 2146 return u.result; 2147 } 2148 2149 static void intel_pt_add_lbrs(struct branch_stack *br_stack, 2150 const struct intel_pt_blk_items *items) 2151 { 2152 u64 *to; 2153 int i; 2154 2155 br_stack->nr = 0; 2156 2157 to = &br_stack->entries[0].from; 2158 2159 for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) { 2160 u32 mask = items->mask[i]; 2161 const u64 *from = items->val[i]; 2162 2163 for (; mask; mask >>= 3, from += 3) { 2164 if ((mask & 7) == 7) { 2165 *to++ = from[0]; 2166 *to++ = from[1]; 2167 *to++ = intel_pt_lbr_flags(from[2]); 2168 br_stack->nr += 1; 2169 } 2170 } 2171 } 2172 } 2173 2174 static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) 2175 { 2176 const struct intel_pt_blk_items *items = &ptq->state->items; 2177 struct perf_sample sample = { .ip = 0, }; 2178 union perf_event *event = ptq->event_buf; 2179 struct intel_pt *pt = ptq->pt; 2180 u64 sample_type = evsel->core.attr.sample_type; 2181 u8 cpumode; 2182 u64 regs[8 * sizeof(sample.intr_regs.mask)]; 2183 2184 if (intel_pt_skip_event(pt)) 2185 return 0; 2186 2187 intel_pt_prep_a_sample(ptq, event, &sample); 2188 2189 sample.id = id; 2190 sample.stream_id = id; 2191 2192 if (!evsel->core.attr.freq) 2193 sample.period = evsel->core.attr.sample_period; 2194 2195 /* No support for non-zero CS base */ 2196 if (items->has_ip) 2197 sample.ip = items->ip; 2198 else if (items->has_rip) 2199 sample.ip = items->rip; 2200 else 2201 sample.ip = ptq->state->from_ip; 2202 2203 cpumode = intel_pt_cpumode(ptq, sample.ip, 0); 2204 2205 event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP; 2206 2207 sample.cpumode = cpumode; 2208 2209 if (sample_type & PERF_SAMPLE_TIME) { 2210 u64 timestamp = 0; 2211 2212 if (items->has_timestamp) 2213 timestamp = items->timestamp; 2214 else if (!pt->timeless_decoding) 2215 timestamp = ptq->timestamp; 2216 if (timestamp) 2217 sample.time = tsc_to_perf_time(timestamp, &pt->tc); 2218 } 2219 2220 if 
(sample_type & PERF_SAMPLE_CALLCHAIN && 2221 pt->synth_opts.callchain) { 2222 thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain, 2223 pt->synth_opts.callchain_sz, sample.ip, 2224 pt->kernel_start); 2225 sample.callchain = ptq->chain; 2226 } 2227 2228 if (sample_type & PERF_SAMPLE_REGS_INTR && 2229 (items->mask[INTEL_PT_GP_REGS_POS] || 2230 items->mask[INTEL_PT_XMM_POS])) { 2231 u64 regs_mask = evsel->core.attr.sample_regs_intr; 2232 u64 *pos; 2233 2234 sample.intr_regs.abi = items->is_32_bit ? 2235 PERF_SAMPLE_REGS_ABI_32 : 2236 PERF_SAMPLE_REGS_ABI_64; 2237 sample.intr_regs.regs = regs; 2238 2239 pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); 2240 2241 intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); 2242 } 2243 2244 if (sample_type & PERF_SAMPLE_BRANCH_STACK) { 2245 if (items->mask[INTEL_PT_LBR_0_POS] || 2246 items->mask[INTEL_PT_LBR_1_POS] || 2247 items->mask[INTEL_PT_LBR_2_POS]) { 2248 intel_pt_add_lbrs(ptq->last_branch, items); 2249 } else if (pt->synth_opts.last_branch) { 2250 thread_stack__br_sample(ptq->thread, ptq->cpu, 2251 ptq->last_branch, 2252 pt->br_stack_sz); 2253 } else { 2254 ptq->last_branch->nr = 0; 2255 } 2256 sample.branch_stack = ptq->last_branch; 2257 } 2258 2259 if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address) 2260 sample.addr = items->mem_access_address; 2261 2262 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { 2263 /* 2264 * Refer kernel's setup_pebs_adaptive_sample_data() and 2265 * intel_hsw_weight(). 2266 */ 2267 if (items->has_mem_access_latency) { 2268 u64 weight = items->mem_access_latency >> 32; 2269 2270 /* 2271 * Starting from SPR, the mem access latency field 2272 * contains both cache latency [47:32] and instruction 2273 * latency [15:0]. The cache latency is the same as the 2274 * mem access latency on previous platforms. 2275 * 2276 * In practice, no memory access could last longer than 4G 2277 * cycles. Use latency >> 32 to distinguish the 2278 * different format of the mem access latency field. 2279 */ 2280 if (weight > 0) { 2281 sample.weight = weight & 0xffff; 2282 sample.ins_lat = items->mem_access_latency & 0xffff; 2283 } else 2284 sample.weight = items->mem_access_latency; 2285 } 2286 if (!sample.weight && items->has_tsx_aux_info) { 2287 /* Cycles last block */ 2288 sample.weight = (u32)items->tsx_aux_info; 2289 } 2290 } 2291 2292 if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { 2293 u64 ax = items->has_rax ?
items->rax : 0; 2294 /* Refer kernel's intel_hsw_transaction() */ 2295 u64 txn = (u8)(items->tsx_aux_info >> 32); 2296 2297 /* For RTM XABORTs also log the abort code from AX */ 2298 if (txn & PERF_TXN_TRANSACTION && ax & 1) 2299 txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; 2300 sample.transaction = txn; 2301 } 2302 2303 return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); 2304 } 2305 2306 static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) 2307 { 2308 struct intel_pt *pt = ptq->pt; 2309 struct evsel *evsel = pt->pebs_evsel; 2310 u64 id = evsel->core.id[0]; 2311 2312 return intel_pt_do_synth_pebs_sample(ptq, evsel, id); 2313 } 2314 2315 static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) 2316 { 2317 const struct intel_pt_blk_items *items = &ptq->state->items; 2318 struct intel_pt_pebs_event *pe; 2319 struct intel_pt *pt = ptq->pt; 2320 int err = -EINVAL; 2321 int hw_id; 2322 2323 if (!items->has_applicable_counters || !items->applicable_counters) { 2324 if (!pt->single_pebs) 2325 pr_err("PEBS-via-PT record with no applicable_counters\n"); 2326 return intel_pt_synth_single_pebs_sample(ptq); 2327 } 2328 2329 for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { 2330 pe = &ptq->pebs[hw_id]; 2331 if (!pe->evsel) { 2332 if (!pt->single_pebs) 2333 pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", 2334 hw_id); 2335 return intel_pt_synth_single_pebs_sample(ptq); 2336 } 2337 err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); 2338 if (err) 2339 return err; 2340 } 2341 2342 return err; 2343 } 2344 2345 static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) 2346 { 2347 struct intel_pt *pt = ptq->pt; 2348 union perf_event *event = ptq->event_buf; 2349 struct perf_sample sample = { .ip = 0, }; 2350 struct { 2351 struct perf_synth_intel_evt cfe; 2352 struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; 2353 } raw; 2354 int i; 2355 2356 if (intel_pt_skip_event(pt)) 2357 return 0; 2358 2359 intel_pt_prep_p_sample(pt, ptq, event, &sample); 2360 2361 sample.id = ptq->pt->evt_id; 2362 sample.stream_id = ptq->pt->evt_id; 2363 2364 raw.cfe.type = ptq->state->cfe_type; 2365 raw.cfe.reserved = 0; 2366 raw.cfe.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP); 2367 raw.cfe.vector = ptq->state->cfe_vector; 2368 raw.cfe.evd_cnt = ptq->state->evd_cnt; 2369 2370 for (i = 0; i < ptq->state->evd_cnt; i++) { 2371 raw.evd[i].et = 0; 2372 raw.evd[i].evd_type = ptq->state->evd[i].type; 2373 raw.evd[i].payload = ptq->state->evd[i].payload; 2374 } 2375 2376 sample.raw_size = perf_synth__raw_size(raw) + 2377 ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); 2378 sample.raw_data = perf_synth__raw_data(&raw); 2379 2380 return intel_pt_deliver_synth_event(pt, event, &sample, 2381 pt->evt_sample_type); 2382 } 2383 2384 static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) 2385 { 2386 struct intel_pt *pt = ptq->pt; 2387 union perf_event *event = ptq->event_buf; 2388 struct perf_sample sample = { .ip = 0, }; 2389 struct perf_synth_intel_iflag_chg raw; 2390 2391 if (intel_pt_skip_event(pt)) 2392 return 0; 2393 2394 intel_pt_prep_p_sample(pt, ptq, event, &sample); 2395 2396 sample.id = ptq->pt->iflag_chg_id; 2397 sample.stream_id = ptq->pt->iflag_chg_id; 2398 2399 raw.flags = 0; 2400 raw.iflag = ptq->state->to_iflag; 2401 2402 if (ptq->state->type & INTEL_PT_BRANCH) { 2403 raw.via_branch = 1; 2404 raw.branch_ip = ptq->state->to_ip; 2405 } else { 2406 sample.addr = 0; 2407 } 2408 
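/* Propagate the decoder-derived sample flags (PERF_IP_FLAG_*) */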
sample.flags = ptq->flags; 2409 2410 sample.raw_size = perf_synth__raw_size(raw); 2411 sample.raw_data = perf_synth__raw_data(&raw); 2412 2413 return intel_pt_deliver_synth_event(pt, event, &sample, 2414 pt->iflag_chg_sample_type); 2415 } 2416 2417 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 2418 pid_t pid, pid_t tid, u64 ip, u64 timestamp, 2419 pid_t machine_pid, int vcpu) 2420 { 2421 union perf_event event; 2422 char msg[MAX_AUXTRACE_ERROR_MSG]; 2423 int err; 2424 2425 if (pt->synth_opts.error_minus_flags) { 2426 if (code == INTEL_PT_ERR_OVR && 2427 pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) 2428 return 0; 2429 if (code == INTEL_PT_ERR_LOST && 2430 pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) 2431 return 0; 2432 } 2433 2434 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 2435 2436 auxtrace_synth_guest_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 2437 code, cpu, pid, tid, ip, msg, timestamp, 2438 machine_pid, vcpu); 2439 2440 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 2441 if (err) 2442 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", 2443 err); 2444 2445 return err; 2446 } 2447 2448 static int intel_ptq_synth_error(struct intel_pt_queue *ptq, 2449 const struct intel_pt_state *state) 2450 { 2451 struct intel_pt *pt = ptq->pt; 2452 u64 tm = ptq->timestamp; 2453 pid_t machine_pid = 0; 2454 pid_t pid = ptq->pid; 2455 pid_t tid = ptq->tid; 2456 int vcpu = -1; 2457 2458 tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); 2459 2460 if (pt->have_guest_sideband && state->from_nr) { 2461 machine_pid = ptq->guest_machine_pid; 2462 vcpu = ptq->vcpu; 2463 pid = ptq->guest_pid; 2464 tid = ptq->guest_tid; 2465 } 2466 2467 return intel_pt_synth_error(pt, state->err, ptq->cpu, pid, tid, 2468 state->from_ip, tm, machine_pid, vcpu); 2469 } 2470 2471 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) 2472 { 2473 struct auxtrace_queue *queue; 2474 pid_t tid = ptq->next_tid; 2475 int err; 2476 2477 if (tid == -1) 2478 return 0; 2479 2480 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); 2481 2482 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); 2483 2484 queue = &pt->queues.queue_array[ptq->queue_nr]; 2485 intel_pt_set_pid_tid_cpu(pt, queue); 2486 2487 ptq->next_tid = -1; 2488 2489 return err; 2490 } 2491 2492 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) 2493 { 2494 struct intel_pt *pt = ptq->pt; 2495 2496 return ip == pt->switch_ip && 2497 (ptq->flags & PERF_IP_FLAG_BRANCH) && 2498 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | 2499 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); 2500 } 2501 2502 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ 2503 INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) 2504 2505 static int intel_pt_sample(struct intel_pt_queue *ptq) 2506 { 2507 const struct intel_pt_state *state = ptq->state; 2508 struct intel_pt *pt = ptq->pt; 2509 int err; 2510 2511 if (!ptq->have_sample) 2512 return 0; 2513 2514 ptq->have_sample = false; 2515 2516 if (pt->synth_opts.approx_ipc) { 2517 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; 2518 ptq->ipc_cyc_cnt = ptq->state->cycles; 2519 ptq->sample_ipc = true; 2520 } else { 2521 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; 2522 ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; 2523 ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; 2524 } 2525 2526 /* Ensure guest code maps are set up */ 2527 if 
(symbol_conf.guest_code && (state->from_nr || state->to_nr)) 2528 intel_pt_get_guest(ptq); 2529 2530 /* 2531 * Do PEBS first to allow for the possibility that the PEBS timestamp 2532 * precedes the current timestamp. 2533 */ 2534 if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { 2535 err = intel_pt_synth_pebs_sample(ptq); 2536 if (err) 2537 return err; 2538 } 2539 2540 if (pt->synth_opts.intr_events) { 2541 if (state->type & INTEL_PT_EVT) { 2542 err = intel_pt_synth_events_sample(ptq); 2543 if (err) 2544 return err; 2545 } 2546 if (state->type & INTEL_PT_IFLAG_CHG) { 2547 err = intel_pt_synth_iflag_chg_sample(ptq); 2548 if (err) 2549 return err; 2550 } 2551 } 2552 2553 if (pt->sample_pwr_events) { 2554 if (state->type & INTEL_PT_PSB_EVT) { 2555 err = intel_pt_synth_psb_sample(ptq); 2556 if (err) 2557 return err; 2558 } 2559 if (ptq->state->cbr != ptq->cbr_seen) { 2560 err = intel_pt_synth_cbr_sample(ptq); 2561 if (err) 2562 return err; 2563 } 2564 if (state->type & INTEL_PT_PWR_EVT) { 2565 if (state->type & INTEL_PT_MWAIT_OP) { 2566 err = intel_pt_synth_mwait_sample(ptq); 2567 if (err) 2568 return err; 2569 } 2570 if (state->type & INTEL_PT_PWR_ENTRY) { 2571 err = intel_pt_synth_pwre_sample(ptq); 2572 if (err) 2573 return err; 2574 } 2575 if (state->type & INTEL_PT_EX_STOP) { 2576 err = intel_pt_synth_exstop_sample(ptq); 2577 if (err) 2578 return err; 2579 } 2580 if (state->type & INTEL_PT_PWR_EXIT) { 2581 err = intel_pt_synth_pwrx_sample(ptq); 2582 if (err) 2583 return err; 2584 } 2585 } 2586 } 2587 2588 if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { 2589 err = intel_pt_synth_instruction_sample(ptq); 2590 if (err) 2591 return err; 2592 } 2593 2594 if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { 2595 err = intel_pt_synth_transaction_sample(ptq); 2596 if (err) 2597 return err; 2598 } 2599 2600 if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { 2601 err = intel_pt_synth_ptwrite_sample(ptq); 2602 if (err) 2603 return err; 2604 } 2605 2606 if (!(state->type & INTEL_PT_BRANCH)) 2607 return 0; 2608 2609 if (pt->use_thread_stack) { 2610 thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, 2611 state->from_ip, state->to_ip, ptq->insn_len, 2612 state->trace_nr, pt->callstack, 2613 pt->br_stack_sz_plus, 2614 pt->mispred_all); 2615 } else { 2616 thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); 2617 } 2618 2619 if (pt->sample_branches) { 2620 if (state->from_nr != state->to_nr && 2621 state->from_ip && state->to_ip) { 2622 struct intel_pt_state *st = (struct intel_pt_state *)state; 2623 u64 to_ip = st->to_ip; 2624 u64 from_ip = st->from_ip; 2625 2626 /* 2627 * perf cannot handle having different machines for ip 2628 * and addr, so create 2 branches. 
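 * One branch is synthesized with only the source ip (to_ip zeroed) and a
 * second with only the destination ip (from_ip zeroed), as done below.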
2629 */ 2630 st->to_ip = 0; 2631 err = intel_pt_synth_branch_sample(ptq); 2632 if (err) 2633 return err; 2634 st->from_ip = 0; 2635 st->to_ip = to_ip; 2636 err = intel_pt_synth_branch_sample(ptq); 2637 st->from_ip = from_ip; 2638 } else { 2639 err = intel_pt_synth_branch_sample(ptq); 2640 } 2641 if (err) 2642 return err; 2643 } 2644 2645 if (!ptq->sync_switch) 2646 return 0; 2647 2648 if (intel_pt_is_switch_ip(ptq, state->to_ip)) { 2649 switch (ptq->switch_state) { 2650 case INTEL_PT_SS_NOT_TRACING: 2651 case INTEL_PT_SS_UNKNOWN: 2652 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2653 err = intel_pt_next_tid(pt, ptq); 2654 if (err) 2655 return err; 2656 ptq->switch_state = INTEL_PT_SS_TRACING; 2657 break; 2658 default: 2659 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; 2660 return 1; 2661 } 2662 } else if (!state->to_ip) { 2663 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 2664 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { 2665 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 2666 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && 2667 state->to_ip == pt->ptss_ip && 2668 (ptq->flags & PERF_IP_FLAG_CALL)) { 2669 ptq->switch_state = INTEL_PT_SS_TRACING; 2670 } 2671 2672 return 0; 2673 } 2674 2675 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) 2676 { 2677 struct machine *machine = pt->machine; 2678 struct map *map; 2679 struct symbol *sym, *start; 2680 u64 ip, switch_ip = 0; 2681 const char *ptss; 2682 2683 if (ptss_ip) 2684 *ptss_ip = 0; 2685 2686 map = machine__kernel_map(machine); 2687 if (!map) 2688 return 0; 2689 2690 if (map__load(map)) 2691 return 0; 2692 2693 start = dso__first_symbol(map->dso); 2694 2695 for (sym = start; sym; sym = dso__next_symbol(sym)) { 2696 if (sym->binding == STB_GLOBAL && 2697 !strcmp(sym->name, "__switch_to")) { 2698 ip = map->unmap_ip(map, sym->start); 2699 if (ip >= map->start && ip < map->end) { 2700 switch_ip = ip; 2701 break; 2702 } 2703 } 2704 } 2705 2706 if (!switch_ip || !ptss_ip) 2707 return 0; 2708 2709 if (pt->have_sched_switch == 1) 2710 ptss = "perf_trace_sched_switch"; 2711 else 2712 ptss = "__perf_event_task_sched_out"; 2713 2714 for (sym = start; sym; sym = dso__next_symbol(sym)) { 2715 if (!strcmp(sym->name, ptss)) { 2716 ip = map->unmap_ip(map, sym->start); 2717 if (ip >= map->start && ip < map->end) { 2718 *ptss_ip = ip; 2719 break; 2720 } 2721 } 2722 } 2723 2724 return switch_ip; 2725 } 2726 2727 static void intel_pt_enable_sync_switch(struct intel_pt *pt) 2728 { 2729 unsigned int i; 2730 2731 if (pt->sync_switch_not_supported) 2732 return; 2733 2734 pt->sync_switch = true; 2735 2736 for (i = 0; i < pt->queues.nr_queues; i++) { 2737 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 2738 struct intel_pt_queue *ptq = queue->priv; 2739 2740 if (ptq) 2741 ptq->sync_switch = true; 2742 } 2743 } 2744 2745 static void intel_pt_disable_sync_switch(struct intel_pt *pt) 2746 { 2747 unsigned int i; 2748 2749 pt->sync_switch = false; 2750 2751 for (i = 0; i < pt->queues.nr_queues; i++) { 2752 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 2753 struct intel_pt_queue *ptq = queue->priv; 2754 2755 if (ptq) { 2756 ptq->sync_switch = false; 2757 intel_pt_next_tid(pt, ptq); 2758 } 2759 } 2760 } 2761 2762 /* 2763 * To filter against time ranges, it is only necessary to look at the next start 2764 * or end time. 
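 * ptq->sel_timestamp holds that boundary and ptq->sel_start indicates
 * whether it is a start time or an end time.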
2765 */ 2766 static bool intel_pt_next_time(struct intel_pt_queue *ptq) 2767 { 2768 struct intel_pt *pt = ptq->pt; 2769 2770 if (ptq->sel_start) { 2771 /* Next time is an end time */ 2772 ptq->sel_start = false; 2773 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; 2774 return true; 2775 } else if (ptq->sel_idx + 1 < pt->range_cnt) { 2776 /* Next time is a start time */ 2777 ptq->sel_start = true; 2778 ptq->sel_idx += 1; 2779 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; 2780 return true; 2781 } 2782 2783 /* No next time */ 2784 return false; 2785 } 2786 2787 static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) 2788 { 2789 int err; 2790 2791 while (1) { 2792 if (ptq->sel_start) { 2793 if (ptq->timestamp >= ptq->sel_timestamp) { 2794 /* After start time, so consider next time */ 2795 intel_pt_next_time(ptq); 2796 if (!ptq->sel_timestamp) { 2797 /* No end time */ 2798 return 0; 2799 } 2800 /* Check against end time */ 2801 continue; 2802 } 2803 /* Before start time, so fast forward */ 2804 ptq->have_sample = false; 2805 if (ptq->sel_timestamp > *ff_timestamp) { 2806 if (ptq->sync_switch) { 2807 intel_pt_next_tid(ptq->pt, ptq); 2808 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 2809 } 2810 *ff_timestamp = ptq->sel_timestamp; 2811 err = intel_pt_fast_forward(ptq->decoder, 2812 ptq->sel_timestamp); 2813 if (err) 2814 return err; 2815 } 2816 return 0; 2817 } else if (ptq->timestamp > ptq->sel_timestamp) { 2818 /* After end time, so consider next time */ 2819 if (!intel_pt_next_time(ptq)) { 2820 /* No next time range, so stop decoding */ 2821 ptq->have_sample = false; 2822 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 2823 return 1; 2824 } 2825 /* Check against next start time */ 2826 continue; 2827 } else { 2828 /* Before end time */ 2829 return 0; 2830 } 2831 } 2832 } 2833 2834 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 2835 { 2836 const struct intel_pt_state *state = ptq->state; 2837 struct intel_pt *pt = ptq->pt; 2838 u64 ff_timestamp = 0; 2839 int err; 2840 2841 if (!pt->kernel_start) { 2842 pt->kernel_start = machine__kernel_start(pt->machine); 2843 if (pt->per_cpu_mmaps && 2844 (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && 2845 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && 2846 !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) { 2847 pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); 2848 if (pt->switch_ip) { 2849 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", 2850 pt->switch_ip, pt->ptss_ip); 2851 intel_pt_enable_sync_switch(pt); 2852 } 2853 } 2854 } 2855 2856 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 2857 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 2858 while (1) { 2859 err = intel_pt_sample(ptq); 2860 if (err) 2861 return err; 2862 2863 state = intel_pt_decode(ptq->decoder); 2864 if (state->err) { 2865 if (state->err == INTEL_PT_ERR_NODATA) 2866 return 1; 2867 if (ptq->sync_switch && 2868 state->from_ip >= pt->kernel_start) { 2869 ptq->sync_switch = false; 2870 intel_pt_next_tid(pt, ptq); 2871 } 2872 ptq->timestamp = state->est_timestamp; 2873 if (pt->synth_opts.errors) { 2874 err = intel_ptq_synth_error(ptq, state); 2875 if (err) 2876 return err; 2877 } 2878 continue; 2879 } 2880 2881 ptq->state = state; 2882 ptq->have_sample = true; 2883 intel_pt_sample_flags(ptq); 2884 2885 /* Use estimated TSC upon return to user space */ 2886 if (pt->est_tsc && 2887 (state->from_ip >= pt->kernel_start || !state->from_ip) && 2888 state->to_ip && state->to_ip < 
pt->kernel_start) { 2889 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", 2890 state->timestamp, state->est_timestamp); 2891 ptq->timestamp = state->est_timestamp; 2892 /* Use estimated TSC in unknown switch state */ 2893 } else if (ptq->sync_switch && 2894 ptq->switch_state == INTEL_PT_SS_UNKNOWN && 2895 intel_pt_is_switch_ip(ptq, state->to_ip) && 2896 ptq->next_tid == -1) { 2897 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", 2898 state->timestamp, state->est_timestamp); 2899 ptq->timestamp = state->est_timestamp; 2900 } else if (state->timestamp > ptq->timestamp) { 2901 ptq->timestamp = state->timestamp; 2902 } 2903 2904 if (ptq->sel_timestamp) { 2905 err = intel_pt_time_filter(ptq, &ff_timestamp); 2906 if (err) 2907 return err; 2908 } 2909 2910 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { 2911 *timestamp = ptq->timestamp; 2912 return 0; 2913 } 2914 } 2915 return 0; 2916 } 2917 2918 static inline int intel_pt_update_queues(struct intel_pt *pt) 2919 { 2920 if (pt->queues.new_data) { 2921 pt->queues.new_data = false; 2922 return intel_pt_setup_queues(pt); 2923 } 2924 return 0; 2925 } 2926 2927 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) 2928 { 2929 unsigned int queue_nr; 2930 u64 ts; 2931 int ret; 2932 2933 while (1) { 2934 struct auxtrace_queue *queue; 2935 struct intel_pt_queue *ptq; 2936 2937 if (!pt->heap.heap_cnt) 2938 return 0; 2939 2940 if (pt->heap.heap_array[0].ordinal >= timestamp) 2941 return 0; 2942 2943 queue_nr = pt->heap.heap_array[0].queue_nr; 2944 queue = &pt->queues.queue_array[queue_nr]; 2945 ptq = queue->priv; 2946 2947 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", 2948 queue_nr, pt->heap.heap_array[0].ordinal, 2949 timestamp); 2950 2951 auxtrace_heap__pop(&pt->heap); 2952 2953 if (pt->heap.heap_cnt) { 2954 ts = pt->heap.heap_array[0].ordinal + 1; 2955 if (ts > timestamp) 2956 ts = timestamp; 2957 } else { 2958 ts = timestamp; 2959 } 2960 2961 intel_pt_set_pid_tid_cpu(pt, queue); 2962 2963 ret = intel_pt_run_decoder(ptq, &ts); 2964 2965 if (ret < 0) { 2966 auxtrace_heap__add(&pt->heap, queue_nr, ts); 2967 return ret; 2968 } 2969 2970 if (!ret) { 2971 ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); 2972 if (ret < 0) 2973 return ret; 2974 } else { 2975 ptq->on_heap = false; 2976 } 2977 } 2978 2979 return 0; 2980 } 2981 2982 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, 2983 u64 time_) 2984 { 2985 struct auxtrace_queues *queues = &pt->queues; 2986 unsigned int i; 2987 u64 ts = 0; 2988 2989 for (i = 0; i < queues->nr_queues; i++) { 2990 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 2991 struct intel_pt_queue *ptq = queue->priv; 2992 2993 if (ptq && (tid == -1 || ptq->tid == tid)) { 2994 ptq->time = time_; 2995 intel_pt_set_pid_tid_cpu(pt, queue); 2996 intel_pt_run_decoder(ptq, &ts); 2997 } 2998 } 2999 return 0; 3000 } 3001 3002 static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, 3003 struct auxtrace_queue *queue, 3004 struct perf_sample *sample) 3005 { 3006 struct machine *m = ptq->pt->machine; 3007 3008 ptq->pid = sample->pid; 3009 ptq->tid = sample->tid; 3010 ptq->cpu = queue->cpu; 3011 3012 intel_pt_log("queue %u cpu %d pid %d tid %d\n", 3013 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 3014 3015 thread__zput(ptq->thread); 3016 3017 if (ptq->tid == -1) 3018 return; 3019 3020 if (ptq->pid == -1) { 3021 ptq->thread = machine__find_thread(m, -1, ptq->tid); 3022 if (ptq->thread) 3023 ptq->pid = ptq->thread->pid_; 3024 return; 3025 } 3026 
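/* pid and tid are both known, so find or create the corresponding thread */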
3027 ptq->thread = machine__findnew_thread(m, ptq->pid, ptq->tid); 3028 } 3029 3030 static int intel_pt_process_timeless_sample(struct intel_pt *pt, 3031 struct perf_sample *sample) 3032 { 3033 struct auxtrace_queue *queue; 3034 struct intel_pt_queue *ptq; 3035 u64 ts = 0; 3036 3037 queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); 3038 if (!queue) 3039 return -EINVAL; 3040 3041 ptq = queue->priv; 3042 if (!ptq) 3043 return 0; 3044 3045 ptq->stop = false; 3046 ptq->time = sample->time; 3047 intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); 3048 intel_pt_run_decoder(ptq, &ts); 3049 return 0; 3050 } 3051 3052 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) 3053 { 3054 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, 3055 sample->pid, sample->tid, 0, sample->time, 3056 sample->machine_pid, sample->vcpu); 3057 } 3058 3059 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) 3060 { 3061 unsigned i, j; 3062 3063 if (cpu < 0 || !pt->queues.nr_queues) 3064 return NULL; 3065 3066 if ((unsigned)cpu >= pt->queues.nr_queues) 3067 i = pt->queues.nr_queues - 1; 3068 else 3069 i = cpu; 3070 3071 if (pt->queues.queue_array[i].cpu == cpu) 3072 return pt->queues.queue_array[i].priv; 3073 3074 for (j = 0; i > 0; j++) { 3075 if (pt->queues.queue_array[--i].cpu == cpu) 3076 return pt->queues.queue_array[i].priv; 3077 } 3078 3079 for (; j < pt->queues.nr_queues; j++) { 3080 if (pt->queues.queue_array[j].cpu == cpu) 3081 return pt->queues.queue_array[j].priv; 3082 } 3083 3084 return NULL; 3085 } 3086 3087 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, 3088 u64 timestamp) 3089 { 3090 struct intel_pt_queue *ptq; 3091 int err; 3092 3093 if (!pt->sync_switch) 3094 return 1; 3095 3096 ptq = intel_pt_cpu_to_ptq(pt, cpu); 3097 if (!ptq || !ptq->sync_switch) 3098 return 1; 3099 3100 switch (ptq->switch_state) { 3101 case INTEL_PT_SS_NOT_TRACING: 3102 break; 3103 case INTEL_PT_SS_UNKNOWN: 3104 case INTEL_PT_SS_TRACING: 3105 ptq->next_tid = tid; 3106 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; 3107 return 0; 3108 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 3109 if (!ptq->on_heap) { 3110 ptq->timestamp = perf_time_to_tsc(timestamp, 3111 &pt->tc); 3112 err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, 3113 ptq->timestamp); 3114 if (err) 3115 return err; 3116 ptq->on_heap = true; 3117 } 3118 ptq->switch_state = INTEL_PT_SS_TRACING; 3119 break; 3120 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 3121 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); 3122 break; 3123 default: 3124 break; 3125 } 3126 3127 ptq->next_tid = -1; 3128 3129 return 1; 3130 } 3131 3132 static int intel_pt_process_switch(struct intel_pt *pt, 3133 struct perf_sample *sample) 3134 { 3135 pid_t tid; 3136 int cpu, ret; 3137 struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); 3138 3139 if (evsel != pt->switch_evsel) 3140 return 0; 3141 3142 tid = evsel__intval(evsel, sample, "next_pid"); 3143 cpu = sample->cpu; 3144 3145 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 3146 cpu, tid, sample->time, perf_time_to_tsc(sample->time, 3147 &pt->tc)); 3148 3149 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 3150 if (ret <= 0) 3151 return ret; 3152 3153 return machine__set_current_tid(pt->machine, cpu, -1, tid); 3154 } 3155 3156 static int intel_pt_context_switch_in(struct intel_pt *pt, 3157 struct perf_sample *sample) 3158 { 3159 pid_t pid = sample->pid; 3160 pid_t tid = sample->tid; 
3161 int cpu = sample->cpu; 3162 3163 if (pt->sync_switch) { 3164 struct intel_pt_queue *ptq; 3165 3166 ptq = intel_pt_cpu_to_ptq(pt, cpu); 3167 if (ptq && ptq->sync_switch) { 3168 ptq->next_tid = -1; 3169 switch (ptq->switch_state) { 3170 case INTEL_PT_SS_NOT_TRACING: 3171 case INTEL_PT_SS_UNKNOWN: 3172 case INTEL_PT_SS_TRACING: 3173 break; 3174 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 3175 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 3176 ptq->switch_state = INTEL_PT_SS_TRACING; 3177 break; 3178 default: 3179 break; 3180 } 3181 } 3182 } 3183 3184 /* 3185 * If the current tid has not been updated yet, ensure it is now that 3186 * a "switch in" event has occurred. 3187 */ 3188 if (machine__get_current_tid(pt->machine, cpu) == tid) 3189 return 0; 3190 3191 return machine__set_current_tid(pt->machine, cpu, pid, tid); 3192 } 3193 3194 static int intel_pt_guest_context_switch(struct intel_pt *pt, 3195 union perf_event *event, 3196 struct perf_sample *sample) 3197 { 3198 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 3199 struct machines *machines = &pt->session->machines; 3200 struct machine *machine = machines__find(machines, sample->machine_pid); 3201 3202 pt->have_guest_sideband = true; 3203 3204 /* 3205 * sync_switch cannot handle guest machines at present, so just disable 3206 * it. 3207 */ 3208 pt->sync_switch_not_supported = true; 3209 if (pt->sync_switch) 3210 intel_pt_disable_sync_switch(pt); 3211 3212 if (out) 3213 return 0; 3214 3215 if (!machine) 3216 return -EINVAL; 3217 3218 return machine__set_current_tid(machine, sample->vcpu, sample->pid, sample->tid); 3219 } 3220 3221 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, 3222 struct perf_sample *sample) 3223 { 3224 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 3225 pid_t pid, tid; 3226 int cpu, ret; 3227 3228 if (perf_event__is_guest(event)) 3229 return intel_pt_guest_context_switch(pt, event, sample); 3230 3231 cpu = sample->cpu; 3232 3233 if (pt->have_sched_switch == 3) { 3234 if (!out) 3235 return intel_pt_context_switch_in(pt, sample); 3236 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { 3237 pr_err("Expecting CPU-wide context switch event\n"); 3238 return -EINVAL; 3239 } 3240 pid = event->context_switch.next_prev_pid; 3241 tid = event->context_switch.next_prev_tid; 3242 } else { 3243 if (out) 3244 return 0; 3245 pid = sample->pid; 3246 tid = sample->tid; 3247 } 3248 3249 if (tid == -1) 3250 intel_pt_log("context_switch event has no tid\n"); 3251 3252 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 3253 if (ret <= 0) 3254 return ret; 3255 3256 return machine__set_current_tid(pt->machine, cpu, pid, tid); 3257 } 3258 3259 static int intel_pt_process_itrace_start(struct intel_pt *pt, 3260 union perf_event *event, 3261 struct perf_sample *sample) 3262 { 3263 if (!pt->per_cpu_mmaps) 3264 return 0; 3265 3266 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 3267 sample->cpu, event->itrace_start.pid, 3268 event->itrace_start.tid, sample->time, 3269 perf_time_to_tsc(sample->time, &pt->tc)); 3270 3271 return machine__set_current_tid(pt->machine, sample->cpu, 3272 event->itrace_start.pid, 3273 event->itrace_start.tid); 3274 } 3275 3276 static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, 3277 union perf_event *event, 3278 struct perf_sample *sample) 3279 { 3280 u64 hw_id = event->aux_output_hw_id.hw_id; 3281 struct auxtrace_queue *queue; 3282 struct intel_pt_queue *ptq; 3283 struct evsel *evsel; 3284 3285 queue = 
auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); 3286 evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); 3287 if (!queue || !queue->priv || !evsel || hw_id >= INTEL_PT_MAX_PEBS) { 3288 pr_err("Bad AUX output hardware ID\n"); 3289 return -EINVAL; 3290 } 3291 3292 ptq = queue->priv; 3293 3294 ptq->pebs[hw_id].evsel = evsel; 3295 ptq->pebs[hw_id].id = sample->id; 3296 3297 return 0; 3298 } 3299 3300 static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, 3301 struct addr_location *al) 3302 { 3303 if (!al->map || addr < al->map->start || addr >= al->map->end) { 3304 if (!thread__find_map(thread, cpumode, addr, al)) 3305 return -1; 3306 } 3307 3308 return 0; 3309 } 3310 3311 /* Invalidate all instruction cache entries that overlap the text poke */ 3312 static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) 3313 { 3314 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 3315 u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; 3316 /* Assume the text poke is in a basic block that begins no more than 4096 bytes before it */ 3317 int cnt = 4096 + event->text_poke.new_len; 3318 struct thread *thread = pt->unknown_thread; 3319 struct addr_location al = { .map = NULL }; 3320 struct machine *machine = pt->machine; 3321 struct intel_pt_cache_entry *e; 3322 u64 offset; 3323 3324 if (!event->text_poke.new_len) 3325 return 0; 3326 3327 for (; cnt; cnt--, addr--) { 3328 if (intel_pt_find_map(thread, cpumode, addr, &al)) { 3329 if (addr < event->text_poke.addr) 3330 return 0; 3331 continue; 3332 } 3333 3334 if (!al.map->dso || !al.map->dso->auxtrace_cache) 3335 continue; 3336 3337 offset = al.map->map_ip(al.map, addr); 3338 3339 e = intel_pt_cache_lookup(al.map->dso, machine, offset); 3340 if (!e) 3341 continue; 3342 3343 if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { 3344 /* 3345 * No overlap. Working backwards there cannot be another 3346 * basic block that overlaps the text poke if there is a 3347 * branch instruction before the text poke address.
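 * In that case the backwards walk can stop once such a non-overlapping
 * cached entry records a branch instruction.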
3348 */ 3349 if (e->branch != INTEL_PT_BR_NO_BRANCH) 3350 return 0; 3351 } else { 3352 intel_pt_cache_invalidate(al.map->dso, machine, offset); 3353 intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", 3354 al.map->dso->long_name, addr); 3355 } 3356 } 3357 3358 return 0; 3359 } 3360 3361 static int intel_pt_process_event(struct perf_session *session, 3362 union perf_event *event, 3363 struct perf_sample *sample, 3364 struct perf_tool *tool) 3365 { 3366 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3367 auxtrace); 3368 u64 timestamp; 3369 int err = 0; 3370 3371 if (dump_trace) 3372 return 0; 3373 3374 if (!tool->ordered_events) { 3375 pr_err("Intel Processor Trace requires ordered events\n"); 3376 return -EINVAL; 3377 } 3378 3379 if (sample->time && sample->time != (u64)-1) 3380 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 3381 else 3382 timestamp = 0; 3383 3384 if (timestamp || pt->timeless_decoding) { 3385 err = intel_pt_update_queues(pt); 3386 if (err) 3387 return err; 3388 } 3389 3390 if (pt->timeless_decoding) { 3391 if (pt->sampling_mode) { 3392 if (sample->aux_sample.size) 3393 err = intel_pt_process_timeless_sample(pt, 3394 sample); 3395 } else if (event->header.type == PERF_RECORD_EXIT) { 3396 err = intel_pt_process_timeless_queues(pt, 3397 event->fork.tid, 3398 sample->time); 3399 } 3400 } else if (timestamp) { 3401 if (!pt->first_timestamp) 3402 intel_pt_first_timestamp(pt, timestamp); 3403 err = intel_pt_process_queues(pt, timestamp); 3404 } 3405 if (err) 3406 return err; 3407 3408 if (event->header.type == PERF_RECORD_SAMPLE) { 3409 if (pt->synth_opts.add_callchain && !sample->callchain) 3410 intel_pt_add_callchain(pt, sample); 3411 if (pt->synth_opts.add_last_branch && !sample->branch_stack) 3412 intel_pt_add_br_stack(pt, sample); 3413 } 3414 3415 if (event->header.type == PERF_RECORD_AUX && 3416 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && 3417 pt->synth_opts.errors) { 3418 err = intel_pt_lost(pt, sample); 3419 if (err) 3420 return err; 3421 } 3422 3423 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) 3424 err = intel_pt_process_switch(pt, sample); 3425 else if (event->header.type == PERF_RECORD_ITRACE_START) 3426 err = intel_pt_process_itrace_start(pt, event, sample); 3427 else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) 3428 err = intel_pt_process_aux_output_hw_id(pt, event, sample); 3429 else if (event->header.type == PERF_RECORD_SWITCH || 3430 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) 3431 err = intel_pt_context_switch(pt, event, sample); 3432 3433 if (!err && event->header.type == PERF_RECORD_TEXT_POKE) 3434 err = intel_pt_text_poke(pt, event); 3435 3436 if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { 3437 intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", 3438 event->header.type, sample->cpu, sample->time, timestamp); 3439 intel_pt_log_event(event); 3440 } 3441 3442 return err; 3443 } 3444 3445 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) 3446 { 3447 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3448 auxtrace); 3449 int ret; 3450 3451 if (dump_trace) 3452 return 0; 3453 3454 if (!tool->ordered_events) 3455 return -EINVAL; 3456 3457 ret = intel_pt_update_queues(pt); 3458 if (ret < 0) 3459 return ret; 3460 3461 if (pt->timeless_decoding) 3462 return intel_pt_process_timeless_queues(pt, -1, 3463 MAX_TIMESTAMP - 1); 3464 3465 return intel_pt_process_queues(pt, MAX_TIMESTAMP); 3466 } 3467 
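/* auxtrace ->free_events() callback: free each queue's decoder state, then the queues themselves */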
3468 static void intel_pt_free_events(struct perf_session *session) 3469 { 3470 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3471 auxtrace); 3472 struct auxtrace_queues *queues = &pt->queues; 3473 unsigned int i; 3474 3475 for (i = 0; i < queues->nr_queues; i++) { 3476 intel_pt_free_queue(queues->queue_array[i].priv); 3477 queues->queue_array[i].priv = NULL; 3478 } 3479 intel_pt_log_disable(); 3480 auxtrace_queues__free(queues); 3481 } 3482 3483 static void intel_pt_free(struct perf_session *session) 3484 { 3485 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3486 auxtrace); 3487 3488 auxtrace_heap__free(&pt->heap); 3489 intel_pt_free_events(session); 3490 session->auxtrace = NULL; 3491 intel_pt_free_vmcs_info(pt); 3492 thread__put(pt->unknown_thread); 3493 addr_filters__exit(&pt->filts); 3494 zfree(&pt->chain); 3495 zfree(&pt->filter); 3496 zfree(&pt->time_ranges); 3497 free(pt); 3498 } 3499 3500 static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, 3501 struct evsel *evsel) 3502 { 3503 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3504 auxtrace); 3505 3506 return evsel->core.attr.type == pt->pmu_type; 3507 } 3508 3509 static int intel_pt_process_auxtrace_event(struct perf_session *session, 3510 union perf_event *event, 3511 struct perf_tool *tool __maybe_unused) 3512 { 3513 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3514 auxtrace); 3515 3516 if (!pt->data_queued) { 3517 struct auxtrace_buffer *buffer; 3518 off_t data_offset; 3519 int fd = perf_data__fd(session->data); 3520 int err; 3521 3522 if (perf_data__is_pipe(session->data)) { 3523 data_offset = 0; 3524 } else { 3525 data_offset = lseek(fd, 0, SEEK_CUR); 3526 if (data_offset == -1) 3527 return -errno; 3528 } 3529 3530 err = auxtrace_queues__add_event(&pt->queues, session, event, 3531 data_offset, &buffer); 3532 if (err) 3533 return err; 3534 3535 /* Dump here now we have copied a piped trace out of the pipe */ 3536 if (dump_trace) { 3537 if (auxtrace_buffer__get_data(buffer, fd)) { 3538 intel_pt_dump_event(pt, buffer->data, 3539 buffer->size); 3540 auxtrace_buffer__put_data(buffer); 3541 } 3542 } 3543 } 3544 3545 return 0; 3546 } 3547 3548 static int intel_pt_queue_data(struct perf_session *session, 3549 struct perf_sample *sample, 3550 union perf_event *event, u64 data_offset) 3551 { 3552 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3553 auxtrace); 3554 u64 timestamp; 3555 3556 if (event) { 3557 return auxtrace_queues__add_event(&pt->queues, session, event, 3558 data_offset, NULL); 3559 } 3560 3561 if (sample->time && sample->time != (u64)-1) 3562 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 3563 else 3564 timestamp = 0; 3565 3566 return auxtrace_queues__add_sample(&pt->queues, session, sample, 3567 data_offset, timestamp); 3568 } 3569 3570 struct intel_pt_synth { 3571 struct perf_tool dummy_tool; 3572 struct perf_session *session; 3573 }; 3574 3575 static int intel_pt_event_synth(struct perf_tool *tool, 3576 union perf_event *event, 3577 struct perf_sample *sample __maybe_unused, 3578 struct machine *machine __maybe_unused) 3579 { 3580 struct intel_pt_synth *intel_pt_synth = 3581 container_of(tool, struct intel_pt_synth, dummy_tool); 3582 3583 return perf_session__deliver_synth_event(intel_pt_synth->session, event, 3584 NULL); 3585 } 3586 3587 static int intel_pt_synth_event(struct perf_session *session, const char *name, 3588 struct perf_event_attr *attr, u64 id) 3589 { 3590 struct 
intel_pt_synth intel_pt_synth; 3591 int err; 3592 3593 pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 3594 name, id, (u64)attr->sample_type); 3595 3596 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); 3597 intel_pt_synth.session = session; 3598 3599 err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, 3600 &id, intel_pt_event_synth); 3601 if (err) 3602 pr_err("%s: failed to synthesize '%s' event type\n", 3603 __func__, name); 3604 3605 return err; 3606 } 3607 3608 static void intel_pt_set_event_name(struct evlist *evlist, u64 id, 3609 const char *name) 3610 { 3611 struct evsel *evsel; 3612 3613 evlist__for_each_entry(evlist, evsel) { 3614 if (evsel->core.id && evsel->core.id[0] == id) { 3615 if (evsel->name) 3616 zfree(&evsel->name); 3617 evsel->name = strdup(name); 3618 break; 3619 } 3620 } 3621 } 3622 3623 static struct evsel *intel_pt_evsel(struct intel_pt *pt, 3624 struct evlist *evlist) 3625 { 3626 struct evsel *evsel; 3627 3628 evlist__for_each_entry(evlist, evsel) { 3629 if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) 3630 return evsel; 3631 } 3632 3633 return NULL; 3634 } 3635 3636 static int intel_pt_synth_events(struct intel_pt *pt, 3637 struct perf_session *session) 3638 { 3639 struct evlist *evlist = session->evlist; 3640 struct evsel *evsel = intel_pt_evsel(pt, evlist); 3641 struct perf_event_attr attr; 3642 u64 id; 3643 int err; 3644 3645 if (!evsel) { 3646 pr_debug("There are no selected events with Intel Processor Trace data\n"); 3647 return 0; 3648 } 3649 3650 memset(&attr, 0, sizeof(struct perf_event_attr)); 3651 attr.size = sizeof(struct perf_event_attr); 3652 attr.type = PERF_TYPE_HARDWARE; 3653 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 3654 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 3655 PERF_SAMPLE_PERIOD; 3656 if (pt->timeless_decoding) 3657 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 3658 else 3659 attr.sample_type |= PERF_SAMPLE_TIME; 3660 if (!pt->per_cpu_mmaps) 3661 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; 3662 attr.exclude_user = evsel->core.attr.exclude_user; 3663 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 3664 attr.exclude_hv = evsel->core.attr.exclude_hv; 3665 attr.exclude_host = evsel->core.attr.exclude_host; 3666 attr.exclude_guest = evsel->core.attr.exclude_guest; 3667 attr.sample_id_all = evsel->core.attr.sample_id_all; 3668 attr.read_format = evsel->core.attr.read_format; 3669 3670 id = evsel->core.id[0] + 1000000000; 3671 if (!id) 3672 id = 1; 3673 3674 if (pt->synth_opts.branches) { 3675 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 3676 attr.sample_period = 1; 3677 attr.sample_type |= PERF_SAMPLE_ADDR; 3678 err = intel_pt_synth_event(session, "branches", &attr, id); 3679 if (err) 3680 return err; 3681 pt->sample_branches = true; 3682 pt->branches_sample_type = attr.sample_type; 3683 pt->branches_id = id; 3684 id += 1; 3685 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 3686 } 3687 3688 if (pt->synth_opts.callchain) 3689 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 3690 if (pt->synth_opts.last_branch) { 3691 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 3692 /* 3693 * We don't use the hardware index, but the sample generation 3694 * code uses the new format branch_stack with this field, 3695 * so the event attributes must indicate that it's present. 
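 * Only the presence of the hw_idx field matters here; its value is not
 * used.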
3696 */ 3697 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 3698 } 3699 3700 if (pt->synth_opts.instructions) { 3701 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 3702 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) 3703 attr.sample_period = 3704 intel_pt_ns_to_ticks(pt, pt->synth_opts.period); 3705 else 3706 attr.sample_period = pt->synth_opts.period; 3707 err = intel_pt_synth_event(session, "instructions", &attr, id); 3708 if (err) 3709 return err; 3710 pt->sample_instructions = true; 3711 pt->instructions_sample_type = attr.sample_type; 3712 pt->instructions_id = id; 3713 id += 1; 3714 } 3715 3716 attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; 3717 attr.sample_period = 1; 3718 3719 if (pt->synth_opts.transactions) { 3720 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 3721 err = intel_pt_synth_event(session, "transactions", &attr, id); 3722 if (err) 3723 return err; 3724 pt->sample_transactions = true; 3725 pt->transactions_sample_type = attr.sample_type; 3726 pt->transactions_id = id; 3727 intel_pt_set_event_name(evlist, id, "transactions"); 3728 id += 1; 3729 } 3730 3731 attr.type = PERF_TYPE_SYNTH; 3732 attr.sample_type |= PERF_SAMPLE_RAW; 3733 3734 if (pt->synth_opts.ptwrites) { 3735 attr.config = PERF_SYNTH_INTEL_PTWRITE; 3736 err = intel_pt_synth_event(session, "ptwrite", &attr, id); 3737 if (err) 3738 return err; 3739 pt->sample_ptwrites = true; 3740 pt->ptwrites_sample_type = attr.sample_type; 3741 pt->ptwrites_id = id; 3742 intel_pt_set_event_name(evlist, id, "ptwrite"); 3743 id += 1; 3744 } 3745 3746 if (pt->synth_opts.pwr_events) { 3747 pt->sample_pwr_events = true; 3748 pt->pwr_events_sample_type = attr.sample_type; 3749 3750 attr.config = PERF_SYNTH_INTEL_CBR; 3751 err = intel_pt_synth_event(session, "cbr", &attr, id); 3752 if (err) 3753 return err; 3754 pt->cbr_id = id; 3755 intel_pt_set_event_name(evlist, id, "cbr"); 3756 id += 1; 3757 3758 attr.config = PERF_SYNTH_INTEL_PSB; 3759 err = intel_pt_synth_event(session, "psb", &attr, id); 3760 if (err) 3761 return err; 3762 pt->psb_id = id; 3763 intel_pt_set_event_name(evlist, id, "psb"); 3764 id += 1; 3765 } 3766 3767 if (pt->synth_opts.pwr_events && (evsel->core.attr.config & INTEL_PT_CFG_PWR_EVT_EN)) { 3768 attr.config = PERF_SYNTH_INTEL_MWAIT; 3769 err = intel_pt_synth_event(session, "mwait", &attr, id); 3770 if (err) 3771 return err; 3772 pt->mwait_id = id; 3773 intel_pt_set_event_name(evlist, id, "mwait"); 3774 id += 1; 3775 3776 attr.config = PERF_SYNTH_INTEL_PWRE; 3777 err = intel_pt_synth_event(session, "pwre", &attr, id); 3778 if (err) 3779 return err; 3780 pt->pwre_id = id; 3781 intel_pt_set_event_name(evlist, id, "pwre"); 3782 id += 1; 3783 3784 attr.config = PERF_SYNTH_INTEL_EXSTOP; 3785 err = intel_pt_synth_event(session, "exstop", &attr, id); 3786 if (err) 3787 return err; 3788 pt->exstop_id = id; 3789 intel_pt_set_event_name(evlist, id, "exstop"); 3790 id += 1; 3791 3792 attr.config = PERF_SYNTH_INTEL_PWRX; 3793 err = intel_pt_synth_event(session, "pwrx", &attr, id); 3794 if (err) 3795 return err; 3796 pt->pwrx_id = id; 3797 intel_pt_set_event_name(evlist, id, "pwrx"); 3798 id += 1; 3799 } 3800 3801 if (pt->synth_opts.intr_events && (evsel->core.attr.config & INTEL_PT_CFG_EVT_EN)) { 3802 attr.config = PERF_SYNTH_INTEL_EVT; 3803 err = intel_pt_synth_event(session, "evt", &attr, id); 3804 if (err) 3805 return err; 3806 pt->evt_sample_type = attr.sample_type; 3807 pt->evt_id = id; 3808 intel_pt_set_event_name(evlist, id, "evt"); 3809 id += 1; 3810 } 3811 3812 if (pt->synth_opts.intr_events && 
pt->cap_event_trace) { 3813 attr.config = PERF_SYNTH_INTEL_IFLAG_CHG; 3814 err = intel_pt_synth_event(session, "iflag", &attr, id); 3815 if (err) 3816 return err; 3817 pt->iflag_chg_sample_type = attr.sample_type; 3818 pt->iflag_chg_id = id; 3819 intel_pt_set_event_name(evlist, id, "iflag"); 3820 id += 1; 3821 } 3822 3823 return 0; 3824 } 3825 3826 static void intel_pt_setup_pebs_events(struct intel_pt *pt) 3827 { 3828 struct evsel *evsel; 3829 3830 if (!pt->synth_opts.other_events) 3831 return; 3832 3833 evlist__for_each_entry(pt->session->evlist, evsel) { 3834 if (evsel->core.attr.aux_output && evsel->core.id) { 3835 if (pt->single_pebs) { 3836 pt->single_pebs = false; 3837 return; 3838 } 3839 pt->single_pebs = true; 3840 pt->sample_pebs = true; 3841 pt->pebs_evsel = evsel; 3842 } 3843 } 3844 } 3845 3846 static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) 3847 { 3848 struct evsel *evsel; 3849 3850 evlist__for_each_entry_reverse(evlist, evsel) { 3851 const char *name = evsel__name(evsel); 3852 3853 if (!strcmp(name, "sched:sched_switch")) 3854 return evsel; 3855 } 3856 3857 return NULL; 3858 } 3859 3860 static bool intel_pt_find_switch(struct evlist *evlist) 3861 { 3862 struct evsel *evsel; 3863 3864 evlist__for_each_entry(evlist, evsel) { 3865 if (evsel->core.attr.context_switch) 3866 return true; 3867 } 3868 3869 return false; 3870 } 3871 3872 static int intel_pt_perf_config(const char *var, const char *value, void *data) 3873 { 3874 struct intel_pt *pt = data; 3875 3876 if (!strcmp(var, "intel-pt.mispred-all")) 3877 pt->mispred_all = perf_config_bool(var, value); 3878 3879 if (!strcmp(var, "intel-pt.max-loops")) 3880 perf_config_int(&pt->max_loops, var, value); 3881 3882 return 0; 3883 } 3884 3885 /* Find least TSC which converts to ns or later */ 3886 static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) 3887 { 3888 u64 tsc, tm; 3889 3890 tsc = perf_time_to_tsc(ns, &pt->tc); 3891 3892 while (1) { 3893 tm = tsc_to_perf_time(tsc, &pt->tc); 3894 if (tm < ns) 3895 break; 3896 tsc -= 1; 3897 } 3898 3899 while (tm < ns) 3900 tm = tsc_to_perf_time(++tsc, &pt->tc); 3901 3902 return tsc; 3903 } 3904 3905 /* Find greatest TSC which converts to ns or earlier */ 3906 static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) 3907 { 3908 u64 tsc, tm; 3909 3910 tsc = perf_time_to_tsc(ns, &pt->tc); 3911 3912 while (1) { 3913 tm = tsc_to_perf_time(tsc, &pt->tc); 3914 if (tm > ns) 3915 break; 3916 tsc += 1; 3917 } 3918 3919 while (tm > ns) 3920 tm = tsc_to_perf_time(--tsc, &pt->tc); 3921 3922 return tsc; 3923 } 3924 3925 static int intel_pt_setup_time_ranges(struct intel_pt *pt, 3926 struct itrace_synth_opts *opts) 3927 { 3928 struct perf_time_interval *p = opts->ptime_range; 3929 int n = opts->range_num; 3930 int i; 3931 3932 if (!n || !p || pt->timeless_decoding) 3933 return 0; 3934 3935 pt->time_ranges = calloc(n, sizeof(struct range)); 3936 if (!pt->time_ranges) 3937 return -ENOMEM; 3938 3939 pt->range_cnt = n; 3940 3941 intel_pt_log("%s: %u range(s)\n", __func__, n); 3942 3943 for (i = 0; i < n; i++) { 3944 struct range *r = &pt->time_ranges[i]; 3945 u64 ts = p[i].start; 3946 u64 te = p[i].end; 3947 3948 /* 3949 * Take care to ensure the TSC range matches the perf-time range 3950 * when converted back to perf-time. 3951 */ 3952 r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; 3953 r->end = te ? 
intel_pt_tsc_end(te, pt) : 0; 3954 3955 intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", 3956 i, ts, te); 3957 intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", 3958 i, r->start, r->end); 3959 } 3960 3961 return 0; 3962 } 3963 3964 static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) 3965 { 3966 struct intel_pt_vmcs_info *vmcs_info; 3967 u64 tsc_offset, vmcs; 3968 char *p = *args; 3969 3970 errno = 0; 3971 3972 p = skip_spaces(p); 3973 if (!*p) 3974 return 1; 3975 3976 tsc_offset = strtoull(p, &p, 0); 3977 if (errno) 3978 return -errno; 3979 p = skip_spaces(p); 3980 if (*p != ':') { 3981 pt->dflt_tsc_offset = tsc_offset; 3982 *args = p; 3983 return 0; 3984 } 3985 p += 1; 3986 while (1) { 3987 vmcs = strtoull(p, &p, 0); 3988 if (errno) 3989 return -errno; 3990 if (!vmcs) 3991 return -EINVAL; 3992 vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset); 3993 if (!vmcs_info) 3994 return -ENOMEM; 3995 p = skip_spaces(p); 3996 if (*p != ',') 3997 break; 3998 p += 1; 3999 } 4000 *args = p; 4001 return 0; 4002 } 4003 4004 static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) 4005 { 4006 char *args = pt->synth_opts.vm_tm_corr_args; 4007 int ret; 4008 4009 if (!args) 4010 return 0; 4011 4012 do { 4013 ret = intel_pt_parse_vm_tm_corr_arg(pt, &args); 4014 } while (!ret); 4015 4016 if (ret < 0) { 4017 pr_err("Failed to parse VM Time Correlation options\n"); 4018 return ret; 4019 } 4020 4021 return 0; 4022 } 4023 4024 static const char * const intel_pt_info_fmts[] = { 4025 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", 4026 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", 4027 [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", 4028 [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", 4029 [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", 4030 [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", 4031 [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", 4032 [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", 4033 [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", 4034 [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", 4035 [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", 4036 [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", 4037 [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", 4038 [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", 4039 [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n", 4040 [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n", 4041 }; 4042 4043 static void intel_pt_print_info(__u64 *arr, int start, int finish) 4044 { 4045 int i; 4046 4047 if (!dump_trace) 4048 return; 4049 4050 for (i = start; i <= finish; i++) 4051 fprintf(stdout, intel_pt_info_fmts[i], arr[i]); 4052 } 4053 4054 static void intel_pt_print_info_str(const char *name, const char *str) 4055 { 4056 if (!dump_trace) 4057 return; 4058 4059 fprintf(stdout, " %-20s%s\n", name, str ? 
str : ""); 4060 } 4061 4062 static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos) 4063 { 4064 return auxtrace_info->header.size >= 4065 sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1)); 4066 } 4067 4068 int intel_pt_process_auxtrace_info(union perf_event *event, 4069 struct perf_session *session) 4070 { 4071 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 4072 size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; 4073 struct intel_pt *pt; 4074 void *info_end; 4075 __u64 *info; 4076 int err; 4077 4078 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 4079 min_sz) 4080 return -EINVAL; 4081 4082 pt = zalloc(sizeof(struct intel_pt)); 4083 if (!pt) 4084 return -ENOMEM; 4085 4086 pt->vmcs_info = RB_ROOT; 4087 4088 addr_filters__init(&pt->filts); 4089 4090 err = perf_config(intel_pt_perf_config, pt); 4091 if (err) 4092 goto err_free; 4093 4094 err = auxtrace_queues__init(&pt->queues); 4095 if (err) 4096 goto err_free; 4097 4098 if (session->itrace_synth_opts->set) { 4099 pt->synth_opts = *session->itrace_synth_opts; 4100 } else { 4101 struct itrace_synth_opts *opts = session->itrace_synth_opts; 4102 4103 itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample); 4104 if (!opts->default_no_sample && !opts->inject) { 4105 pt->synth_opts.branches = false; 4106 pt->synth_opts.callchain = true; 4107 pt->synth_opts.add_callchain = true; 4108 } 4109 pt->synth_opts.thread_stack = opts->thread_stack; 4110 } 4111 4112 if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) 4113 intel_pt_log_set_name(INTEL_PT_PMU_NAME); 4114 4115 pt->session = session; 4116 pt->machine = &session->machines.host; /* No kvm support */ 4117 pt->auxtrace_type = auxtrace_info->type; 4118 pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; 4119 pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; 4120 pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; 4121 pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; 4122 pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; 4123 pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; 4124 pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; 4125 pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; 4126 pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; 4127 pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; 4128 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, 4129 INTEL_PT_PER_CPU_MMAPS); 4130 4131 if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) { 4132 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; 4133 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; 4134 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; 4135 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; 4136 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; 4137 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, 4138 INTEL_PT_CYC_BIT); 4139 } 4140 4141 if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) { 4142 pt->max_non_turbo_ratio = 4143 auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO]; 4144 intel_pt_print_info(&auxtrace_info->priv[0], 4145 INTEL_PT_MAX_NONTURBO_RATIO, 4146 INTEL_PT_MAX_NONTURBO_RATIO); 4147 } 4148 4149 info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; 4150 info_end = (void *)auxtrace_info + auxtrace_info->header.size; 4151 4152 if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) 
{ 4153 size_t len; 4154 4155 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN]; 4156 intel_pt_print_info(&auxtrace_info->priv[0], 4157 INTEL_PT_FILTER_STR_LEN, 4158 INTEL_PT_FILTER_STR_LEN); 4159 if (len) { 4160 const char *filter = (const char *)info; 4161 4162 len = roundup(len + 1, 8); 4163 info += len >> 3; 4164 if ((void *)info > info_end) { 4165 pr_err("%s: bad filter string length\n", __func__); 4166 err = -EINVAL; 4167 goto err_free_queues; 4168 } 4169 pt->filter = memdup(filter, len); 4170 if (!pt->filter) { 4171 err = -ENOMEM; 4172 goto err_free_queues; 4173 } 4174 if (session->header.needs_swap) 4175 mem_bswap_64(pt->filter, len); 4176 if (pt->filter[len - 1]) { 4177 pr_err("%s: filter string not null terminated\n", __func__); 4178 err = -EINVAL; 4179 goto err_free_queues; 4180 } 4181 err = addr_filters__parse_bare_filter(&pt->filts, 4182 filter); 4183 if (err) 4184 goto err_free_queues; 4185 } 4186 intel_pt_print_info_str("Filter string", pt->filter); 4187 } 4188 4189 if ((void *)info < info_end) { 4190 pt->cap_event_trace = *info++; 4191 if (dump_trace) 4192 fprintf(stdout, " Cap Event Trace %d\n", 4193 pt->cap_event_trace); 4194 } 4195 4196 pt->timeless_decoding = intel_pt_timeless_decoding(pt); 4197 if (pt->timeless_decoding && !pt->tc.time_mult) 4198 pt->tc.time_mult = 1; 4199 pt->have_tsc = intel_pt_have_tsc(pt); 4200 pt->sampling_mode = intel_pt_sampling_mode(pt); 4201 pt->est_tsc = !pt->timeless_decoding; 4202 4203 if (pt->synth_opts.vm_time_correlation) { 4204 if (pt->timeless_decoding) { 4205 pr_err("Intel PT has no time information for VM Time Correlation\n"); 4206 err = -EINVAL; 4207 goto err_free_queues; 4208 } 4209 if (session->itrace_synth_opts->ptime_range) { 4210 pr_err("Time ranges cannot be specified with VM Time Correlation\n"); 4211 err = -EINVAL; 4212 goto err_free_queues; 4213 } 4214 /* Currently TSC Offset is calculated using MTC packets */ 4215 if (!intel_pt_have_mtc(pt)) { 4216 pr_err("MTC packets must have been enabled for VM Time Correlation\n"); 4217 err = -EINVAL; 4218 goto err_free_queues; 4219 } 4220 err = intel_pt_parse_vm_tm_corr_args(pt); 4221 if (err) 4222 goto err_free_queues; 4223 } 4224 4225 pt->unknown_thread = thread__new(999999999, 999999999); 4226 if (!pt->unknown_thread) { 4227 err = -ENOMEM; 4228 goto err_free_queues; 4229 } 4230 4231 /* 4232 * Since this thread will not be kept in any rbtree not in a 4233 * list, initialize its list node so that at thread__put() the 4234 * current thread lifetime assumption is kept and we don't segfault 4235 * at list_del_init(). 
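 * The reference taken here is dropped via thread__zput() on the error
 * path and thread__put() in intel_pt_free().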
	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Since this thread will not be kept in any rbtree nor in a
	 * list, initialize its list node so that at thread__put() the
	 * current thread lifetime assumption is kept and we don't segfault
	 * at list_del_init().
	 */
	INIT_LIST_HEAD(&pt->unknown_thread->node);

	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_maps(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.queue_data = intel_pt_queue_data;
	pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			err = -EINVAL;
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		err = -EINVAL;
		goto err_delete_thread;
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		if (!pt->max_non_turbo_ratio)
			pt->max_non_turbo_ratio =
					(tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
		pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
	}

	err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
	if (err)
		goto err_delete_thread;

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
	    !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
			pt->synth_opts.add_callchain = false;
		}
	}

	if (pt->synth_opts.add_callchain) {
		err = intel_pt_callchain_init(pt);
		if (err)
			goto err_delete_thread;
	}

	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
		pt->br_stack_sz_plus = pt->br_stack_sz;
	}

	if (pt->synth_opts.add_last_branch) {
		err = intel_pt_br_stack_init(pt);
		if (err)
			goto err_delete_thread;
		/*
		 * Additional branch stack size to cater for tracing from the
		 * actual sample ip to where the sample time is recorded.
		 * Measured at about 200 branches, but generously set to 1024.
		 * If kernel space is not being traced, then add just 1 for the
		 * branch to kernel space.
		 */
		if (intel_pt_tracing_kernel(pt))
			pt->br_stack_sz_plus += 1024;
		else
			pt->br_stack_sz_plus += 1;
	}

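	/*
	 * Maintain per-thread stacks whenever callchains, branch stacks or
	 * the thread stack option itself are in use.
	 */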
	pt->use_thread_stack = pt->synth_opts.callchain ||
			       pt->synth_opts.add_callchain ||
			       pt->synth_opts.thread_stack ||
			       pt->synth_opts.last_branch ||
			       pt->synth_opts.add_last_branch;

	pt->callstack = pt->synth_opts.callchain ||
			pt->synth_opts.add_callchain ||
			pt->synth_opts.thread_stack;

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	intel_pt_setup_pebs_events(pt);

	if (pt->sampling_mode || list_empty(&session->auxtrace_index))
		err = auxtrace_queue_data(session, true, true);
	else
		err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	zfree(&pt->chain);
	thread__zput(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	addr_filters__exit(&pt->filts);
	zfree(&pt->filter);
	zfree(&pt->time_ranges);
	free(pt);
	return err;
}