// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <cpuid.h>

#include "../../perf.h"
#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/tsc.h"
#include "../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_PSB_PERIOD_NEAR	256

struct intel_pt_snapshot_ref {
	void *ref_buf;
	size_t ref_offset;
	bool wrapped;
};

struct intel_pt_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*intel_pt_pmu;
	int				have_sched_switch;
	struct perf_evlist		*evlist;
	bool				snapshot_mode;
	bool				snapshot_init_done;
	size_t				snapshot_size;
	size_t				snapshot_ref_buf_size;
	int				snapshot_ref_cnt;
	struct intel_pt_snapshot_ref	*snapshot_refs;
	size_t				priv_size;
};

static int intel_pt_parse_terms_with_default(struct list_head *formats,
					     const char *str,
					     u64 *config)
{
	struct list_head *terms;
	struct perf_event_attr attr = { .size = 0, };
	int err;

	terms = malloc(sizeof(struct list_head));
	if (!terms)
		return -ENOMEM;

	INIT_LIST_HEAD(terms);

	err = parse_events_terms(terms, str);
	if (err)
		goto out_free;

	attr.config = *config;
	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
	if (err)
		goto out_free;

	*config = attr.config;
out_free:
	parse_events_terms__delete(terms);
	return err;
}

static int intel_pt_parse_terms(struct list_head *formats, const char *str,
				u64 *config)
{
	*config = 0;
	return intel_pt_parse_terms_with_default(formats, str, config);
}

static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	const u64 top_bit = 1ULL << 63;
	u64 res = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & top_bit) {
			res <<= 1;
			if (bits & top_bit)
				res |= 1;
		}
		mask <<= 1;
		bits <<= 1;
	}

	return res;
}

static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
				struct perf_evlist *evlist, u64 *res)
{
	struct perf_evsel *evsel;
	u64 mask;

	*res = 0;

	mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
	if (!mask)
		return -EINVAL;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type) {
			*res = intel_pt_masked_bits(mask, evsel->attr.config);
			return 0;
		}
	}

	return -EINVAL;
}

static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
				  struct perf_evlist *evlist)
{
	u64 val;
	int err, topa_multiple_entries;
	size_t psb_period;

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
				"%d", &topa_multiple_entries) != 1)
		topa_multiple_entries = 0;

	/*
	 * Use caps/topa_multiple_entries to indicate early hardware that had
	 * extra frequent PSBs.
	 */
	if (!topa_multiple_entries) {
		psb_period = 256;
		goto out;
	}

	err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
	if (err)
		val = 0;

	psb_period = 1 << (val + 11);
out:
	pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
	return psb_period;
}

static int intel_pt_pick_bit(int bits, int target)
{
	int pos, pick = -1;

	for (pos = 0; bits; bits >>= 1, pos++) {
		if (bits & 1) {
			if (pos <= target || pick < 0)
				pick = pos;
			if (pos >= target)
				break;
		}
	}

	return pick;
}

static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
	char buf[256];
	int mtc, mtc_periods = 0, mtc_period;
	int psb_cyc, psb_periods, psb_period;
	int pos = 0;
	u64 config;
	char c;

	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
				&mtc) != 1)
		mtc = 1;

	if (mtc) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
					&mtc_periods) != 1)
			mtc_periods = 0;
		if (mtc_periods) {
			mtc_period = intel_pt_pick_bit(mtc_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",mtc,mtc_period=%d", mtc_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
				&psb_cyc) != 1)
		psb_cyc = 1;

	if (psb_cyc && mtc_periods) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
					&psb_periods) != 1)
			psb_periods = 0;
		if (psb_periods) {
			psb_period = intel_pt_pick_bit(psb_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",psb_period=%d", psb_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");

	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

	return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
					   struct record_opts *opts,
					   const char *str)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	ptr->snapshot_size = snapshot_size;

	return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
	struct perf_event_attr *attr;

	attr = zalloc(sizeof(struct perf_event_attr));
	if (!attr)
		return NULL;

	attr->config = intel_pt_default_config(intel_pt_pmu);

	intel_pt_pmu->selectable = true;

	return attr;
}

static const char *intel_pt_find_filter(struct perf_evlist *evlist,
					struct perf_pmu *intel_pt_pmu)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type)
			return evsel->filter;
	}

	return NULL;
}

static size_t intel_pt_filter_bytes(const char *filter)
{
	size_t len = filter ? strlen(filter) : 0;

	return len ? roundup(len + 1, 8) : 0;
}

static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
			 intel_pt_filter_bytes(filter);

	return ptr->priv_size;
}

static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
	*n = ebx;
	*d = eax;
}

static int intel_pt_info_fill(struct auxtrace_record *itr,
			      struct perf_session *session,
			      struct auxtrace_info_event *auxtrace_info,
			      size_t priv_size)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false, per_cpu_mmaps;
	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
	unsigned long max_non_turbo_ratio;
	size_t filter_str_len;
	const char *filter;
	u64 *info;
	int err;

	if (priv_size != ptr->priv_size)
		return -EINVAL;

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
			     &noretcomp_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
					      "mtc_period");
	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
				"%lu", &max_non_turbo_ratio) != 1)
		max_non_turbo_ratio = 0;

	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
	filter_str_len = filter ? strlen(filter) : 0;

	if (!session->evlist->nr_mmaps)
		return -EINVAL;

	pc = session->evlist->mmap[0].base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel Processor Trace: TSC not available\n");
	}

	per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);

	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

	if (filter_str_len) {
		size_t len = intel_pt_filter_bytes(filter);

		strncpy((char *)info, filter, len);
		info += len >> 3;
	}

	return 0;
}

static int intel_pt_track_switches(struct perf_evlist *evlist)
{
	const char *sched_switch = "sched:sched_switch";
	struct perf_evsel *evsel;
	int err;

	if (!perf_evlist__can_select_event(evlist, sched_switch))
		return -EPERM;

	err = parse_events(evlist, sched_switch, NULL);
	if (err) {
		pr_debug2("%s: failed to parse %s, error %d\n",
			  __func__, sched_switch, err);
		return err;
	}

	evsel = perf_evlist__last(evlist);

	perf_evsel__set_sample_bit(evsel, CPU);
	perf_evsel__set_sample_bit(evsel, TIME);

	evsel->system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	return 0;
}

static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
	unsigned int val, last = 0, state = 1;
	int p = 0;

	str[0] = '\0';

	for (val = 0; val <= 64; val++, valid >>= 1) {
		if (valid & 1) {
			last = val;
			switch (state) {
			case 0:
				p += scnprintf(str + p, len - p, ",");
				/* Fall through */
			case 1:
				p += scnprintf(str + p, len - p, "%u", val);
				state = 2;
				break;
			case 2:
				state = 3;
				break;
			case 3:
				state = 4;
				break;
			default:
				break;
			}
		} else {
			switch (state) {
			case 3:
				p += scnprintf(str + p, len - p, ",%u", last);
				state = 0;
				break;
			case 4:
				p += scnprintf(str + p, len - p, "-%u", last);
				state = 0;
				break;
			default:
				break;
			}
			if (state != 1)
				state = 0;
		}
	}
}

static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
				    const char *caps, const char *name,
				    const char *supported, u64 config)
{
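	/*
	 * Read the bitmap of supported values for this term from the PMU's
	 * caps file, extract the term's value from @config using the format
	 * bits, and check that the value is one of the supported ones.
	 */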
	char valid_str[256];
	unsigned int shift;
	unsigned long long valid;
	u64 bits;
	int ok;

	if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
		valid = 0;

	if (supported &&
	    perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
		valid = 0;

	valid |= 1;

	bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

	config &= bits;

	for (shift = 0; bits && !(bits & 1); shift++)
		bits >>= 1;

	config >>= shift;

	if (config > 63)
		goto out_err;

	if (valid & ((u64)1 << config))
		return 0;
out_err:
	intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
	pr_err("Invalid %s for %s. Valid values are: %s\n",
	       name, INTEL_PT_PMU_NAME, valid_str);
	return -EINVAL;
}

static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
				    struct perf_evsel *evsel)
{
	int err;
	char c;

	if (!evsel)
		return 0;

	/*
	 * If supported, force pass-through config term (pt=1) even if user
	 * sets pt=0, which avoids senseless kernel errors.
	 */
	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    !(evsel->attr.config & 1)) {
		pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
		evsel->attr.config |= 1;
	}

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
				       "cyc_thresh", "caps/psb_cyc",
				       evsel->attr.config);
	if (err)
		return err;

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
				       "mtc_period", "caps/mtc",
				       evsel->attr.config);
	if (err)
		return err;

	return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
					"psb_period", "caps/psb_cyc",
					evsel->attr.config);
}

static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct perf_evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
	const struct cpu_map *cpus = evlist->cpus;
	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->attr.freq = 0;
			evsel->attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	if (!opts->full_auxtrace)
		return 0;

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
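				/*
				 * Unprivileged users get a smaller 128KiB AUX
				 * area by default, and the normal mmap size is
				 * bumped to 256KiB if the user did not set it.
				 */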
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						    INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 */
	if (have_timing_info && !cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct perf_evsel *switch_evsel;

				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = perf_evlist__last(evlist);

				switch_evsel->attr.freq = 0;
				switch_evsel->attr.sample_period = 1;
				switch_evsel->attr.context_switch = 1;

				switch_evsel->system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);
				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			err = intel_pt_track_switches(evlist);
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct perf_evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = perf_evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->attr.freq = 0;
		tracking_evsel->attr.sample_period = 1;

		tracking_evsel->no_aux_samples = true;
		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}

static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evsel__disable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evsel__enable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
	const size_t sz = sizeof(struct intel_pt_snapshot_ref);
	int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
	struct intel_pt_snapshot_ref *refs;

	if (!new_cnt)
		new_cnt = 16;

	while (new_cnt <= idx)
		new_cnt *= 2;

	refs = calloc(new_cnt, sz);
	if (!refs)
		return -ENOMEM;

	memcpy(refs, ptr->snapshot_refs, cnt * sz);

	ptr->snapshot_refs = refs;
	ptr->snapshot_ref_cnt = new_cnt;

	return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
	int i;

	for (i = 0; i < ptr->snapshot_ref_cnt; i++)
		zfree(&ptr->snapshot_refs[i].ref_buf);
	zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);

	intel_pt_free_snapshot_refs(ptr);
	free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
				       size_t snapshot_buf_size)
{
	size_t ref_buf_size = ptr->snapshot_ref_buf_size;
	void *ref_buf;

	ref_buf = zalloc(ref_buf_size);
	if (!ref_buf)
		return -ENOMEM;

	ptr->snapshot_refs[idx].ref_buf = ref_buf;
	ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

	return 0;
}

static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
					     size_t snapshot_buf_size)
{
	const size_t max_size = 256 * 1024;
	size_t buf_size = 0, psb_period;

	if (ptr->snapshot_size <= 64 * 1024)
		return 0;

	psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
	if (psb_period)
		buf_size = psb_period * 2;

	if (!buf_size || buf_size > max_size)
		buf_size = max_size;

	if (buf_size >= snapshot_buf_size)
		return 0;

	if (buf_size >= ptr->snapshot_size / 2)
		return 0;

	return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
				  size_t snapshot_buf_size)
{
	if (ptr->snapshot_init_done)
		return 0;

	ptr->snapshot_init_done = true;

	ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
							snapshot_buf_size);

	return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous.  It is assumed that @compare_size <=
 * @buf2_size.  This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	size_t end2 = offs2 + compare_size, part_size;

	if (end2 <= buf2_size)
		return memcmp(buf1, buf2 + offs2, compare_size);

	part_size = end2 - buf2_size;
	if (memcmp(buf1, buf2 + offs2, part_size))
		return true;

	compare_size -= part_size;

	return memcmp(buf1 + part_size, buf2, compare_size);
}

static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	size_t ref_end = ref_offset + ref_size;

	if (ref_end > buf_size) {
		if (head > ref_offset || head < ref_end - buf_size)
			return true;
	} else if (head > ref_offset && head < ref_end) {
		return true;
	}

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}

static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	if (head >= ref_size) {
		memcpy(ref_buf, data + head - ref_size, ref_size);
	} else {
		memcpy(ref_buf, data, head);
		ref_size -= head;
		memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
	}
}

static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
			     struct auxtrace_mmap *mm, unsigned char *data,
			     u64 head)
{
	struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
	bool wrapped;

	wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
				       ptr->snapshot_ref_buf_size, mm->len,
				       data, head);

	intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
			  data, head);

	return wrapped;
}

static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	int i, a, b;

	b = buf_size >> 3;
	a = b - 512;
	if (a < 0)
		a = 0;

	for (i = a; i < b; i++) {
		if (data[i])
			return true;
	}

	return false;
}

static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
				  struct auxtrace_mmap *mm, unsigned char *data,
				  u64 *head, u64 *old)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	err = intel_pt_snapshot_init(ptr, mm->len);
	if (err)
		goto out_err;

	if (idx >= ptr->snapshot_ref_cnt) {
		err = intel_pt_alloc_snapshot_refs(ptr, idx);
		if (err)
			goto out_err;
	}

	if (ptr->snapshot_ref_buf_size) {
		if (!ptr->snapshot_refs[idx].ref_buf) {
			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
			if (err)
				goto out_err;
		}
		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
	} else {
		wrapped = ptr->snapshot_refs[idx].wrapped;
		if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
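			/*
			 * Non-zero data at the tail of the zero-initialized
			 * buffer means tracing has wrapped around at least
			 * once; remember that for subsequent snapshots.
			 */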
			ptr->snapshot_refs[idx].wrapped = true;
			wrapped = true;
		}
	}

	/*
	 * In full trace mode 'head' continually increases.  However in snapshot
	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old' is
	 * always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
	return rdtsc();
}

static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
							     idx);
	}
	return -EINVAL;
}

struct auxtrace_record *intel_pt_recording_init(int *err)
{
	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
	struct intel_pt_recording *ptr;

	if (!intel_pt_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	ptr = zalloc(sizeof(struct intel_pt_recording));
	if (!ptr) {
		*err = -ENOMEM;
		return NULL;
	}

	ptr->intel_pt_pmu = intel_pt_pmu;
	ptr->itr.recording_options = intel_pt_recording_options;
	ptr->itr.info_priv_size = intel_pt_info_priv_size;
	ptr->itr.info_fill = intel_pt_info_fill;
	ptr->itr.free = intel_pt_recording_free;
	ptr->itr.snapshot_start = intel_pt_snapshot_start;
	ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
	ptr->itr.find_snapshot = intel_pt_find_snapshot;
	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
	ptr->itr.reference = intel_pt_reference;
	ptr->itr.read_finish = intel_pt_read_finish;
	return &ptr->itr;
}