// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <cpuid.h>

#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include "../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/record.h"
#include "../../util/target.h"
#include "../../util/tsc.h"
#include <internal/lib.h> // page_size
#include "../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_PSB_PERIOD_NEAR	256

struct intel_pt_snapshot_ref {
	void *ref_buf;
	size_t ref_offset;
	bool wrapped;
};

struct intel_pt_recording {
	struct auxtrace_record itr;
	struct perf_pmu *intel_pt_pmu;
	int have_sched_switch;
	struct evlist *evlist;
	bool snapshot_mode;
	bool snapshot_init_done;
	size_t snapshot_size;
	size_t snapshot_ref_buf_size;
	int snapshot_ref_cnt;
	struct intel_pt_snapshot_ref *snapshot_refs;
	size_t priv_size;
};

static int intel_pt_parse_terms_with_default(struct list_head *formats,
					     const char *str,
					     u64 *config)
{
	struct list_head *terms;
	struct perf_event_attr attr = { .size = 0, };
	int err;

	terms = malloc(sizeof(struct list_head));
	if (!terms)
		return -ENOMEM;

	INIT_LIST_HEAD(terms);

	err = parse_events_terms(terms, str);
	if (err)
		goto out_free;

	attr.config = *config;
	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
	if (err)
		goto out_free;

	*config = attr.config;
out_free:
	parse_events_terms__delete(terms);
	return err;
}

static int intel_pt_parse_terms(struct list_head *formats, const char *str,
				u64 *config)
{
	*config = 0;
	return intel_pt_parse_terms_with_default(formats, str, config);
}

static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	const u64 top_bit = 1ULL << 63;
	u64 res = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & top_bit) {
			res <<= 1;
			if (bits & top_bit)
				res |= 1;
		}
		mask <<= 1;
		bits <<= 1;
	}

	return res;
}

static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
				struct evlist *evlist, u64 *res)
{
	struct evsel *evsel;
	u64 mask;

	*res = 0;

	mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
	if (!mask)
		return -EINVAL;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type) {
			*res = intel_pt_masked_bits(mask, evsel->core.attr.config);
			return 0;
		}
	}

	return -EINVAL;
}

static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
				  struct evlist *evlist)
{
	u64 val;
	int err, topa_multiple_entries;
	size_t psb_period;

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
				"%d", &topa_multiple_entries) != 1)
		topa_multiple_entries = 0;

	/*
	 * Use caps/topa_multiple_entries to indicate early hardware that had
	 * more frequent PSBs.
	 */
	if (!topa_multiple_entries) {
		psb_period = 256;
		goto out;
	}

	err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
	if (err)
		val = 0;

	psb_period = 1 << (val + 11);
out:
	pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
	return psb_period;
}

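/*
 * Pick a default value from a capability bitmask: the highest set bit that
 * does not exceed @target, or failing that, the lowest set bit above @target.
 */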
static int intel_pt_pick_bit(int bits, int target)
{
	int pos, pick = -1;

	for (pos = 0; bits; bits >>= 1, pos++) {
		if (bits & 1) {
			if (pos <= target || pick < 0)
				pick = pos;
			if (pos >= target)
				break;
		}
	}

	return pick;
}

static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
	char buf[256];
	int mtc, mtc_periods = 0, mtc_period;
	int psb_cyc, psb_periods, psb_period;
	int pos = 0;
	u64 config;
	char c;

	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
				&mtc) != 1)
		mtc = 1;

	if (mtc) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
					&mtc_periods) != 1)
			mtc_periods = 0;
		if (mtc_periods) {
			mtc_period = intel_pt_pick_bit(mtc_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",mtc,mtc_period=%d", mtc_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
				&psb_cyc) != 1)
		psb_cyc = 1;

	if (psb_cyc && mtc_periods) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
					&psb_periods) != 1)
			psb_periods = 0;
		if (psb_periods) {
			psb_period = intel_pt_pick_bit(psb_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",psb_period=%d", psb_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");

	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

	return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
					   struct record_opts *opts,
					   const char *str)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	ptr->snapshot_size = snapshot_size;

	return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
	struct perf_event_attr *attr;

	attr = zalloc(sizeof(struct perf_event_attr));
	if (!attr)
		return NULL;

	attr->config = intel_pt_default_config(intel_pt_pmu);

	intel_pt_pmu->selectable = true;

	return attr;
}

static const char *intel_pt_find_filter(struct evlist *evlist,
					struct perf_pmu *intel_pt_pmu)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type)
			return evsel->filter;
	}

	return NULL;
}

static size_t intel_pt_filter_bytes(const char *filter)
{
	size_t len = filter ? strlen(filter) : 0;

	return len ? roundup(len + 1, 8) : 0;
}

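/*
 * Size of the AUXTRACE_INFO private data: the fixed array of
 * INTEL_PT_AUXTRACE_PRIV_MAX u64 values plus space for the address filter
 * string, if any, padded to a multiple of 8 bytes.
 */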
static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
			 intel_pt_filter_bytes(filter);

	return ptr->priv_size;
}

static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
	*n = ebx;
	*d = eax;
}

static int intel_pt_info_fill(struct auxtrace_record *itr,
			      struct perf_session *session,
			      struct perf_record_auxtrace_info *auxtrace_info,
			      size_t priv_size)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false, per_cpu_mmaps;
	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
	unsigned long max_non_turbo_ratio;
	size_t filter_str_len;
	const char *filter;
	__u64 *info;
	int err;

	if (priv_size != ptr->priv_size)
		return -EINVAL;

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
			     &noretcomp_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
					      "mtc_period");
	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
				"%lu", &max_non_turbo_ratio) != 1)
		max_non_turbo_ratio = 0;

	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
	filter_str_len = filter ? strlen(filter) : 0;

	if (!session->evlist->core.nr_mmaps)
		return -EINVAL;

	pc = session->evlist->mmap[0].core.base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel Processor Trace: TSC not available\n");
	}

	per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);

	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

	if (filter_str_len) {
		size_t len = intel_pt_filter_bytes(filter);

		strncpy((char *)info, filter, len);
		info += len >> 3;
	}

	return 0;
}

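/*
 * Fall back to the sched:sched_switch tracepoint for tracking context
 * switches when PERF_RECORD_SWITCH events cannot be used.
 */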
static int intel_pt_track_switches(struct evlist *evlist)
{
	const char *sched_switch = "sched:sched_switch";
	struct evsel *evsel;
	int err;

	if (!perf_evlist__can_select_event(evlist, sched_switch))
		return -EPERM;

	err = parse_events(evlist, sched_switch, NULL);
	if (err) {
		pr_debug2("%s: failed to parse %s, error %d\n",
			  __func__, sched_switch, err);
		return err;
	}

	evsel = evlist__last(evlist);

	perf_evsel__set_sample_bit(evsel, CPU);
	perf_evsel__set_sample_bit(evsel, TIME);

	evsel->core.system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	return 0;
}

static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
	unsigned int val, last = 0, state = 1;
	int p = 0;

	str[0] = '\0';

	for (val = 0; val <= 64; val++, valid >>= 1) {
		if (valid & 1) {
			last = val;
			switch (state) {
			case 0:
				p += scnprintf(str + p, len - p, ",");
				/* Fall through */
			case 1:
				p += scnprintf(str + p, len - p, "%u", val);
				state = 2;
				break;
			case 2:
				state = 3;
				break;
			case 3:
				state = 4;
				break;
			default:
				break;
			}
		} else {
			switch (state) {
			case 3:
				p += scnprintf(str + p, len - p, ",%u", last);
				state = 0;
				break;
			case 4:
				p += scnprintf(str + p, len - p, "-%u", last);
				state = 0;
				break;
			default:
				break;
			}
			if (state != 1)
				state = 0;
		}
	}
}

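/*
 * Check that the value selected by the @name format bits of @config is one of
 * the values advertised by the @caps capability file, and that the @supported
 * capability, if given, is not zero.
 */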
static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
				    const char *caps, const char *name,
				    const char *supported, u64 config)
{
	char valid_str[256];
	unsigned int shift;
	unsigned long long valid;
	u64 bits;
	int ok;

	if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
		valid = 0;

	if (supported &&
	    perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
		valid = 0;

	valid |= 1;

	bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

	config &= bits;

	for (shift = 0; bits && !(bits & 1); shift++)
		bits >>= 1;

	config >>= shift;

	if (config > 63)
		goto out_err;

	if (valid & (1ULL << config))
		return 0;
out_err:
	intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
	pr_err("Invalid %s for %s. Valid values are: %s\n",
	       name, INTEL_PT_PMU_NAME, valid_str);
	return -EINVAL;
}

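/*
 * Validate the Intel PT config terms that are restricted by hardware
 * capabilities: cyc_thresh, mtc_period and psb_period.
 */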
static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
				    struct evsel *evsel)
{
	int err;
	char c;

	if (!evsel)
		return 0;

	/*
	 * If supported, force pass-through config term (pt=1) even if user
	 * sets pt=0, which avoids senseless kernel errors.
	 */
	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    !(evsel->core.attr.config & 1)) {
		pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
		evsel->core.attr.config |= 1;
	}

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
				       "cyc_thresh", "caps/psb_cyc",
				       evsel->core.attr.config);
	if (err)
		return err;

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
				       "mtc_period", "caps/mtc",
				       evsel->core.attr.config);
	if (err)
		return err;

	return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
					"psb_period", "caps/psb_cyc",
					evsel->core.attr.config);
}

/*
 * Currently, there is not enough information to disambiguate different PEBS
 * events, so only allow one.
 */
static bool intel_pt_too_many_aux_output(struct evlist *evlist)
{
	struct evsel *evsel;
	int aux_output_cnt = 0;

	evlist__for_each_entry(evlist, evsel)
		aux_output_cnt += !!evsel->core.attr.aux_output;

	if (aux_output_cnt > 1) {
		pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n");
		return true;
	}

	return false;
}

static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct evsel *evsel, *intel_pt_evsel = NULL;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	bool privileged = perf_event_paranoid_check(-1);
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->core.attr.freq = 0;
			evsel->core.attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	if (intel_pt_too_many_aux_output(evlist))
		return -EINVAL;

	if (!opts->full_auxtrace)
		return 0;

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						    INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 */
	if (have_timing_info && !perf_cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct evsel *switch_evsel;

				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = evlist__last(evlist);

				switch_evsel->core.attr.freq = 0;
				switch_evsel->core.attr.sample_period = 1;
				switch_evsel->core.attr.context_switch = 1;

				switch_evsel->core.system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);
				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			err = intel_pt_track_switches(evlist);
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!perf_cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->core.attr.freq = 0;
		tracking_evsel->core.attr.sample_period = 1;

		tracking_evsel->no_aux_samples = true;
		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!perf_cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}

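/*
 * Disable the Intel PT event so that tracing stops and the AUX buffer is
 * stable while the snapshot is copied; intel_pt_snapshot_finish() re-enables
 * it afterwards.
 */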
static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return evsel__disable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return evsel__enable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
	const size_t sz = sizeof(struct intel_pt_snapshot_ref);
	int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
	struct intel_pt_snapshot_ref *refs;

	if (!new_cnt)
		new_cnt = 16;

	while (new_cnt <= idx)
		new_cnt *= 2;

	refs = calloc(new_cnt, sz);
	if (!refs)
		return -ENOMEM;

	memcpy(refs, ptr->snapshot_refs, cnt * sz);

	ptr->snapshot_refs = refs;
	ptr->snapshot_ref_cnt = new_cnt;

	return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
	int i;

	for (i = 0; i < ptr->snapshot_ref_cnt; i++)
		zfree(&ptr->snapshot_refs[i].ref_buf);
	zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);

	intel_pt_free_snapshot_refs(ptr);
	free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
				       size_t snapshot_buf_size)
{
	size_t ref_buf_size = ptr->snapshot_ref_buf_size;
	void *ref_buf;

	ref_buf = zalloc(ref_buf_size);
	if (!ref_buf)
		return -ENOMEM;

	ptr->snapshot_refs[idx].ref_buf = ref_buf;
	ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

	return 0;
}

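/*
 * Size of the reference buffer used to detect wrap-around of the AUX area:
 * roughly two PSB periods, capped at 256 KiB, or zero when the snapshot or
 * mmap buffer is too small for the comparison to be worthwhile.
 */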
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
					     size_t snapshot_buf_size)
{
	const size_t max_size = 256 * 1024;
	size_t buf_size = 0, psb_period;

	if (ptr->snapshot_size <= 64 * 1024)
		return 0;

	psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
	if (psb_period)
		buf_size = psb_period * 2;

	if (!buf_size || buf_size > max_size)
		buf_size = max_size;

	if (buf_size >= snapshot_buf_size)
		return 0;

	if (buf_size >= ptr->snapshot_size / 2)
		return 0;

	return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
				  size_t snapshot_buf_size)
{
	if (ptr->snapshot_init_done)
		return 0;

	ptr->snapshot_init_done = true;

	ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
							snapshot_buf_size);

	return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous. It is assumed that @compare_size <=
 * @buf2_size. This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	size_t end2 = offs2 + compare_size, part_size;

	if (end2 <= buf2_size)
		return memcmp(buf1, buf2 + offs2, compare_size);

	part_size = end2 - buf2_size;
	if (memcmp(buf1, buf2 + offs2, part_size))
		return true;

	compare_size -= part_size;

	return memcmp(buf1 + part_size, buf2, compare_size);
}

static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	size_t ref_end = ref_offset + ref_size;

	if (ref_end > buf_size) {
		if (head > ref_offset || head < ref_end - buf_size)
			return true;
	} else if (head > ref_offset && head < ref_end) {
		return true;
	}

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}

static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	if (head >= ref_size) {
		memcpy(ref_buf, data + head - ref_size, ref_size);
	} else {
		memcpy(ref_buf, data, head);
		ref_size -= head;
		memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
	}
}

static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
			     struct auxtrace_mmap *mm, unsigned char *data,
			     u64 head)
{
	struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
	bool wrapped;

	wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
				       ptr->snapshot_ref_buf_size, mm->len,
				       data, head);

	intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
			  data, head);

	return wrapped;
}

static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	int i, a, b;

	b = buf_size >> 3;
	a = b - 512;
	if (a < 0)
		a = 0;

	for (i = a; i < b; i++) {
		if (data[i])
			return true;
	}

	return false;
}

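/*
 * Determine whether the AUX buffer has wrapped since the last snapshot and
 * adjust 'old' and 'head' to look like the continually-increasing offsets
 * that full trace mode provides.
 */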
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
				  struct auxtrace_mmap *mm, unsigned char *data,
				  u64 *head, u64 *old)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	err = intel_pt_snapshot_init(ptr, mm->len);
	if (err)
		goto out_err;

	if (idx >= ptr->snapshot_ref_cnt) {
		err = intel_pt_alloc_snapshot_refs(ptr, idx);
		if (err)
			goto out_err;
	}

	if (ptr->snapshot_ref_buf_size) {
		if (!ptr->snapshot_refs[idx].ref_buf) {
			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
			if (err)
				goto out_err;
		}
		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
	} else {
		wrapped = ptr->snapshot_refs[idx].wrapped;
		if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
			ptr->snapshot_refs[idx].wrapped = true;
			wrapped = true;
		}
	}

	/*
	 * In full trace mode 'head' continually increases. However in snapshot
	 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old'
	 * is always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
	return rdtsc();
}

static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
							     idx);
	}
	return -EINVAL;
}

struct auxtrace_record *intel_pt_recording_init(int *err)
{
	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
	struct intel_pt_recording *ptr;

	if (!intel_pt_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	ptr = zalloc(sizeof(struct intel_pt_recording));
	if (!ptr) {
		*err = -ENOMEM;
		return NULL;
	}

	ptr->intel_pt_pmu = intel_pt_pmu;
	ptr->itr.recording_options = intel_pt_recording_options;
	ptr->itr.info_priv_size = intel_pt_info_priv_size;
	ptr->itr.info_fill = intel_pt_info_fill;
	ptr->itr.free = intel_pt_recording_free;
	ptr->itr.snapshot_start = intel_pt_snapshot_start;
	ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
	ptr->itr.find_snapshot = intel_pt_find_snapshot;
	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
	ptr->itr.reference = intel_pt_reference;
	ptr->itr.read_finish = intel_pt_read_finish;
	return &ptr->itr;
}