// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <cpuid.h>

#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/record.h"
#include "../../util/target.h"
#include "../../util/tsc.h"
#include "../../util/util.h"
#include "../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_PSB_PERIOD_NEAR	256

struct intel_pt_snapshot_ref {
	void	*ref_buf;
	size_t	ref_offset;
	bool	wrapped;
};

struct intel_pt_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*intel_pt_pmu;
	int				have_sched_switch;
	struct evlist			*evlist;
	bool				snapshot_mode;
	bool				snapshot_init_done;
	size_t				snapshot_size;
	size_t				snapshot_ref_buf_size;
	int				snapshot_ref_cnt;
	struct intel_pt_snapshot_ref	*snapshot_refs;
	size_t				priv_size;
};

static int intel_pt_parse_terms_with_default(struct list_head *formats,
					     const char *str,
					     u64 *config)
{
	struct list_head *terms;
	struct perf_event_attr attr = { .size = 0, };
	int err;

	terms = malloc(sizeof(struct list_head));
	if (!terms)
		return -ENOMEM;

	INIT_LIST_HEAD(terms);

	err = parse_events_terms(terms, str);
	if (err)
		goto out_free;

	attr.config = *config;
	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
	if (err)
		goto out_free;

	*config = attr.config;
out_free:
	parse_events_terms__delete(terms);
	return err;
}

static int intel_pt_parse_terms(struct list_head *formats, const char *str,
				u64 *config)
{
	*config = 0;
	return intel_pt_parse_terms_with_default(formats, str, config);
}

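/*
 * intel_pt_masked_bits() - extract the bits of @bits selected by @mask and
 * pack them towards bit zero.  For example, mask 0xf0 with bits 0x50 yields
 * 0x5.  intel_pt_read_config() uses this to read a single config term (e.g.
 * psb_period) back out of an event's attr.config value.
 */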
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	const u64 top_bit = 1ULL << 63;
	u64 res = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & top_bit) {
			res <<= 1;
			if (bits & top_bit)
				res |= 1;
		}
		mask <<= 1;
		bits <<= 1;
	}

	return res;
}

static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
				struct evlist *evlist, u64 *res)
{
	struct evsel *evsel;
	u64 mask;

	*res = 0;

	mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
	if (!mask)
		return -EINVAL;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type) {
			*res = intel_pt_masked_bits(mask, evsel->core.attr.config);
			return 0;
		}
	}

	return -EINVAL;
}

static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
				  struct evlist *evlist)
{
	u64 val;
	int err, topa_multiple_entries;
	size_t psb_period;

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
				"%d", &topa_multiple_entries) != 1)
		topa_multiple_entries = 0;

	/*
	 * Use caps/topa_multiple_entries to indicate early hardware that had
	 * extra frequent PSBs.
	 */
	if (!topa_multiple_entries) {
		psb_period = 256;
		goto out;
	}

	err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
	if (err)
		val = 0;

	psb_period = 1 << (val + 11);
out:
	pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
	return psb_period;
}

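/*
 * intel_pt_pick_bit() - choose a default value from a bitmask of supported
 * values.  Returns the position of the highest set bit that is <= @target,
 * or failing that the lowest set bit above @target, or -1 if @bits is zero.
 * For example, bits 0x12 (positions 1 and 4) with target 3 picks 1.
 */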
static int intel_pt_pick_bit(int bits, int target)
{
	int pos, pick = -1;

	for (pos = 0; bits; bits >>= 1, pos++) {
		if (bits & 1) {
			if (pos <= target || pick < 0)
				pick = pos;
			if (pos >= target)
				break;
		}
	}

	return pick;
}

static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
	char buf[256];
	int mtc, mtc_periods = 0, mtc_period;
	int psb_cyc, psb_periods, psb_period;
	int pos = 0;
	u64 config;
	char c;

	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
				&mtc) != 1)
		mtc = 1;

	if (mtc) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
					&mtc_periods) != 1)
			mtc_periods = 0;
		if (mtc_periods) {
			mtc_period = intel_pt_pick_bit(mtc_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",mtc,mtc_period=%d", mtc_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
				&psb_cyc) != 1)
		psb_cyc = 1;

	if (psb_cyc && mtc_periods) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
					&psb_periods) != 1)
			psb_periods = 0;
		if (psb_periods) {
			psb_period = intel_pt_pick_bit(psb_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",psb_period=%d", psb_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
		pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");

	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

	return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
					   struct record_opts *opts,
					   const char *str)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	ptr->snapshot_size = snapshot_size;

	return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
	struct perf_event_attr *attr;

	attr = zalloc(sizeof(struct perf_event_attr));
	if (!attr)
		return NULL;

	attr->config = intel_pt_default_config(intel_pt_pmu);

	intel_pt_pmu->selectable = true;

	return attr;
}

static const char *intel_pt_find_filter(struct evlist *evlist,
					struct perf_pmu *intel_pt_pmu)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type)
			return evsel->filter;
	}

	return NULL;
}

static size_t intel_pt_filter_bytes(const char *filter)
{
	size_t len = filter ? strlen(filter) : 0;

	return len ? roundup(len + 1, 8) : 0;
}

static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
			 intel_pt_filter_bytes(filter);

	return ptr->priv_size;
}

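/*
 * intel_pt_tsc_ctc_ratio() - read the TSC to Core Crystal Clock ratio from
 * CPUID leaf 0x15 (numerator in EBX, denominator in EAX).  The ratio is
 * exported to the decoder so that MTC/CTC timing packets can be related to
 * TSC values.
 */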
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
	*n = ebx;
	*d = eax;
}

static int intel_pt_info_fill(struct auxtrace_record *itr,
			      struct perf_session *session,
			      struct perf_record_auxtrace_info *auxtrace_info,
			      size_t priv_size)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false, per_cpu_mmaps;
	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
	unsigned long max_non_turbo_ratio;
	size_t filter_str_len;
	const char *filter;
	__u64 *info;
	int err;

	if (priv_size != ptr->priv_size)
		return -EINVAL;

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
			     &noretcomp_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
					      "mtc_period");
	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
				"%lu", &max_non_turbo_ratio) != 1)
		max_non_turbo_ratio = 0;

	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
	filter_str_len = filter ? strlen(filter) : 0;

	if (!session->evlist->nr_mmaps)
		return -EINVAL;

	pc = session->evlist->mmap[0].base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel Processor Trace: TSC not available\n");
	}

	per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);

	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

	if (filter_str_len) {
		size_t len = intel_pt_filter_bytes(filter);

		strncpy((char *)info, filter, len);
		info += len >> 3;
	}

	return 0;
}

static int intel_pt_track_switches(struct evlist *evlist)
{
	const char *sched_switch = "sched:sched_switch";
	struct evsel *evsel;
	int err;

	if (!perf_evlist__can_select_event(evlist, sched_switch))
		return -EPERM;

	err = parse_events(evlist, sched_switch, NULL);
	if (err) {
		pr_debug2("%s: failed to parse %s, error %d\n",
			  __func__, sched_switch, err);
		return err;
	}

	evsel = perf_evlist__last(evlist);

	perf_evsel__set_sample_bit(evsel, CPU);
	perf_evsel__set_sample_bit(evsel, TIME);

	evsel->system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	return 0;
}

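/*
 * intel_pt_valid_str() - format a bitmask of accepted config values as a
 * human-readable list for error messages.  Runs of consecutive valid values
 * are collapsed into ranges, so e.g. a mask of 0x1d becomes "0,2-4".
 */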
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
	unsigned int val, last = 0, state = 1;
	int p = 0;

	str[0] = '\0';

	for (val = 0; val <= 64; val++, valid >>= 1) {
		if (valid & 1) {
			last = val;
			switch (state) {
			case 0:
				p += scnprintf(str + p, len - p, ",");
				/* Fall through */
			case 1:
				p += scnprintf(str + p, len - p, "%u", val);
				state = 2;
				break;
			case 2:
				state = 3;
				break;
			case 3:
				state = 4;
				break;
			default:
				break;
			}
		} else {
			switch (state) {
			case 3:
				p += scnprintf(str + p, len - p, ",%u", last);
				state = 0;
				break;
			case 4:
				p += scnprintf(str + p, len - p, "-%u", last);
				state = 0;
				break;
			default:
				break;
			}
			if (state != 1)
				state = 0;
		}
	}
}

static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
				    const char *caps, const char *name,
				    const char *supported, u64 config)
{
	char valid_str[256];
	unsigned int shift;
	unsigned long long valid;
	u64 bits;
	int ok;

	if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
		valid = 0;

	if (supported &&
	    perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
		valid = 0;

	valid |= 1;

	bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

	config &= bits;

	for (shift = 0; bits && !(bits & 1); shift++)
		bits >>= 1;

	config >>= shift;

	if (config > 63)
		goto out_err;

	if (valid & (1 << config))
		return 0;
out_err:
	intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
	pr_err("Invalid %s for %s. Valid values are: %s\n",
	       name, INTEL_PT_PMU_NAME, valid_str);
	return -EINVAL;
}

static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
				    struct evsel *evsel)
{
	int err;
	char c;

	if (!evsel)
		return 0;

	/*
	 * If supported, force pass-through config term (pt=1) even if user
	 * sets pt=0, which avoids senseless kernel errors.
	 */
	if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
	    !(evsel->core.attr.config & 1)) {
		pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
		evsel->core.attr.config |= 1;
	}

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
				       "cyc_thresh", "caps/psb_cyc",
				       evsel->core.attr.config);
	if (err)
		return err;

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
				       "mtc_period", "caps/mtc",
				       evsel->core.attr.config);
	if (err)
		return err;

	return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
					"psb_period", "caps/psb_cyc",
					evsel->core.attr.config);
}

/*
 * Currently, there is not enough information to disambiguate different PEBS
 * events, so only allow one.
 */
static bool intel_pt_too_many_aux_output(struct evlist *evlist)
{
	struct evsel *evsel;
	int aux_output_cnt = 0;

	evlist__for_each_entry(evlist, evsel)
		aux_output_cnt += !!evsel->core.attr.aux_output;

	if (aux_output_cnt > 1) {
		pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n");
		return true;
	}

	return false;
}

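/*
 * intel_pt_recording_options() - validate and complete the record options for
 * an Intel PT session: force a sample period of 1 on the PT event, pick
 * default AUX area and snapshot sizes, check that the AUX mmap size is a
 * power of 2 no smaller than 4KiB (snapshot mode) or 8KiB (full trace), and,
 * for per-cpu recording with timing information, add a context-switch or
 * sched_switch event plus a dummy tracking event so the decoder can tell
 * which thread is running.
 */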
static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct evsel *evsel, *intel_pt_evsel = NULL;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	bool privileged = perf_event_paranoid_check(-1);
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->core.attr.freq = 0;
			evsel->core.attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	if (intel_pt_too_many_aux_output(evlist))
		return -EINVAL;

	if (!opts->full_auxtrace)
		return 0;

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						    INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 */
	if (have_timing_info && !perf_cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct evsel *switch_evsel;

				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = perf_evlist__last(evlist);

				switch_evsel->core.attr.freq = 0;
				switch_evsel->core.attr.sample_period = 1;
				switch_evsel->core.attr.context_switch = 1;

				switch_evsel->system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);
				perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			err = intel_pt_track_switches(evlist);
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!perf_cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = perf_evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->core.attr.freq = 0;
		tracking_evsel->core.attr.sample_period = 1;

		tracking_evsel->no_aux_samples = true;
		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!perf_cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
		perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}

static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return evsel__disable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return evsel__enable(evsel);
	}
	return -EINVAL;
}

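/*
 * Grow the per-mmap array of snapshot references so that it covers AUX mmap
 * index @idx.  The array size doubles (starting at 16 entries) and new
 * entries are zero-initialised.
 */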
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
	const size_t sz = sizeof(struct intel_pt_snapshot_ref);
	int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
	struct intel_pt_snapshot_ref *refs;

	if (!new_cnt)
		new_cnt = 16;

	while (new_cnt <= idx)
		new_cnt *= 2;

	refs = calloc(new_cnt, sz);
	if (!refs)
		return -ENOMEM;

	memcpy(refs, ptr->snapshot_refs, cnt * sz);

	ptr->snapshot_refs = refs;
	ptr->snapshot_ref_cnt = new_cnt;

	return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
	int i;

	for (i = 0; i < ptr->snapshot_ref_cnt; i++)
		zfree(&ptr->snapshot_refs[i].ref_buf);
	zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);

	intel_pt_free_snapshot_refs(ptr);
	free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
				       size_t snapshot_buf_size)
{
	size_t ref_buf_size = ptr->snapshot_ref_buf_size;
	void *ref_buf;

	ref_buf = zalloc(ref_buf_size);
	if (!ref_buf)
		return -ENOMEM;

	ptr->snapshot_refs[idx].ref_buf = ref_buf;
	ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

	return 0;
}

static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
					     size_t snapshot_buf_size)
{
	const size_t max_size = 256 * 1024;
	size_t buf_size = 0, psb_period;

	if (ptr->snapshot_size <= 64 * 1024)
		return 0;

	psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
	if (psb_period)
		buf_size = psb_period * 2;

	if (!buf_size || buf_size > max_size)
		buf_size = max_size;

	if (buf_size >= snapshot_buf_size)
		return 0;

	if (buf_size >= ptr->snapshot_size / 2)
		return 0;

	return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
				  size_t snapshot_buf_size)
{
	if (ptr->snapshot_init_done)
		return 0;

	ptr->snapshot_init_done = true;

	ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
							snapshot_buf_size);

	return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous.  It is assumed that @compare_size <=
 * @buf2_size.  This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	size_t end2 = offs2 + compare_size, part_size;

	if (end2 <= buf2_size)
		return memcmp(buf1, buf2 + offs2, compare_size);

	part_size = end2 - buf2_size;
	if (memcmp(buf1, buf2 + offs2, part_size))
		return true;

	compare_size -= part_size;

	return memcmp(buf1 + part_size, buf2, compare_size);
}

static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	size_t ref_end = ref_offset + ref_size;

	if (ref_end > buf_size) {
		if (head > ref_offset || head < ref_end - buf_size)
			return true;
	} else if (head > ref_offset && head < ref_end) {
		return true;
	}

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}

static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	if (head >= ref_size) {
		memcpy(ref_buf, data + head - ref_size, ref_size);
	} else {
		memcpy(ref_buf, data, head);
		ref_size -= head;
		memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
	}
}

static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
			     struct auxtrace_mmap *mm, unsigned char *data,
			     u64 head)
{
	struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
	bool wrapped;

	wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
				       ptr->snapshot_ref_buf_size, mm->len,
				       data, head);

	intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
			  data, head);

	return wrapped;
}

static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	int i, a, b;

	b = buf_size >> 3;
	a = b - 512;
	if (a < 0)
		a = 0;

	for (i = a; i < b; i++) {
		if (data[i])
			return true;
	}

	return false;
}

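/*
 * intel_pt_find_snapshot() - called when a snapshot is taken to work out how
 * much of the AUX buffer holds valid data.  It decides whether the buffer has
 * wrapped, either by comparing against the saved reference data or, when no
 * reference buffer is in use, by checking whether the last 4KiB of the buffer
 * are still zero, and then adjusts 'old' and 'head' to look like full-trace
 * offsets where 'old' is always less than 'head'.
 */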
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
				  struct auxtrace_mmap *mm, unsigned char *data,
				  u64 *head, u64 *old)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	err = intel_pt_snapshot_init(ptr, mm->len);
	if (err)
		goto out_err;

	if (idx >= ptr->snapshot_ref_cnt) {
		err = intel_pt_alloc_snapshot_refs(ptr, idx);
		if (err)
			goto out_err;
	}

	if (ptr->snapshot_ref_buf_size) {
		if (!ptr->snapshot_refs[idx].ref_buf) {
			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
			if (err)
				goto out_err;
		}
		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
	} else {
		wrapped = ptr->snapshot_refs[idx].wrapped;
		if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
			ptr->snapshot_refs[idx].wrapped = true;
			wrapped = true;
		}
	}

	/*
	 * In full trace mode 'head' continually increases.  However in snapshot
	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old' is
	 * always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
	return rdtsc();
}

static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
							     idx);
	}
	return -EINVAL;
}

struct auxtrace_record *intel_pt_recording_init(int *err)
{
	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
	struct intel_pt_recording *ptr;

	if (!intel_pt_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	ptr = zalloc(sizeof(struct intel_pt_recording));
	if (!ptr) {
		*err = -ENOMEM;
		return NULL;
	}

	ptr->intel_pt_pmu = intel_pt_pmu;
	ptr->itr.recording_options = intel_pt_recording_options;
	ptr->itr.info_priv_size = intel_pt_info_priv_size;
	ptr->itr.info_fill = intel_pt_info_fill;
	ptr->itr.free = intel_pt_recording_free;
	ptr->itr.snapshot_start = intel_pt_snapshot_start;
	ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
	ptr->itr.find_snapshot = intel_pt_find_snapshot;
	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
	ptr->itr.reference = intel_pt_reference;
	ptr->itr.read_finish = intel_pt_read_finish;
	return &ptr->itr;
}