/*
 * intel-bts.c: Intel Branch Trace Store (BTS) support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>

#include "../../util/cpumap.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
#include "../../util/session.h"
#include "../../util/util.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/tsc.h"
#include "../../util/auxtrace.h"
#include "../../util/intel-bts.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_BTS_DFLT_SAMPLE_SIZE	KiB(4)

#define INTEL_BTS_MAX_SAMPLE_SIZE	KiB(60)

struct intel_bts_snapshot_ref {
	void	*ref_buf;
	size_t	ref_offset;
	bool	wrapped;
};

struct intel_bts_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*intel_bts_pmu;
	struct perf_evlist		*evlist;
	bool				snapshot_mode;
	size_t				snapshot_size;
	int				snapshot_ref_cnt;
	struct intel_bts_snapshot_ref	*snapshot_refs;
};

struct branch {
	u64 from;
	u64 to;
	u64 misc;
};

static size_t
intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
			 struct perf_evlist *evlist __maybe_unused)
{
	return INTEL_BTS_AUXTRACE_PRIV_SIZE;
}

static int intel_bts_info_fill(struct auxtrace_record *itr,
			       struct perf_session *session,
			       struct auxtrace_info_event *auxtrace_info,
			       size_t priv_size)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false;
	int err;

	if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
		return -EINVAL;

	if (!session->evlist->nr_mmaps)
		return -EINVAL;

	pc = session->evlist->mmap[0].base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel BTS: TSC not available\n");
	}

	auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
	auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
	auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;

	return 0;
}

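/*
 * Validate and apply the record options for Intel BTS: allow at most one
 * intel_bts event (forced to a sample period of 1), choose default AUX
 * area mmap sizes for snapshot and full-trace modes, move the BTS event
 * to the front of the evlist, and add a tracking event for sideband data.
 * Based on the error messages below, this is reached via command lines
 * such as (illustrative examples, not exhaustive):
 *
 *	perf record -e intel_bts// -- workload
 *	perf record -e intel_bts// -S -- workload	(snapshot mode)
 */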
static int intel_bts_recording_options(struct auxtrace_record *itr,
				       struct perf_evlist *evlist,
				       struct record_opts *opts)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
	struct perf_evsel *evsel, *intel_bts_evsel = NULL;
	const struct cpu_map *cpus = evlist->cpus;
	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;

	btsr->evlist = evlist;
	btsr->snapshot_mode = opts->auxtrace_snapshot_mode;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == intel_bts_pmu->type) {
			if (intel_bts_evsel) {
				pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->attr.freq = 0;
			evsel->attr.sample_period = 1;
			intel_bts_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
		return -EINVAL;
	}

	if (!opts->full_auxtrace)
		return 0;

	if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
		pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
		return -EINVAL;
	}

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel BTS snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	if (intel_bts_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_bts_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct perf_evsel *tracking_evsel;
		int err;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = perf_evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->attr.freq = 0;
		tracking_evsel->attr.sample_period = 1;
	}

	return 0;
}

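/*
 * Parse the optional size argument given with snapshot mode (-S).  No
 * argument leaves the size zero, in which case a default is derived from
 * the AUX area mmap size in intel_bts_recording_options() above.
 */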
static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
					    struct record_opts *opts,
					    const char *str)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	btsr->snapshot_size = snapshot_size;

	return 0;
}

static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
{
	return rdtsc();
}

static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
					 int idx)
{
	const size_t sz = sizeof(struct intel_bts_snapshot_ref);
	int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
	struct intel_bts_snapshot_ref *refs;

	if (!new_cnt)
		new_cnt = 16;

	while (new_cnt <= idx)
		new_cnt *= 2;

	refs = calloc(new_cnt, sz);
	if (!refs)
		return -ENOMEM;

	memcpy(refs, btsr->snapshot_refs, cnt * sz);
	free(btsr->snapshot_refs);	/* avoid leaking the old, smaller array */

	btsr->snapshot_refs = refs;
	btsr->snapshot_ref_cnt = new_cnt;

	return 0;
}

static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
{
	int i;

	for (i = 0; i < btsr->snapshot_ref_cnt; i++)
		zfree(&btsr->snapshot_refs[i].ref_buf);
	zfree(&btsr->snapshot_refs);
}

static void intel_bts_recording_free(struct auxtrace_record *itr)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);

	intel_bts_free_snapshot_refs(btsr);
	free(btsr);
}

static int intel_bts_snapshot_start(struct auxtrace_record *itr)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each(btsr->evlist, evsel) {
		if (evsel->attr.type == btsr->intel_bts_pmu->type)
			return perf_evsel__disable(evsel);
	}
	return -EINVAL;
}

static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each(btsr->evlist, evsel) {
		if (evsel->attr.type == btsr->intel_bts_pmu->type)
			return perf_evsel__enable(evsel);
	}
	return -EINVAL;
}

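/*
 * Detect whether the AUX buffer has wrapped at least once.  The buffer
 * starts out zero-filled, so if any of the last 512 64-bit words (the
 * final 4KiB) are non-zero, data is assumed to have been written right up
 * to the end of the buffer, i.e. it has wrapped.
 */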
static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
{
	int i, a, b;

	b = buf_size >> 3;
	a = b - 512;
	if (a < 0)
		a = 0;

	for (i = a; i < b; i++) {
		if (data[i])
			return true;
	}

	return false;
}

static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
				   struct auxtrace_mmap *mm,
				   unsigned char *data,
				   u64 *head, u64 *old)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	if (idx >= btsr->snapshot_ref_cnt) {
		err = intel_bts_alloc_snapshot_refs(btsr, idx);
		if (err)
			goto out_err;
	}

	wrapped = btsr->snapshot_refs[idx].wrapped;
	if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
		btsr->snapshot_refs[idx].wrapped = true;
		wrapped = true;
	}

	/*
	 * In full trace mode 'head' continually increases.  However in snapshot
	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old'
	 * is always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}

static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
{
	struct intel_bts_recording *btsr =
			container_of(itr, struct intel_bts_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each(btsr->evlist, evsel) {
		if (evsel->attr.type == btsr->intel_bts_pmu->type)
			return perf_evlist__enable_event_idx(btsr->evlist,
							     evsel, idx);
	}
	return -EINVAL;
}

struct auxtrace_record *intel_bts_recording_init(int *err)
{
	struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
	struct intel_bts_recording *btsr;

	if (!intel_bts_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	btsr = zalloc(sizeof(struct intel_bts_recording));
	if (!btsr) {
		*err = -ENOMEM;
		return NULL;
	}

	btsr->intel_bts_pmu = intel_bts_pmu;
	btsr->itr.recording_options = intel_bts_recording_options;
	btsr->itr.info_priv_size = intel_bts_info_priv_size;
	btsr->itr.info_fill = intel_bts_info_fill;
	btsr->itr.free = intel_bts_recording_free;
	btsr->itr.snapshot_start = intel_bts_snapshot_start;
	btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
	btsr->itr.find_snapshot = intel_bts_find_snapshot;
	btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
	btsr->itr.reference = intel_bts_reference;
	btsr->itr.read_finish = intel_bts_read_finish;
	btsr->itr.alignment = sizeof(struct branch);
	return &btsr->itr;
}