// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};
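
/*
 * Dump raw trace one packet at a time: a '.' prefix, the buffer offset,
 * up to 16 hex payload bytes, then the decoded packet description (or
 * "Bad packet!" when decoding fails).  Schematically, a line of output
 * looks like (illustrative only, not taken from a real trace):
 *
 *	.  00000000:  <hex bytes>  <packet description>
 */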
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}
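
/*
 * Fill the fields common to all synthesized samples: ip/cpumode from the
 * record's source address, pid/tid/cpu from the queue, and, when timed
 * decoding is in use, a perf timestamp converted from the SPE timestamp
 * counter via the session's TSC conversion parameters.
 */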
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
			 ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
	if (type & SPE_MEM_TYPE)
		return true;

	return false;
}
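
/*
 * Map SPE record flags onto the generic perf_mem_data_src bit-field
 * union consumed e.g. by "perf mem" and "perf c2c".  For example, a load
 * record carrying ARM_SPE_L1D_MISS (and no LLC flags) is encoded as:
 *
 *	data_src.mem_op  = PERF_MEM_OP_LOAD;
 *	data_src.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
 */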
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
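
/*
 * Decode the queue's trace data and synthesize samples until either the
 * data is exhausted or, for timed decoding, a record newer than
 * *timestamp is found.  Return convention: 1 when all data has been
 * processed, 0 when bailing out on a timestamp (so the queue can be
 * re-added to the auxtrace heap), negative error codes otherwise.
 */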
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to decode the packets first and then
		 * use the record to synthesize a sample; here the flow is
		 * reversed: arm_spe_sample() is called to synthesize samples
		 * prior to arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), the
		 *    trace data has already been decoded and a record
		 *    generated, but no sample was synthesized for it; the
		 *    leftover record is handled here.
		 * 2. After decoding trace data, the record timestamp must be
		 *    compared with that of the coming perf event.  If the
		 *    record is later, bail out and push the record onto the
		 *    auxtrace heap, deferring sample synthesis to the next
		 *    call; this keeps samples from Arm SPE trace data and
		 *    other perf events correlated in correct time order.
		 */
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding SPE trace data,
		 * continue with the next trace data to find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can
		 * be processed ahead of the trace data.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}
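
/*
 * "Timeless" decoding is used when no event carries PERF_SAMPLE_TIME:
 * records cannot be time-ordered against other perf events, so queues
 * are drained per thread at exit/flush time (see
 * arm_spe_process_timeless_queues()) instead of through the auxtrace
 * heap.
 */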
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events; decoding cannot be timeless if
	 * any event has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}
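
/*
 * Process queues in time order up to the given timestamp.  The auxtrace
 * heap is keyed on each queue's next record timestamp: pop the oldest
 * queue, decode it up to the next-oldest queue's timestamp (or the
 * limit), then push it back with its updated timestamp.
 */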
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
	}

	return err;
}
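
/*
 * Queue AUXTRACE data as it is seen.  For file input, only the file
 * offset is recorded here; the payload is loaded lazily by
 * arm_spe_get_trace().  Piped input has already been copied into the
 * buffer, which is why dump_trace output is emitted at this point.
 */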
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]	= "  PMU Type	%"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
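
/*
 * For each sample class enabled in synth_opts, synthesize one new event
 * attribute (cloned from the SPE evsel, with ids offset from the evsel's
 * first id) and remember its id; arm_spe_sample() later emits samples
 * against these ids.
 */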
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled and the hardware clock parameters are stored in the
	 * session context.  Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used later to convert
	 * between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}
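
	/*
	 * A minimal sketch of what tsc_to_perf_time() does with these
	 * parameters when converting a counter value "cyc" to a perf
	 * timestamp (see util/tsc.c for the authoritative version,
	 * including the cap_user_time_short wrap-around handling):
	 *
	 *	quot = cyc >> time_shift;
	 *	rem  = cyc & (((u64)1 << time_shift) - 1);
	 *	time = time_zero + quot * time_mult +
	 *	       ((rem * time_mult) >> time_shift);
	 */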
	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}