// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
        struct auxtrace                 auxtrace;
        struct auxtrace_queues          queues;
        struct auxtrace_heap            heap;
        struct itrace_synth_opts        synth_opts;
        u32                             auxtrace_type;
        struct perf_session             *session;
        struct machine                  *machine;
        u32                             pmu_type;

        struct perf_tsc_conversion      tc;

        u8                              timeless_decoding;
        u8                              data_queued;

        u8                              sample_flc;
        u8                              sample_llc;
        u8                              sample_tlb;
        u8                              sample_branch;
        u8                              sample_remote_access;
        u8                              sample_memory;

        u64                             l1d_miss_id;
        u64                             l1d_access_id;
        u64                             llc_miss_id;
        u64                             llc_access_id;
        u64                             tlb_miss_id;
        u64                             tlb_access_id;
        u64                             branch_miss_id;
        u64                             remote_access_id;
        u64                             memory_id;

        u64                             kernel_start;

        unsigned long                   num_events;
        u8                              use_ctx_pkt_for_pid;
};

struct arm_spe_queue {
        struct arm_spe                  *spe;
        unsigned int                    queue_nr;
        struct auxtrace_buffer          *buffer;
        struct auxtrace_buffer          *old_buffer;
        union perf_event                *event_buf;
        bool                            on_heap;
        bool                            done;
        pid_t                           pid;
        pid_t                           tid;
        int                             cpu;
        struct arm_spe_decoder          *decoder;
        u64                             time;
        u64                             timestamp;
        struct thread                   *thread;
};

static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
                         unsigned char *buf, size_t len)
{
        struct arm_spe_pkt packet;
        size_t pos = 0;
        int ret, pkt_len, i;
        char desc[ARM_SPE_PKT_DESC_MAX];
        const char *color = PERF_COLOR_BLUE;

        color_fprintf(stdout, color,
                      ". ... ARM SPE data: size %#zx bytes\n",
                      len);

        while (len) {
                ret = arm_spe_get_packet(buf, len, &packet);
                if (ret > 0)
                        pkt_len = ret;
                else
                        pkt_len = 1;
                printf(".");
                color_fprintf(stdout, color, "  %08x: ", pos);
                for (i = 0; i < pkt_len; i++)
                        color_fprintf(stdout, color, " %02x", buf[i]);
                for (; i < 16; i++)
                        color_fprintf(stdout, color, "   ");
                if (ret > 0) {
                        ret = arm_spe_pkt_desc(&packet, desc,
                                               ARM_SPE_PKT_DESC_MAX);
                        if (!ret)
                                color_fprintf(stdout, color, " %s\n", desc);
                } else {
                        color_fprintf(stdout, color, " Bad packet!\n");
                }
                pos += pkt_len;
                buf += pkt_len;
                len -= pkt_len;
        }
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
                               size_t len)
{
        printf(".\n");
        arm_spe_dump(spe, buf, len);
}
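
/*
 * Data-fetch callback invoked by the decoder whenever it exhausts its
 * current input. It walks the queue's auxtrace buffers, lazily loading
 * buffer contents from the perf data file on first use, and drops the
 * previous buffer's data once the next one is in place. Returning with
 * b->len == 0 signals end of trace data for this queue.
 */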
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
        struct arm_spe_queue *speq = data;
        struct auxtrace_buffer *buffer = speq->buffer;
        struct auxtrace_buffer *old_buffer = speq->old_buffer;
        struct auxtrace_queue *queue;

        queue = &speq->spe->queues.queue_array[speq->queue_nr];

        buffer = auxtrace_buffer__next(queue, buffer);
        /* If no more data, drop the previous auxtrace_buffer and return */
        if (!buffer) {
                if (old_buffer)
                        auxtrace_buffer__drop_data(old_buffer);
                b->len = 0;
                return 0;
        }

        speq->buffer = buffer;

        /* If the aux_buffer doesn't have data associated, try to load it */
        if (!buffer->data) {
                /* get the file desc associated with the perf data file */
                int fd = perf_data__fd(speq->spe->session->data);

                buffer->data = auxtrace_buffer__get_data(buffer, fd);
                if (!buffer->data)
                        return -ENOMEM;
        }

        b->len = buffer->size;
        b->buf = buffer->data;

        if (b->len) {
                if (old_buffer)
                        auxtrace_buffer__drop_data(old_buffer);
                speq->old_buffer = buffer;
        } else {
                auxtrace_buffer__drop_data(buffer);
                return arm_spe_get_trace(b, data);
        }

        return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
                                                  unsigned int queue_nr)
{
        struct arm_spe_params params = { .get_trace = 0, };
        struct arm_spe_queue *speq;

        speq = zalloc(sizeof(*speq));
        if (!speq)
                return NULL;

        speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
        if (!speq->event_buf)
                goto out_free;

        speq->spe = spe;
        speq->queue_nr = queue_nr;
        speq->pid = -1;
        speq->tid = -1;
        speq->cpu = -1;

        /* params set */
        params.get_trace = arm_spe_get_trace;
        params.data = speq;

        /* create new decoder */
        speq->decoder = arm_spe_decoder_new(&params);
        if (!speq->decoder)
                goto out_free;

        return speq;

out_free:
        zfree(&speq->event_buf);
        free(speq);

        return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
        return ip >= spe->kernel_start ?
                PERF_RECORD_MISC_KERNEL :
                PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
                                    struct auxtrace_queue *queue)
{
        struct arm_spe_queue *speq = queue->priv;
        pid_t tid;

        tid = machine__get_current_tid(spe->machine, speq->cpu);
        if (tid != -1) {
                speq->tid = tid;
                thread__zput(speq->thread);
        } else
                speq->tid = queue->tid;

        if ((!speq->thread) && (speq->tid != -1)) {
                speq->thread = machine__find_thread(spe->machine, -1,
                                                    speq->tid);
        }

        if (speq->thread) {
                speq->pid = speq->thread->pid_;
                if (queue->cpu == -1)
                        speq->cpu = speq->thread->cpu;
        }
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
        struct arm_spe *spe = speq->spe;
        int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

        if (err)
                return err;

        arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

        return 0;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
                                struct arm_spe_queue *speq,
                                union perf_event *event,
                                struct perf_sample *sample)
{
        struct arm_spe_record *record = &speq->decoder->record;

        if (!spe->timeless_decoding)
                sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

        sample->ip = record->from_ip;
        sample->cpumode = arm_spe_cpumode(spe, sample->ip);
        sample->pid = speq->pid;
        sample->tid = speq->tid;
        sample->period = 1;
        sample->cpu = speq->cpu;

        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = sample->cpumode;
        event->sample.header.size = sizeof(struct perf_event_header);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
                            struct arm_spe_queue *speq __maybe_unused,
                            union perf_event *event,
                            struct perf_sample *sample)
{
        int ret;

        ret = perf_session__deliver_synth_event(spe->session, event, sample);
        if (ret)
                pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

        return ret;
}
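
/*
 * Synthesize one memory sample from the current decoder record: the
 * sample's id/stream_id select which synthetic event (l1d-miss,
 * llc-access, ...) it is attributed to, while the virtual and physical
 * addresses and the encoded data source come from the SPE record.
 */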
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
                                     u64 spe_events_id, u64 data_src)
{
        struct arm_spe *spe = speq->spe;
        struct arm_spe_record *record = &speq->decoder->record;
        union perf_event *event = speq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        arm_spe_prep_sample(spe, speq, event, &sample);

        sample.id = spe_events_id;
        sample.stream_id = spe_events_id;
        sample.addr = record->virt_addr;
        sample.phys_addr = record->phys_addr;
        sample.data_src = data_src;

        return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
                                        u64 spe_events_id)
{
        struct arm_spe *spe = speq->spe;
        struct arm_spe_record *record = &speq->decoder->record;
        union perf_event *event = speq->event_buf;
        struct perf_sample sample = { .ip = 0, };

        arm_spe_prep_sample(spe, speq, event, &sample);

        sample.id = spe_events_id;
        sample.stream_id = spe_events_id;
        sample.addr = record->to_ip;

        return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

#define SPE_MEM_TYPE    (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
                         ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
                         ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
        if (type & SPE_MEM_TYPE)
                return true;

        return false;
}
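
/*
 * Fold the SPE record's event type bits into the perf_mem_data_src
 * encoding used by the generic perf memory tooling. For example, a
 * load that misses in the L1 data cache but hits in the TLB yields
 * PERF_MEM_OP_LOAD, PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS and
 * PERF_MEM_TLB_WK | PERF_MEM_TLB_HIT. LLC information takes
 * precedence over L1 in the mem_lvl field.
 */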
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
        union perf_mem_data_src data_src = { 0 };

        if (record->op == ARM_SPE_LD)
                data_src.mem_op = PERF_MEM_OP_LOAD;
        else
                data_src.mem_op = PERF_MEM_OP_STORE;

        if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
                data_src.mem_lvl = PERF_MEM_LVL_L3;

                if (record->type & ARM_SPE_LLC_MISS)
                        data_src.mem_lvl |= PERF_MEM_LVL_MISS;
                else
                        data_src.mem_lvl |= PERF_MEM_LVL_HIT;
        } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
                data_src.mem_lvl = PERF_MEM_LVL_L1;

                if (record->type & ARM_SPE_L1D_MISS)
                        data_src.mem_lvl |= PERF_MEM_LVL_MISS;
                else
                        data_src.mem_lvl |= PERF_MEM_LVL_HIT;
        }

        if (record->type & ARM_SPE_REMOTE_ACCESS)
                data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

        if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
                data_src.mem_dtlb = PERF_MEM_TLB_WK;

                if (record->type & ARM_SPE_TLB_MISS)
                        data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
                else
                        data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
        }

        return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
        const struct arm_spe_record *record = &speq->decoder->record;
        struct arm_spe *spe = speq->spe;
        u64 data_src;
        int err;

        data_src = arm_spe__synth_data_source(record);

        if (spe->sample_flc) {
                if (record->type & ARM_SPE_L1D_MISS) {
                        err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
                                                        data_src);
                        if (err)
                                return err;
                }

                if (record->type & ARM_SPE_L1D_ACCESS) {
                        err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
                                                        data_src);
                        if (err)
                                return err;
                }
        }

        if (spe->sample_llc) {
                if (record->type & ARM_SPE_LLC_MISS) {
                        err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
                                                        data_src);
                        if (err)
                                return err;
                }

                if (record->type & ARM_SPE_LLC_ACCESS) {
                        err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
                                                        data_src);
                        if (err)
                                return err;
                }
        }

        if (spe->sample_tlb) {
                if (record->type & ARM_SPE_TLB_MISS) {
                        err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
                                                        data_src);
                        if (err)
                                return err;
                }

                if (record->type & ARM_SPE_TLB_ACCESS) {
                        err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
                                                        data_src);
                        if (err)
                                return err;
                }
        }

        if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
                err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
                if (err)
                        return err;
        }

        if (spe->sample_remote_access &&
            (record->type & ARM_SPE_REMOTE_ACCESS)) {
                err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
                                                data_src);
                if (err)
                        return err;
        }

        if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
                err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
                if (err)
                        return err;
        }

        return 0;
}
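
/*
 * Drive the decode loop for one queue. Returns 1 when the queue's
 * trace data has been fully consumed, 0 when decoding paused because
 * the queue's timestamp caught up with *timestamp (which is updated
 * so the caller can re-queue it on the auxtrace heap), or a negative
 * error code.
 */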
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
        struct arm_spe *spe = speq->spe;
        struct arm_spe_record *record;
        int ret;

        if (!spe->kernel_start)
                spe->kernel_start = machine__kernel_start(spe->machine);

        while (1) {
                /*
                 * The usual logic is to decode the packets first and then
                 * synthesize a sample based on the resulting record; here the
                 * flow is reversed: arm_spe_sample() is called to synthesize
                 * samples prior to arm_spe_decode().
                 *
                 * There are two reasons for this:
                 * 1. When the queue is set up in arm_spe__setup_queue(), the
                 * trace data has already been decoded and a record generated,
                 * but no sample has been synthesized for it yet; the leftover
                 * record is handled here.
                 * 2. After decoding trace data, the record timestamp must be
                 * compared with the timestamp of the incoming perf event. If
                 * the record is later, the decoder bails out and pushes the
                 * record onto the auxtrace heap, deferring sample synthesis
                 * to the next call; this keeps samples from Arm SPE trace
                 * data correctly time-ordered with other perf events.
                 */

                /*
                 * Update pid/tid info.
                 */
                record = &speq->decoder->record;
                if (!spe->timeless_decoding && record->context_id != (u64)-1) {
                        ret = arm_spe_set_tid(speq, record->context_id);
                        if (ret)
                                return ret;

                        spe->use_ctx_pkt_for_pid = true;
                }

                ret = arm_spe_sample(speq);
                if (ret)
                        return ret;

                ret = arm_spe_decode(speq->decoder);
                if (!ret) {
                        pr_debug("No data or all data has been processed.\n");
                        return 1;
                }

                /*
                 * A decode error was detected in the SPE trace data; continue
                 * with the next chunk of trace data to find more records.
                 */
                if (ret < 0)
                        continue;

                record = &speq->decoder->record;

                /* Update timestamp for the last record */
                if (record->timestamp > speq->timestamp)
                        speq->timestamp = record->timestamp;

                /*
                 * If the timestamp of the queue is later than the timestamp
                 * of the incoming perf event, bail out so the perf event can
                 * be processed first.
                 */
                if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
                        *timestamp = speq->timestamp;
                        return 0;
                }
        }

        return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
                                struct auxtrace_queue *queue,
                                unsigned int queue_nr)
{
        struct arm_spe_queue *speq = queue->priv;
        struct arm_spe_record *record;

        if (list_empty(&queue->head) || speq)
                return 0;

        speq = arm_spe__alloc_queue(spe, queue_nr);

        if (!speq)
                return -ENOMEM;

        queue->priv = speq;

        if (queue->cpu != -1)
                speq->cpu = queue->cpu;

        if (!speq->on_heap) {
                int ret;

                if (spe->timeless_decoding)
                        return 0;

retry:
                ret = arm_spe_decode(speq->decoder);

                if (!ret)
                        return 0;

                if (ret < 0)
                        goto retry;

                record = &speq->decoder->record;

                speq->timestamp = record->timestamp;
                ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
                if (ret)
                        return ret;
                speq->on_heap = true;
        }

        return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
        unsigned int i;
        int ret;

        for (i = 0; i < spe->queues.nr_queues; i++) {
                ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
                if (ret)
                        return ret;
        }

        return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
        if (spe->queues.new_data) {
                spe->queues.new_data = false;
                return arm_spe__setup_queues(spe);
        }

        return 0;
}
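
/*
 * Decoding is "timeless" when no event in the session carries
 * PERF_SAMPLE_TIME, so records cannot be correlated by timestamp;
 * each queue is then drained in full per thread instead of being
 * interleaved through the auxtrace heap.
 */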
static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
        struct evsel *evsel;
        struct evlist *evlist = spe->session->evlist;
        bool timeless_decoding = true;

        /*
         * Loop through the list of events and check whether any of them
         * has the time bit set; decoding is timeless only if none does.
         */
        evlist__for_each_entry(evlist, evsel) {
                if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
                        timeless_decoding = false;
        }

        return timeless_decoding;
}

static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
        unsigned int queue_nr;
        u64 ts;
        int ret;

        while (1) {
                struct auxtrace_queue *queue;
                struct arm_spe_queue *speq;

                if (!spe->heap.heap_cnt)
                        return 0;

                if (spe->heap.heap_array[0].ordinal >= timestamp)
                        return 0;

                queue_nr = spe->heap.heap_array[0].queue_nr;
                queue = &spe->queues.queue_array[queue_nr];
                speq = queue->priv;

                auxtrace_heap__pop(&spe->heap);

                if (spe->heap.heap_cnt) {
                        ts = spe->heap.heap_array[0].ordinal + 1;
                        if (ts > timestamp)
                                ts = timestamp;
                } else {
                        ts = timestamp;
                }

                /*
                 * A previous context-switch event has set pid/tid in the
                 * machine's context, so here we need to update the pid/tid
                 * in the thread and SPE queue.
                 */
                if (!spe->use_ctx_pkt_for_pid)
                        arm_spe_set_pid_tid_cpu(spe, queue);

                ret = arm_spe_run_decoder(speq, &ts);
                if (ret < 0) {
                        auxtrace_heap__add(&spe->heap, queue_nr, ts);
                        return ret;
                }

                if (!ret) {
                        ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
                        if (ret < 0)
                                return ret;
                } else {
                        speq->on_heap = false;
                }
        }

        return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
                                           u64 time_)
{
        struct auxtrace_queues *queues = &spe->queues;
        unsigned int i;
        u64 ts = 0;

        for (i = 0; i < queues->nr_queues; i++) {
                struct auxtrace_queue *queue = &spe->queues.queue_array[i];
                struct arm_spe_queue *speq = queue->priv;

                if (speq && (tid == -1 || speq->tid == tid)) {
                        speq->time = time_;
                        arm_spe_set_pid_tid_cpu(spe, queue);
                        arm_spe_run_decoder(speq, &ts);
                }
        }
        return 0;
}
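
/*
 * On a context-switch-out event, record the incoming task's pid/tid
 * for the CPU in the machine state, so arm_spe_set_pid_tid_cpu() can
 * attribute subsequent samples on that CPU when the trace contains no
 * CONTEXT packets.
 */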
static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
                                  struct perf_sample *sample)
{
        pid_t pid, tid;
        int cpu;

        if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
                return 0;

        pid = event->context_switch.next_prev_pid;
        tid = event->context_switch.next_prev_tid;
        cpu = sample->cpu;

        if (tid == -1)
                pr_warning("context_switch event has no tid\n");

        return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

static int arm_spe_process_event(struct perf_session *session,
                                 union perf_event *event,
                                 struct perf_sample *sample,
                                 struct perf_tool *tool)
{
        int err = 0;
        u64 timestamp;
        struct arm_spe *spe = container_of(session->auxtrace,
                                           struct arm_spe, auxtrace);

        if (dump_trace)
                return 0;

        if (!tool->ordered_events) {
                pr_err("SPE trace requires ordered events\n");
                return -EINVAL;
        }

        if (sample->time && (sample->time != (u64) -1))
                timestamp = perf_time_to_tsc(sample->time, &spe->tc);
        else
                timestamp = 0;

        if (timestamp || spe->timeless_decoding) {
                err = arm_spe__update_queues(spe);
                if (err)
                        return err;
        }

        if (spe->timeless_decoding) {
                if (event->header.type == PERF_RECORD_EXIT) {
                        err = arm_spe_process_timeless_queues(spe,
                                                              event->fork.tid,
                                                              sample->time);
                }
        } else if (timestamp) {
                err = arm_spe_process_queues(spe, timestamp);
                if (err)
                        return err;

                if (!spe->use_ctx_pkt_for_pid &&
                    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
                     event->header.type == PERF_RECORD_SWITCH))
                        err = arm_spe_context_switch(spe, event, sample);
        }

        return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
{
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
                                           auxtrace);

        if (!spe->data_queued) {
                struct auxtrace_buffer *buffer;
                off_t data_offset;
                int fd = perf_data__fd(session->data);
                int err;

                if (perf_data__is_pipe(session->data)) {
                        data_offset = 0;
                } else {
                        data_offset = lseek(fd, 0, SEEK_CUR);
                        if (data_offset == -1)
                                return -errno;
                }

                err = auxtrace_queues__add_event(&spe->queues, session, event,
                                                 data_offset, &buffer);
                if (err)
                        return err;

                /* Dump here now we have copied a piped trace out of the pipe */
                if (dump_trace) {
                        if (auxtrace_buffer__get_data(buffer, fd)) {
                                arm_spe_dump_event(spe, buffer->data,
                                                   buffer->size);
                                auxtrace_buffer__put_data(buffer);
                        }
                }
        }

        return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
                         struct perf_tool *tool __maybe_unused)
{
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
                                           auxtrace);
        int ret;

        if (dump_trace)
                return 0;

        if (!tool->ordered_events)
                return -EINVAL;

        ret = arm_spe__update_queues(spe);
        if (ret < 0)
                return ret;

        if (spe->timeless_decoding)
                return arm_spe_process_timeless_queues(spe, -1,
                                                       MAX_TIMESTAMP - 1);

        ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
        if (ret)
                return ret;

        if (!spe->use_ctx_pkt_for_pid)
                ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
                            "Matching of TIDs to SPE events could be inaccurate.\n");

        return 0;
}

static void arm_spe_free_queue(void *priv)
{
        struct arm_spe_queue *speq = priv;

        if (!speq)
                return;
        thread__zput(speq->thread);
        arm_spe_decoder_free(speq->decoder);
        zfree(&speq->event_buf);
        free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
                                           auxtrace);
        struct auxtrace_queues *queues = &spe->queues;
        unsigned int i;

        for (i = 0; i < queues->nr_queues; i++) {
                arm_spe_free_queue(queues->queue_array[i].priv);
                queues->queue_array[i].priv = NULL;
        }
        auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
                                           auxtrace);

        auxtrace_heap__free(&spe->heap);
        arm_spe_free_events(session);
        session->auxtrace = NULL;
        free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
                                      struct evsel *evsel)
{
        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

        return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
        [ARM_SPE_PMU_TYPE]              = "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
        if (!dump_trace)
                return;

        fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
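
/*
 * Helper used when injecting synthetic attribute events: the dummy
 * perf_tool is embedded so arm_spe_event_synth() can recover the
 * enclosing session via container_of() and deliver the event to it.
 */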
struct arm_spe_synth {
        struct perf_tool dummy_tool;
        struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
                               union perf_event *event,
                               struct perf_sample *sample __maybe_unused,
                               struct machine *machine __maybe_unused)
{
        struct arm_spe_synth *arm_spe_synth =
                      container_of(tool, struct arm_spe_synth, dummy_tool);

        return perf_session__deliver_synth_event(arm_spe_synth->session,
                                                 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
                               struct perf_event_attr *attr, u64 id)
{
        struct arm_spe_synth arm_spe_synth;

        memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
        arm_spe_synth.session = session;

        return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
                                           &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
                                   const char *name)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.id && evsel->core.id[0] == id) {
                        if (evsel->name)
                                zfree(&evsel->name);
                        evsel->name = strdup(name);
                        break;
                }
        }
}
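
/*
 * Create one synthetic event per requested sample type, cloning its
 * attributes from the first SPE evsel found. Synthetic ids start at a
 * fixed offset (+1000000000) from that evsel's first id and increase
 * by one per event, so samples synthesized during decode can be
 * matched back to the right synthetic evsel by id.
 */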
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
        struct evlist *evlist = session->evlist;
        struct evsel *evsel;
        struct perf_event_attr attr;
        bool found = false;
        u64 id;
        int err;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.attr.type == spe->pmu_type) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                pr_debug("No selected events with SPE trace data\n");
                return 0;
        }

        memset(&attr, 0, sizeof(struct perf_event_attr));
        attr.size = sizeof(struct perf_event_attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                            PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
        if (spe->timeless_decoding)
                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
        else
                attr.sample_type |= PERF_SAMPLE_TIME;

        attr.exclude_user = evsel->core.attr.exclude_user;
        attr.exclude_kernel = evsel->core.attr.exclude_kernel;
        attr.exclude_hv = evsel->core.attr.exclude_hv;
        attr.exclude_host = evsel->core.attr.exclude_host;
        attr.exclude_guest = evsel->core.attr.exclude_guest;
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;

        /* create new id val to be a fixed offset from evsel id */
        id = evsel->core.id[0] + 1000000000;

        if (!id)
                id = 1;

        if (spe->synth_opts.flc) {
                spe->sample_flc = true;

                /* Level 1 data cache miss */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->l1d_miss_id = id;
                arm_spe_set_event_name(evlist, id, "l1d-miss");
                id += 1;

                /* Level 1 data cache access */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->l1d_access_id = id;
                arm_spe_set_event_name(evlist, id, "l1d-access");
                id += 1;
        }

        if (spe->synth_opts.llc) {
                spe->sample_llc = true;

                /* Last level cache miss */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->llc_miss_id = id;
                arm_spe_set_event_name(evlist, id, "llc-miss");
                id += 1;

                /* Last level cache access */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->llc_access_id = id;
                arm_spe_set_event_name(evlist, id, "llc-access");
                id += 1;
        }

        if (spe->synth_opts.tlb) {
                spe->sample_tlb = true;

                /* TLB miss */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->tlb_miss_id = id;
                arm_spe_set_event_name(evlist, id, "tlb-miss");
                id += 1;

                /* TLB access */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->tlb_access_id = id;
                arm_spe_set_event_name(evlist, id, "tlb-access");
                id += 1;
        }

        if (spe->synth_opts.branches) {
                spe->sample_branch = true;

                /* Branch miss */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->branch_miss_id = id;
                arm_spe_set_event_name(evlist, id, "branch-miss");
                id += 1;
        }

        if (spe->synth_opts.remote_access) {
                spe->sample_remote_access = true;

                /* Remote access */
                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->remote_access_id = id;
                arm_spe_set_event_name(evlist, id, "remote-access");
                id += 1;
        }

        if (spe->synth_opts.mem) {
                spe->sample_memory = true;

                err = arm_spe_synth_event(session, &attr, id);
                if (err)
                        return err;
                spe->memory_id = id;
                arm_spe_set_event_name(evlist, id, "memory");
        }

        return 0;
}
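
/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: validate the private
 * data size, allocate and wire up the struct arm_spe instance, set up
 * the TSC conversion parameters, and synthesize the requested events
 * before queueing the indexed auxtrace data.
 */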
int arm_spe_process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session)
{
        struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
        size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
        struct perf_record_time_conv *tc = &session->time_conv;
        struct arm_spe *spe;
        int err;

        if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
                                        min_sz)
                return -EINVAL;

        spe = zalloc(sizeof(struct arm_spe));
        if (!spe)
                return -ENOMEM;

        err = auxtrace_queues__init(&spe->queues);
        if (err)
                goto err_free;

        spe->session = session;
        spe->machine = &session->machines.host; /* No kvm support */
        spe->auxtrace_type = auxtrace_info->type;
        spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

        spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

        /*
         * The synthesized event PERF_RECORD_TIME_CONV has already been
         * handled, and the parameters for the hardware clock are stored in
         * the session context. Pass these parameters to the struct
         * perf_tsc_conversion in "spe->tc", which is used for later
         * conversion between the clock counter and timestamps.
         *
         * For backward compatibility, copy the fields starting from
         * "time_cycles" only if they are contained in the event.
         */
        spe->tc.time_shift = tc->time_shift;
        spe->tc.time_mult = tc->time_mult;
        spe->tc.time_zero = tc->time_zero;

        if (event_contains(*tc, time_cycles)) {
                spe->tc.time_cycles = tc->time_cycles;
                spe->tc.time_mask = tc->time_mask;
                spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
                spe->tc.cap_user_time_short = tc->cap_user_time_short;
        }

        spe->auxtrace.process_event = arm_spe_process_event;
        spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
        spe->auxtrace.flush_events = arm_spe_flush;
        spe->auxtrace.free_events = arm_spe_free_events;
        spe->auxtrace.free = arm_spe_free;
        spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
        session->auxtrace = &spe->auxtrace;

        arm_spe_print_info(&auxtrace_info->priv[0]);

        if (dump_trace)
                return 0;

        if (session->itrace_synth_opts && session->itrace_synth_opts->set)
                spe->synth_opts = *session->itrace_synth_opts;
        else
                itrace_synth_opts__set_default(&spe->synth_opts, false);

        err = arm_spe_synth_events(spe, session);
        if (err)
                goto err_free_queues;

        err = auxtrace_queues__process_index(&spe->queues, session);
        if (err)
                goto err_free_queues;

        if (spe->queues.populated)
                spe->data_queued = true;

        return 0;

err_free_queues:
        auxtrace_queues__free(&spe->queues);
        session->auxtrace = NULL;
err_free:
        free(spe);
        return err;
}