// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
	u8				use_ctx_pkt_for_pid;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};
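/*
 * Dump the raw trace in hex: one line per SPE packet, showing the
 * packet bytes followed by the decoded packet description.  Bytes that
 * cannot be decoded are consumed one at a time, so a corrupt stream
 * cannot stall the dump.
 */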
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}
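/*
 * Classify a sampled address purely by address range: anything at or
 * above machine__kernel_start() is attributed to the kernel, everything
 * else to userspace.
 */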
static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
{
	struct arm_spe *spe = speq->spe;
	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);

	if (err)
		return err;

	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);

	return 0;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
			 ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
	if (type & SPE_MEM_TYPE)
		return true;

	return false;
}
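/*
 * Map the SPE event type bits onto the generic perf_mem_data_src
 * encoding used by "perf mem" and "perf c2c": cache events populate
 * mem_lvl with a HIT/MISS qualifier, TLB events populate mem_dtlb, and
 * remote accesses are flagged as a remote cache hop.
 */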
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
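/*
 * Decode loop return convention: 1 means the trace data for this queue
 * has been exhausted, 0 means decoding bailed out because the queue's
 * timestamp caught up with the perf event currently being processed
 * (the queue must then be re-added to the auxtrace heap), and a
 * negative value is an error code.
 */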
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic would be to decode the packets first and
		 * then use the resulting record to synthesize a sample; here
		 * the flow is reversed: arm_spe_sample() is called for the
		 * pending record before arm_spe_decode() produces the next
		 * one.
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), the
		 *    trace data has already been decoded into a record, but
		 *    no sample has been synthesized for it yet; that pending
		 *    record is handled here.
		 * 2. After decoding, the record's timestamp must be compared
		 *    with the timestamp of the incoming perf event.  If the
		 *    record is later, we bail out and push the queue onto the
		 *    auxtrace heap, deferring sample synthesis for that
		 *    record until the next call; this keeps samples from the
		 *    Arm SPE trace data correctly time-ordered relative to
		 *    other perf events.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding the SPE trace data,
		 * continue with the following trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the incoming perf event, bail out so the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
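/*
 * Prime a newly populated queue: allocate the per-queue decoder state
 * and, when decoding with timestamps, decode as far as the first
 * record so the queue can be placed on the auxtrace heap ordered by
 * its first timestamp.
 */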
static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events: decoding is timeless only if
	 * none of them has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}
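/*
 * Walk the queues in timestamp order.  The auxtrace heap keys each
 * queue by the timestamp of its next record: repeatedly pop the oldest
 * queue, run its decoder up to (at most) the timestamp of the perf
 * event currently being delivered, and push it back with its new
 * ordinal.  This interleaves the synthesized SPE samples with the
 * other perf events in correct time order.
 */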
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so here we need to update the pid/tid
		 * in the thread and SPE queue.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
				  struct perf_sample *sample)
{
	pid_t pid, tid;
	int cpu;

	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
		return 0;

	pid = event->context_switch.next_prev_pid;
	tid = event->context_switch.next_prev_tid;
	cpu = sample->cpu;

	if (tid == -1)
		pr_warning("context_switch event has no tid\n");

	return machine__set_current_tid(spe->machine, cpu, pid, tid);
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
					   struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
		if (err)
			return err;

		if (!spe->use_ctx_pkt_for_pid &&
		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
		     event->header.type == PERF_RECORD_SWITCH))
			err = arm_spe_context_switch(spe, event, sample);
	}

	return err;
}
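/*
 * PERF_RECORD_AUXTRACE handling: if the trace data was not already
 * queued up front from the file's auxtrace index, queue it now.  Pipe
 * input has no index, so every chunk of trace data takes this path.
 */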
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session,
			 struct perf_tool *tool)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (ret)
		return ret;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}
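/*
 * Create one synthetic perf event per requested sample type (L1D, LLC,
 * TLB, branch, remote access, memory).  Each event id is derived from
 * the SPE evsel's first id plus a fixed offset, so that samples
 * synthesized during decode can be routed to the right event.
 */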
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}
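/*
 * Entry point, called when the session finds a PERF_RECORD_AUXTRACE_INFO
 * event describing Arm SPE data: set up the arm_spe state, hook the
 * auxtrace callbacks into the session, synthesize the sample events and
 * queue the trace data for decoding.
 */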
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled and the parameters for the hardware clock are stored in
	 * the session context.  Copy these parameters into the struct
	 * perf_tsc_conversion in "spe->tc", which is used later to convert
	 * between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}