1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Arm Statistical Profiling Extensions (SPE) support 4 * Copyright (c) 2017-2018, Arm Ltd. 5 */ 6 7 #include <byteswap.h> 8 #include <endian.h> 9 #include <errno.h> 10 #include <inttypes.h> 11 #include <linux/bitops.h> 12 #include <linux/kernel.h> 13 #include <linux/log2.h> 14 #include <linux/types.h> 15 #include <linux/zalloc.h> 16 #include <stdlib.h> 17 #include <unistd.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "debug.h" 22 #include "evlist.h" 23 #include "evsel.h" 24 #include "machine.h" 25 #include "session.h" 26 #include "symbol.h" 27 #include "thread.h" 28 #include "thread-stack.h" 29 #include "tsc.h" 30 #include "tool.h" 31 #include "util/synthetic-events.h" 32 33 #include "arm-spe.h" 34 #include "arm-spe-decoder/arm-spe-decoder.h" 35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h" 36 37 #define MAX_TIMESTAMP (~0ULL) 38 39 struct arm_spe { 40 struct auxtrace auxtrace; 41 struct auxtrace_queues queues; 42 struct auxtrace_heap heap; 43 struct itrace_synth_opts synth_opts; 44 u32 auxtrace_type; 45 struct perf_session *session; 46 struct machine *machine; 47 u32 pmu_type; 48 49 struct perf_tsc_conversion tc; 50 51 u8 timeless_decoding; 52 u8 data_queued; 53 54 u64 sample_type; 55 u8 sample_flc; 56 u8 sample_llc; 57 u8 sample_tlb; 58 u8 sample_branch; 59 u8 sample_remote_access; 60 u8 sample_memory; 61 u8 sample_instructions; 62 u64 instructions_sample_period; 63 64 u64 l1d_miss_id; 65 u64 l1d_access_id; 66 u64 llc_miss_id; 67 u64 llc_access_id; 68 u64 tlb_miss_id; 69 u64 tlb_access_id; 70 u64 branch_miss_id; 71 u64 remote_access_id; 72 u64 memory_id; 73 u64 instructions_id; 74 75 u64 kernel_start; 76 77 unsigned long num_events; 78 u8 use_ctx_pkt_for_pid; 79 }; 80 81 struct arm_spe_queue { 82 struct arm_spe *spe; 83 unsigned int queue_nr; 84 struct auxtrace_buffer *buffer; 85 struct auxtrace_buffer *old_buffer; 86 union perf_event *event_buf; 87 bool on_heap; 88 bool done; 89 pid_t pid; 90 pid_t tid; 91 int cpu; 92 struct arm_spe_decoder *decoder; 93 u64 time; 94 u64 timestamp; 95 struct thread *thread; 96 u64 period_instructions; 97 }; 98 99 static void arm_spe_dump(struct arm_spe *spe __maybe_unused, 100 unsigned char *buf, size_t len) 101 { 102 struct arm_spe_pkt packet; 103 size_t pos = 0; 104 int ret, pkt_len, i; 105 char desc[ARM_SPE_PKT_DESC_MAX]; 106 const char *color = PERF_COLOR_BLUE; 107 108 color_fprintf(stdout, color, 109 ". ... ARM SPE data: size %#zx bytes\n", 110 len); 111 112 while (len) { 113 ret = arm_spe_get_packet(buf, len, &packet); 114 if (ret > 0) 115 pkt_len = ret; 116 else 117 pkt_len = 1; 118 printf("."); 119 color_fprintf(stdout, color, " %08x: ", pos); 120 for (i = 0; i < pkt_len; i++) 121 color_fprintf(stdout, color, " %02x", buf[i]); 122 for (; i < 16; i++) 123 color_fprintf(stdout, color, " "); 124 if (ret > 0) { 125 ret = arm_spe_pkt_desc(&packet, desc, 126 ARM_SPE_PKT_DESC_MAX); 127 if (!ret) 128 color_fprintf(stdout, color, " %s\n", desc); 129 } else { 130 color_fprintf(stdout, color, " Bad packet!\n"); 131 } 132 pos += pkt_len; 133 buf += pkt_len; 134 len -= pkt_len; 135 } 136 } 137 138 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, 139 size_t len) 140 { 141 printf(".\n"); 142 arm_spe_dump(spe, buf, len); 143 } 144 145 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data) 146 { 147 struct arm_spe_queue *speq = data; 148 struct auxtrace_buffer *buffer = speq->buffer; 149 struct auxtrace_buffer *old_buffer = speq->old_buffer; 150 struct auxtrace_queue *queue; 151 152 queue = &speq->spe->queues.queue_array[speq->queue_nr]; 153 154 buffer = auxtrace_buffer__next(queue, buffer); 155 /* If no more data, drop the previous auxtrace_buffer and return */ 156 if (!buffer) { 157 if (old_buffer) 158 auxtrace_buffer__drop_data(old_buffer); 159 b->len = 0; 160 return 0; 161 } 162 163 speq->buffer = buffer; 164 165 /* If the aux_buffer doesn't have data associated, try to load it */ 166 if (!buffer->data) { 167 /* get the file desc associated with the perf data file */ 168 int fd = perf_data__fd(speq->spe->session->data); 169 170 buffer->data = auxtrace_buffer__get_data(buffer, fd); 171 if (!buffer->data) 172 return -ENOMEM; 173 } 174 175 b->len = buffer->size; 176 b->buf = buffer->data; 177 178 if (b->len) { 179 if (old_buffer) 180 auxtrace_buffer__drop_data(old_buffer); 181 speq->old_buffer = buffer; 182 } else { 183 auxtrace_buffer__drop_data(buffer); 184 return arm_spe_get_trace(b, data); 185 } 186 187 return 0; 188 } 189 190 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, 191 unsigned int queue_nr) 192 { 193 struct arm_spe_params params = { .get_trace = 0, }; 194 struct arm_spe_queue *speq; 195 196 speq = zalloc(sizeof(*speq)); 197 if (!speq) 198 return NULL; 199 200 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 201 if (!speq->event_buf) 202 goto out_free; 203 204 speq->spe = spe; 205 speq->queue_nr = queue_nr; 206 speq->pid = -1; 207 speq->tid = -1; 208 speq->cpu = -1; 209 speq->period_instructions = 0; 210 211 /* params set */ 212 params.get_trace = arm_spe_get_trace; 213 params.data = speq; 214 215 /* create new decoder */ 216 speq->decoder = arm_spe_decoder_new(¶ms); 217 if (!speq->decoder) 218 goto out_free; 219 220 return speq; 221 222 out_free: 223 zfree(&speq->event_buf); 224 free(speq); 225 226 return NULL; 227 } 228 229 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip) 230 { 231 return ip >= spe->kernel_start ? 232 PERF_RECORD_MISC_KERNEL : 233 PERF_RECORD_MISC_USER; 234 } 235 236 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe, 237 struct auxtrace_queue *queue) 238 { 239 struct arm_spe_queue *speq = queue->priv; 240 pid_t tid; 241 242 tid = machine__get_current_tid(spe->machine, speq->cpu); 243 if (tid != -1) { 244 speq->tid = tid; 245 thread__zput(speq->thread); 246 } else 247 speq->tid = queue->tid; 248 249 if ((!speq->thread) && (speq->tid != -1)) { 250 speq->thread = machine__find_thread(spe->machine, -1, 251 speq->tid); 252 } 253 254 if (speq->thread) { 255 speq->pid = speq->thread->pid_; 256 if (queue->cpu == -1) 257 speq->cpu = speq->thread->cpu; 258 } 259 } 260 261 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) 262 { 263 struct arm_spe *spe = speq->spe; 264 int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid); 265 266 if (err) 267 return err; 268 269 arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]); 270 271 return 0; 272 } 273 274 static void arm_spe_prep_sample(struct arm_spe *spe, 275 struct arm_spe_queue *speq, 276 union perf_event *event, 277 struct perf_sample *sample) 278 { 279 struct arm_spe_record *record = &speq->decoder->record; 280 281 if (!spe->timeless_decoding) 282 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc); 283 284 sample->ip = record->from_ip; 285 sample->cpumode = arm_spe_cpumode(spe, sample->ip); 286 sample->pid = speq->pid; 287 sample->tid = speq->tid; 288 sample->period = 1; 289 sample->cpu = speq->cpu; 290 291 event->sample.header.type = PERF_RECORD_SAMPLE; 292 event->sample.header.misc = sample->cpumode; 293 event->sample.header.size = sizeof(struct perf_event_header); 294 } 295 296 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) 297 { 298 event->header.size = perf_event__sample_event_size(sample, type, 0); 299 return perf_event__synthesize_sample(event, type, 0, sample); 300 } 301 302 static inline int 303 arm_spe_deliver_synth_event(struct arm_spe *spe, 304 struct arm_spe_queue *speq __maybe_unused, 305 union perf_event *event, 306 struct perf_sample *sample) 307 { 308 int ret; 309 310 if (spe->synth_opts.inject) { 311 ret = arm_spe__inject_event(event, sample, spe->sample_type); 312 if (ret) 313 return ret; 314 } 315 316 ret = perf_session__deliver_synth_event(spe->session, event, sample); 317 if (ret) 318 pr_err("ARM SPE: failed to deliver event, error %d\n", ret); 319 320 return ret; 321 } 322 323 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, 324 u64 spe_events_id, u64 data_src) 325 { 326 struct arm_spe *spe = speq->spe; 327 struct arm_spe_record *record = &speq->decoder->record; 328 union perf_event *event = speq->event_buf; 329 struct perf_sample sample = { .ip = 0, }; 330 331 arm_spe_prep_sample(spe, speq, event, &sample); 332 333 sample.id = spe_events_id; 334 sample.stream_id = spe_events_id; 335 sample.addr = record->virt_addr; 336 sample.phys_addr = record->phys_addr; 337 sample.data_src = data_src; 338 sample.weight = record->latency; 339 340 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 341 } 342 343 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, 344 u64 spe_events_id) 345 { 346 struct arm_spe *spe = speq->spe; 347 struct arm_spe_record *record = &speq->decoder->record; 348 union perf_event *event = speq->event_buf; 349 struct perf_sample sample = { .ip = 0, }; 350 351 arm_spe_prep_sample(spe, speq, event, &sample); 352 353 sample.id = spe_events_id; 354 sample.stream_id = spe_events_id; 355 sample.addr = record->to_ip; 356 sample.weight = record->latency; 357 358 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 359 } 360 361 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, 362 u64 spe_events_id, u64 data_src) 363 { 364 struct arm_spe *spe = speq->spe; 365 struct arm_spe_record *record = &speq->decoder->record; 366 union perf_event *event = speq->event_buf; 367 struct perf_sample sample = { .ip = 0, }; 368 369 /* 370 * Handles perf instruction sampling period. 371 */ 372 speq->period_instructions++; 373 if (speq->period_instructions < spe->instructions_sample_period) 374 return 0; 375 speq->period_instructions = 0; 376 377 arm_spe_prep_sample(spe, speq, event, &sample); 378 379 sample.id = spe_events_id; 380 sample.stream_id = spe_events_id; 381 sample.addr = record->virt_addr; 382 sample.phys_addr = record->phys_addr; 383 sample.data_src = data_src; 384 sample.period = spe->instructions_sample_period; 385 sample.weight = record->latency; 386 387 return arm_spe_deliver_synth_event(spe, speq, event, &sample); 388 } 389 390 #define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ 391 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ 392 ARM_SPE_REMOTE_ACCESS) 393 394 static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) 395 { 396 if (type & SPE_MEM_TYPE) 397 return true; 398 399 return false; 400 } 401 402 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) 403 { 404 union perf_mem_data_src data_src = { 0 }; 405 406 if (record->op == ARM_SPE_LD) 407 data_src.mem_op = PERF_MEM_OP_LOAD; 408 else 409 data_src.mem_op = PERF_MEM_OP_STORE; 410 411 if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { 412 data_src.mem_lvl = PERF_MEM_LVL_L3; 413 414 if (record->type & ARM_SPE_LLC_MISS) 415 data_src.mem_lvl |= PERF_MEM_LVL_MISS; 416 else 417 data_src.mem_lvl |= PERF_MEM_LVL_HIT; 418 } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { 419 data_src.mem_lvl = PERF_MEM_LVL_L1; 420 421 if (record->type & ARM_SPE_L1D_MISS) 422 data_src.mem_lvl |= PERF_MEM_LVL_MISS; 423 else 424 data_src.mem_lvl |= PERF_MEM_LVL_HIT; 425 } 426 427 if (record->type & ARM_SPE_REMOTE_ACCESS) 428 data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1; 429 430 if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { 431 data_src.mem_dtlb = PERF_MEM_TLB_WK; 432 433 if (record->type & ARM_SPE_TLB_MISS) 434 data_src.mem_dtlb |= PERF_MEM_TLB_MISS; 435 else 436 data_src.mem_dtlb |= PERF_MEM_TLB_HIT; 437 } 438 439 return data_src.val; 440 } 441 442 static int arm_spe_sample(struct arm_spe_queue *speq) 443 { 444 const struct arm_spe_record *record = &speq->decoder->record; 445 struct arm_spe *spe = speq->spe; 446 u64 data_src; 447 int err; 448 449 data_src = arm_spe__synth_data_source(record); 450 451 if (spe->sample_flc) { 452 if (record->type & ARM_SPE_L1D_MISS) { 453 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id, 454 data_src); 455 if (err) 456 return err; 457 } 458 459 if (record->type & ARM_SPE_L1D_ACCESS) { 460 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id, 461 data_src); 462 if (err) 463 return err; 464 } 465 } 466 467 if (spe->sample_llc) { 468 if (record->type & ARM_SPE_LLC_MISS) { 469 err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id, 470 data_src); 471 if (err) 472 return err; 473 } 474 475 if (record->type & ARM_SPE_LLC_ACCESS) { 476 err = arm_spe__synth_mem_sample(speq, spe->llc_access_id, 477 data_src); 478 if (err) 479 return err; 480 } 481 } 482 483 if (spe->sample_tlb) { 484 if (record->type & ARM_SPE_TLB_MISS) { 485 err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id, 486 data_src); 487 if (err) 488 return err; 489 } 490 491 if (record->type & ARM_SPE_TLB_ACCESS) { 492 err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id, 493 data_src); 494 if (err) 495 return err; 496 } 497 } 498 499 if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { 500 err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); 501 if (err) 502 return err; 503 } 504 505 if (spe->sample_remote_access && 506 (record->type & ARM_SPE_REMOTE_ACCESS)) { 507 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id, 508 data_src); 509 if (err) 510 return err; 511 } 512 513 if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { 514 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); 515 if (err) 516 return err; 517 } 518 519 if (spe->sample_instructions) { 520 err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); 521 if (err) 522 return err; 523 } 524 525 return 0; 526 } 527 528 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) 529 { 530 struct arm_spe *spe = speq->spe; 531 struct arm_spe_record *record; 532 int ret; 533 534 if (!spe->kernel_start) 535 spe->kernel_start = machine__kernel_start(spe->machine); 536 537 while (1) { 538 /* 539 * The usual logic is firstly to decode the packets, and then 540 * based the record to synthesize sample; but here the flow is 541 * reversed: it calls arm_spe_sample() for synthesizing samples 542 * prior to arm_spe_decode(). 543 * 544 * Two reasons for this code logic: 545 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it 546 * has decoded trace data and generated a record, but the record 547 * is left to generate sample until run to here, so it's correct 548 * to synthesize sample for the left record. 549 * 2. After decoding trace data, it needs to compare the record 550 * timestamp with the coming perf event, if the record timestamp 551 * is later than the perf event, it needs bail out and pushs the 552 * record into auxtrace heap, thus the record can be deferred to 553 * synthesize sample until run to here at the next time; so this 554 * can correlate samples between Arm SPE trace data and other 555 * perf events with correct time ordering. 556 */ 557 558 /* 559 * Update pid/tid info. 560 */ 561 record = &speq->decoder->record; 562 if (!spe->timeless_decoding && record->context_id != (u64)-1) { 563 ret = arm_spe_set_tid(speq, record->context_id); 564 if (ret) 565 return ret; 566 567 spe->use_ctx_pkt_for_pid = true; 568 } 569 570 ret = arm_spe_sample(speq); 571 if (ret) 572 return ret; 573 574 ret = arm_spe_decode(speq->decoder); 575 if (!ret) { 576 pr_debug("No data or all data has been processed.\n"); 577 return 1; 578 } 579 580 /* 581 * Error is detected when decode SPE trace data, continue to 582 * the next trace data and find out more records. 583 */ 584 if (ret < 0) 585 continue; 586 587 record = &speq->decoder->record; 588 589 /* Update timestamp for the last record */ 590 if (record->timestamp > speq->timestamp) 591 speq->timestamp = record->timestamp; 592 593 /* 594 * If the timestamp of the queue is later than timestamp of the 595 * coming perf event, bail out so can allow the perf event to 596 * be processed ahead. 597 */ 598 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { 599 *timestamp = speq->timestamp; 600 return 0; 601 } 602 } 603 604 return 0; 605 } 606 607 static int arm_spe__setup_queue(struct arm_spe *spe, 608 struct auxtrace_queue *queue, 609 unsigned int queue_nr) 610 { 611 struct arm_spe_queue *speq = queue->priv; 612 struct arm_spe_record *record; 613 614 if (list_empty(&queue->head) || speq) 615 return 0; 616 617 speq = arm_spe__alloc_queue(spe, queue_nr); 618 619 if (!speq) 620 return -ENOMEM; 621 622 queue->priv = speq; 623 624 if (queue->cpu != -1) 625 speq->cpu = queue->cpu; 626 627 if (!speq->on_heap) { 628 int ret; 629 630 if (spe->timeless_decoding) 631 return 0; 632 633 retry: 634 ret = arm_spe_decode(speq->decoder); 635 636 if (!ret) 637 return 0; 638 639 if (ret < 0) 640 goto retry; 641 642 record = &speq->decoder->record; 643 644 speq->timestamp = record->timestamp; 645 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp); 646 if (ret) 647 return ret; 648 speq->on_heap = true; 649 } 650 651 return 0; 652 } 653 654 static int arm_spe__setup_queues(struct arm_spe *spe) 655 { 656 unsigned int i; 657 int ret; 658 659 for (i = 0; i < spe->queues.nr_queues; i++) { 660 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i); 661 if (ret) 662 return ret; 663 } 664 665 return 0; 666 } 667 668 static int arm_spe__update_queues(struct arm_spe *spe) 669 { 670 if (spe->queues.new_data) { 671 spe->queues.new_data = false; 672 return arm_spe__setup_queues(spe); 673 } 674 675 return 0; 676 } 677 678 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe) 679 { 680 struct evsel *evsel; 681 struct evlist *evlist = spe->session->evlist; 682 bool timeless_decoding = true; 683 684 /* 685 * Circle through the list of event and complain if we find one 686 * with the time bit set. 687 */ 688 evlist__for_each_entry(evlist, evsel) { 689 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) 690 timeless_decoding = false; 691 } 692 693 return timeless_decoding; 694 } 695 696 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp) 697 { 698 unsigned int queue_nr; 699 u64 ts; 700 int ret; 701 702 while (1) { 703 struct auxtrace_queue *queue; 704 struct arm_spe_queue *speq; 705 706 if (!spe->heap.heap_cnt) 707 return 0; 708 709 if (spe->heap.heap_array[0].ordinal >= timestamp) 710 return 0; 711 712 queue_nr = spe->heap.heap_array[0].queue_nr; 713 queue = &spe->queues.queue_array[queue_nr]; 714 speq = queue->priv; 715 716 auxtrace_heap__pop(&spe->heap); 717 718 if (spe->heap.heap_cnt) { 719 ts = spe->heap.heap_array[0].ordinal + 1; 720 if (ts > timestamp) 721 ts = timestamp; 722 } else { 723 ts = timestamp; 724 } 725 726 /* 727 * A previous context-switch event has set pid/tid in the machine's context, so 728 * here we need to update the pid/tid in the thread and SPE queue. 729 */ 730 if (!spe->use_ctx_pkt_for_pid) 731 arm_spe_set_pid_tid_cpu(spe, queue); 732 733 ret = arm_spe_run_decoder(speq, &ts); 734 if (ret < 0) { 735 auxtrace_heap__add(&spe->heap, queue_nr, ts); 736 return ret; 737 } 738 739 if (!ret) { 740 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts); 741 if (ret < 0) 742 return ret; 743 } else { 744 speq->on_heap = false; 745 } 746 } 747 748 return 0; 749 } 750 751 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid, 752 u64 time_) 753 { 754 struct auxtrace_queues *queues = &spe->queues; 755 unsigned int i; 756 u64 ts = 0; 757 758 for (i = 0; i < queues->nr_queues; i++) { 759 struct auxtrace_queue *queue = &spe->queues.queue_array[i]; 760 struct arm_spe_queue *speq = queue->priv; 761 762 if (speq && (tid == -1 || speq->tid == tid)) { 763 speq->time = time_; 764 arm_spe_set_pid_tid_cpu(spe, queue); 765 arm_spe_run_decoder(speq, &ts); 766 } 767 } 768 return 0; 769 } 770 771 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, 772 struct perf_sample *sample) 773 { 774 pid_t pid, tid; 775 int cpu; 776 777 if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT)) 778 return 0; 779 780 pid = event->context_switch.next_prev_pid; 781 tid = event->context_switch.next_prev_tid; 782 cpu = sample->cpu; 783 784 if (tid == -1) 785 pr_warning("context_switch event has no tid\n"); 786 787 return machine__set_current_tid(spe->machine, cpu, pid, tid); 788 } 789 790 static int arm_spe_process_event(struct perf_session *session, 791 union perf_event *event, 792 struct perf_sample *sample, 793 struct perf_tool *tool) 794 { 795 int err = 0; 796 u64 timestamp; 797 struct arm_spe *spe = container_of(session->auxtrace, 798 struct arm_spe, auxtrace); 799 800 if (dump_trace) 801 return 0; 802 803 if (!tool->ordered_events) { 804 pr_err("SPE trace requires ordered events\n"); 805 return -EINVAL; 806 } 807 808 if (sample->time && (sample->time != (u64) -1)) 809 timestamp = perf_time_to_tsc(sample->time, &spe->tc); 810 else 811 timestamp = 0; 812 813 if (timestamp || spe->timeless_decoding) { 814 err = arm_spe__update_queues(spe); 815 if (err) 816 return err; 817 } 818 819 if (spe->timeless_decoding) { 820 if (event->header.type == PERF_RECORD_EXIT) { 821 err = arm_spe_process_timeless_queues(spe, 822 event->fork.tid, 823 sample->time); 824 } 825 } else if (timestamp) { 826 err = arm_spe_process_queues(spe, timestamp); 827 if (err) 828 return err; 829 830 if (!spe->use_ctx_pkt_for_pid && 831 (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE || 832 event->header.type == PERF_RECORD_SWITCH)) 833 err = arm_spe_context_switch(spe, event, sample); 834 } 835 836 return err; 837 } 838 839 static int arm_spe_process_auxtrace_event(struct perf_session *session, 840 union perf_event *event, 841 struct perf_tool *tool __maybe_unused) 842 { 843 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 844 auxtrace); 845 846 if (!spe->data_queued) { 847 struct auxtrace_buffer *buffer; 848 off_t data_offset; 849 int fd = perf_data__fd(session->data); 850 int err; 851 852 if (perf_data__is_pipe(session->data)) { 853 data_offset = 0; 854 } else { 855 data_offset = lseek(fd, 0, SEEK_CUR); 856 if (data_offset == -1) 857 return -errno; 858 } 859 860 err = auxtrace_queues__add_event(&spe->queues, session, event, 861 data_offset, &buffer); 862 if (err) 863 return err; 864 865 /* Dump here now we have copied a piped trace out of the pipe */ 866 if (dump_trace) { 867 if (auxtrace_buffer__get_data(buffer, fd)) { 868 arm_spe_dump_event(spe, buffer->data, 869 buffer->size); 870 auxtrace_buffer__put_data(buffer); 871 } 872 } 873 } 874 875 return 0; 876 } 877 878 static int arm_spe_flush(struct perf_session *session __maybe_unused, 879 struct perf_tool *tool __maybe_unused) 880 { 881 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 882 auxtrace); 883 int ret; 884 885 if (dump_trace) 886 return 0; 887 888 if (!tool->ordered_events) 889 return -EINVAL; 890 891 ret = arm_spe__update_queues(spe); 892 if (ret < 0) 893 return ret; 894 895 if (spe->timeless_decoding) 896 return arm_spe_process_timeless_queues(spe, -1, 897 MAX_TIMESTAMP - 1); 898 899 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP); 900 if (ret) 901 return ret; 902 903 if (!spe->use_ctx_pkt_for_pid) 904 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n" 905 "Matching of TIDs to SPE events could be inaccurate.\n"); 906 907 return 0; 908 } 909 910 static void arm_spe_free_queue(void *priv) 911 { 912 struct arm_spe_queue *speq = priv; 913 914 if (!speq) 915 return; 916 thread__zput(speq->thread); 917 arm_spe_decoder_free(speq->decoder); 918 zfree(&speq->event_buf); 919 free(speq); 920 } 921 922 static void arm_spe_free_events(struct perf_session *session) 923 { 924 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 925 auxtrace); 926 struct auxtrace_queues *queues = &spe->queues; 927 unsigned int i; 928 929 for (i = 0; i < queues->nr_queues; i++) { 930 arm_spe_free_queue(queues->queue_array[i].priv); 931 queues->queue_array[i].priv = NULL; 932 } 933 auxtrace_queues__free(queues); 934 } 935 936 static void arm_spe_free(struct perf_session *session) 937 { 938 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, 939 auxtrace); 940 941 auxtrace_heap__free(&spe->heap); 942 arm_spe_free_events(session); 943 session->auxtrace = NULL; 944 free(spe); 945 } 946 947 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, 948 struct evsel *evsel) 949 { 950 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); 951 952 return evsel->core.attr.type == spe->pmu_type; 953 } 954 955 static const char * const arm_spe_info_fmts[] = { 956 [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", 957 }; 958 959 static void arm_spe_print_info(__u64 *arr) 960 { 961 if (!dump_trace) 962 return; 963 964 fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); 965 } 966 967 struct arm_spe_synth { 968 struct perf_tool dummy_tool; 969 struct perf_session *session; 970 }; 971 972 static int arm_spe_event_synth(struct perf_tool *tool, 973 union perf_event *event, 974 struct perf_sample *sample __maybe_unused, 975 struct machine *machine __maybe_unused) 976 { 977 struct arm_spe_synth *arm_spe_synth = 978 container_of(tool, struct arm_spe_synth, dummy_tool); 979 980 return perf_session__deliver_synth_event(arm_spe_synth->session, 981 event, NULL); 982 } 983 984 static int arm_spe_synth_event(struct perf_session *session, 985 struct perf_event_attr *attr, u64 id) 986 { 987 struct arm_spe_synth arm_spe_synth; 988 989 memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); 990 arm_spe_synth.session = session; 991 992 return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, 993 &id, arm_spe_event_synth); 994 } 995 996 static void arm_spe_set_event_name(struct evlist *evlist, u64 id, 997 const char *name) 998 { 999 struct evsel *evsel; 1000 1001 evlist__for_each_entry(evlist, evsel) { 1002 if (evsel->core.id && evsel->core.id[0] == id) { 1003 if (evsel->name) 1004 zfree(&evsel->name); 1005 evsel->name = strdup(name); 1006 break; 1007 } 1008 } 1009 } 1010 1011 static int 1012 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) 1013 { 1014 struct evlist *evlist = session->evlist; 1015 struct evsel *evsel; 1016 struct perf_event_attr attr; 1017 bool found = false; 1018 u64 id; 1019 int err; 1020 1021 evlist__for_each_entry(evlist, evsel) { 1022 if (evsel->core.attr.type == spe->pmu_type) { 1023 found = true; 1024 break; 1025 } 1026 } 1027 1028 if (!found) { 1029 pr_debug("No selected events with SPE trace data\n"); 1030 return 0; 1031 } 1032 1033 memset(&attr, 0, sizeof(struct perf_event_attr)); 1034 attr.size = sizeof(struct perf_event_attr); 1035 attr.type = PERF_TYPE_HARDWARE; 1036 attr.sample_type = evsel->core.attr.sample_type & 1037 (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR); 1038 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1039 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | 1040 PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR; 1041 if (spe->timeless_decoding) 1042 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1043 else 1044 attr.sample_type |= PERF_SAMPLE_TIME; 1045 1046 spe->sample_type = attr.sample_type; 1047 1048 attr.exclude_user = evsel->core.attr.exclude_user; 1049 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1050 attr.exclude_hv = evsel->core.attr.exclude_hv; 1051 attr.exclude_host = evsel->core.attr.exclude_host; 1052 attr.exclude_guest = evsel->core.attr.exclude_guest; 1053 attr.sample_id_all = evsel->core.attr.sample_id_all; 1054 attr.read_format = evsel->core.attr.read_format; 1055 1056 /* create new id val to be a fixed offset from evsel id */ 1057 id = evsel->core.id[0] + 1000000000; 1058 1059 if (!id) 1060 id = 1; 1061 1062 if (spe->synth_opts.flc) { 1063 spe->sample_flc = true; 1064 1065 /* Level 1 data cache miss */ 1066 err = arm_spe_synth_event(session, &attr, id); 1067 if (err) 1068 return err; 1069 spe->l1d_miss_id = id; 1070 arm_spe_set_event_name(evlist, id, "l1d-miss"); 1071 id += 1; 1072 1073 /* Level 1 data cache access */ 1074 err = arm_spe_synth_event(session, &attr, id); 1075 if (err) 1076 return err; 1077 spe->l1d_access_id = id; 1078 arm_spe_set_event_name(evlist, id, "l1d-access"); 1079 id += 1; 1080 } 1081 1082 if (spe->synth_opts.llc) { 1083 spe->sample_llc = true; 1084 1085 /* Last level cache miss */ 1086 err = arm_spe_synth_event(session, &attr, id); 1087 if (err) 1088 return err; 1089 spe->llc_miss_id = id; 1090 arm_spe_set_event_name(evlist, id, "llc-miss"); 1091 id += 1; 1092 1093 /* Last level cache access */ 1094 err = arm_spe_synth_event(session, &attr, id); 1095 if (err) 1096 return err; 1097 spe->llc_access_id = id; 1098 arm_spe_set_event_name(evlist, id, "llc-access"); 1099 id += 1; 1100 } 1101 1102 if (spe->synth_opts.tlb) { 1103 spe->sample_tlb = true; 1104 1105 /* TLB miss */ 1106 err = arm_spe_synth_event(session, &attr, id); 1107 if (err) 1108 return err; 1109 spe->tlb_miss_id = id; 1110 arm_spe_set_event_name(evlist, id, "tlb-miss"); 1111 id += 1; 1112 1113 /* TLB access */ 1114 err = arm_spe_synth_event(session, &attr, id); 1115 if (err) 1116 return err; 1117 spe->tlb_access_id = id; 1118 arm_spe_set_event_name(evlist, id, "tlb-access"); 1119 id += 1; 1120 } 1121 1122 if (spe->synth_opts.branches) { 1123 spe->sample_branch = true; 1124 1125 /* Branch miss */ 1126 err = arm_spe_synth_event(session, &attr, id); 1127 if (err) 1128 return err; 1129 spe->branch_miss_id = id; 1130 arm_spe_set_event_name(evlist, id, "branch-miss"); 1131 id += 1; 1132 } 1133 1134 if (spe->synth_opts.remote_access) { 1135 spe->sample_remote_access = true; 1136 1137 /* Remote access */ 1138 err = arm_spe_synth_event(session, &attr, id); 1139 if (err) 1140 return err; 1141 spe->remote_access_id = id; 1142 arm_spe_set_event_name(evlist, id, "remote-access"); 1143 id += 1; 1144 } 1145 1146 if (spe->synth_opts.mem) { 1147 spe->sample_memory = true; 1148 1149 err = arm_spe_synth_event(session, &attr, id); 1150 if (err) 1151 return err; 1152 spe->memory_id = id; 1153 arm_spe_set_event_name(evlist, id, "memory"); 1154 id += 1; 1155 } 1156 1157 if (spe->synth_opts.instructions) { 1158 if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { 1159 pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); 1160 goto synth_instructions_out; 1161 } 1162 if (spe->synth_opts.period > 1) 1163 pr_warning("Arm SPE has a hardware-based sample period.\n" 1164 "Additional instruction events will be discarded by --itrace\n"); 1165 1166 spe->sample_instructions = true; 1167 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1168 attr.sample_period = spe->synth_opts.period; 1169 spe->instructions_sample_period = attr.sample_period; 1170 err = arm_spe_synth_event(session, &attr, id); 1171 if (err) 1172 return err; 1173 spe->instructions_id = id; 1174 arm_spe_set_event_name(evlist, id, "instructions"); 1175 } 1176 synth_instructions_out: 1177 1178 return 0; 1179 } 1180 1181 int arm_spe_process_auxtrace_info(union perf_event *event, 1182 struct perf_session *session) 1183 { 1184 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 1185 size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; 1186 struct perf_record_time_conv *tc = &session->time_conv; 1187 struct arm_spe *spe; 1188 int err; 1189 1190 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 1191 min_sz) 1192 return -EINVAL; 1193 1194 spe = zalloc(sizeof(struct arm_spe)); 1195 if (!spe) 1196 return -ENOMEM; 1197 1198 err = auxtrace_queues__init(&spe->queues); 1199 if (err) 1200 goto err_free; 1201 1202 spe->session = session; 1203 spe->machine = &session->machines.host; /* No kvm support */ 1204 spe->auxtrace_type = auxtrace_info->type; 1205 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; 1206 1207 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); 1208 1209 /* 1210 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead 1211 * and the parameters for hardware clock are stored in the session 1212 * context. Passes these parameters to the struct perf_tsc_conversion 1213 * in "spe->tc", which is used for later conversion between clock 1214 * counter and timestamp. 1215 * 1216 * For backward compatibility, copies the fields starting from 1217 * "time_cycles" only if they are contained in the event. 1218 */ 1219 spe->tc.time_shift = tc->time_shift; 1220 spe->tc.time_mult = tc->time_mult; 1221 spe->tc.time_zero = tc->time_zero; 1222 1223 if (event_contains(*tc, time_cycles)) { 1224 spe->tc.time_cycles = tc->time_cycles; 1225 spe->tc.time_mask = tc->time_mask; 1226 spe->tc.cap_user_time_zero = tc->cap_user_time_zero; 1227 spe->tc.cap_user_time_short = tc->cap_user_time_short; 1228 } 1229 1230 spe->auxtrace.process_event = arm_spe_process_event; 1231 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; 1232 spe->auxtrace.flush_events = arm_spe_flush; 1233 spe->auxtrace.free_events = arm_spe_free_events; 1234 spe->auxtrace.free = arm_spe_free; 1235 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; 1236 session->auxtrace = &spe->auxtrace; 1237 1238 arm_spe_print_info(&auxtrace_info->priv[0]); 1239 1240 if (dump_trace) 1241 return 0; 1242 1243 if (session->itrace_synth_opts && session->itrace_synth_opts->set) 1244 spe->synth_opts = *session->itrace_synth_opts; 1245 else 1246 itrace_synth_opts__set_default(&spe->synth_opts, false); 1247 1248 err = arm_spe_synth_events(spe, session); 1249 if (err) 1250 goto err_free_queues; 1251 1252 err = auxtrace_queues__process_index(&spe->queues, session); 1253 if (err) 1254 goto err_free_queues; 1255 1256 if (spe->queues.populated) 1257 spe->data_queued = true; 1258 1259 return 0; 1260 1261 err_free_queues: 1262 auxtrace_queues__free(&spe->queues); 1263 session->auxtrace = NULL; 1264 err_free: 1265 free(spe); 1266 return err; 1267 } 1268