// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include <opencsd/ocsd_if_types.h>
#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "dso.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "session.h"
#include "map_symbol.h"
#include "branch.h"
#include "symbol.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"

#define MAX_TIMESTAMP (~0ULL)

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct machine *machine;
	struct thread *unknown_thread;

	u8 timeless_decoding;
	u8 snapshot_mode;
	u8 data_queued;
	u8 sample_branches;
	u8 sample_instructions;

	int num_cpu;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	u64 kernel_start;
	unsigned int pmu_type;
};

struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	pid_t pid, tid;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;
	struct thread *thread;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};

struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	u8 pending_timestamp;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
};

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);

/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify the auxtrace_heap_XYZ() API; a
 * simpler one is to encode the etm queue number as the upper 16 bits
 * and the channel ID as the lower 16 bits.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
	((queue_nr) << 16 | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
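/*
 * Illustration (not from the original source): with queue_nr = 2 and
 * trace_chan_id = 0x10, TO_CS_QUEUE_NR() yields 0x20010. Unpacking it,
 * TO_QUEUE_NR(0x20010) == 2 and TO_TRACE_CHAN_ID(0x20010) == 0x10, so a
 * single heap entry identifies both the etm queue and the traceID queue
 * within it.
 */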
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	etmidr &= ETMIDR_PTM_VERSION;

	if (etmidr == ETMIDR_PTM_VERSION)
		return CS_ETM_PROTO_PTM;

	return CS_ETM_PROTO_ETMV3;
}

static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*magic = metadata[CS_ETM_MAGIC];
	return 0;
}

int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*cpu = (int)metadata[CS_ETM_CPU];
	return 0;
}

void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue. Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp = trace_chan_id;
}

static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
				      u8 *trace_chan_id)
{
	struct cs_etm_packet_queue *packet_queue;

	if (!etmq->pending_timestamp)
		return 0;

	if (trace_chan_id)
		*trace_chan_id = etmq->pending_timestamp;

	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
						     etmq->pending_timestamp);
	if (!packet_queue)
		return 0;

	/* Acknowledge pending status */
	etmq->pending_timestamp = 0;

	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
	return packet_queue->timestamp;
}

static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
{
	int i;

	queue->head = 0;
	queue->tail = 0;
	queue->packet_count = 0;
	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].instr_count = 0;
		queue->packet_buffer[i].last_instr_taken_branch = false;
		queue->packet_buffer[i].last_instr_size = 0;
		queue->packet_buffer[i].last_instr_type = 0;
		queue->packet_buffer[i].last_instr_subtype = 0;
		queue->packet_buffer[i].last_instr_cond = 0;
		queue->packet_buffer[i].flags = 0;
		queue->packet_buffer[i].exception_number = UINT32_MAX;
		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
		queue->packet_buffer[i].cpu = INT_MIN;
	}
}

static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];
		cs_etm__clear_packet_queue(&tidq->packet_queue);
	}
}
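/*
 * Illustration of the timestamp handshake above (not from the original
 * source): when the decoder sees a timestamp on, say, channel 0x10 it
 * calls cs_etm__etmq_set_traceid_queue_timestamp(etmq, 0x10), which
 * latches the channel in etmq->pending_timestamp and stops the decode
 * loop. The front end then calls cs_etm__etmq_get_timestamp(), which
 * reports 0x10 through *trace_chan_id, clears the pending status and
 * returns the timestamp stored in that channel's packet queue.
 */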
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->tid = queue->tid;
	tidq->pid = -1;
	tidq->trace_chan_id = trace_chan_id;

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}

static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	int idx;
	struct int_node *inode;
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue *tidq, **traceid_queues;
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->timeless_decoding)
		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

	traceid_queues_list = etmq->traceid_queues_list;

	/*
	 * Check if the traceid_queue exists for this traceID by looking
	 * in the queue list.
	 */
	inode = intlist__find(traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* We couldn't find a traceid_queue for this traceID, allocate one */
	tidq = malloc(sizeof(*tidq));
	if (!tidq)
		return NULL;

	memset(tidq, 0, sizeof(*tidq));

	/* Get a valid index for the new traceid_queue */
	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
	if (!inode)
		goto out_free;

	/* Associate this traceID with this index */
	inode->priv = (void *)(intptr_t)idx;

	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
		goto out_free;

	/* Grow the traceid_queues array by one unit */
	traceid_queues = etmq->traceid_queues;
	traceid_queues = reallocarray(traceid_queues,
				      idx + 1,
				      sizeof(*traceid_queues));

	/*
	 * On failure reallocarray() returns NULL and the original block of
	 * memory is left untouched.
	 */
	if (!traceid_queues)
		goto out_free;

	traceid_queues[idx] = tidq;
	etmq->traceid_queues = traceid_queues;

	return etmq->traceid_queues[idx];

out_free:
	/*
	 * Function intlist__remove() removes the inode from the list
	 * and deletes the memory associated with it.
	 */
	intlist__remove(traceid_queues_list, inode);
	free(tidq);

	return NULL;
}
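/*
 * Illustration of the lookup above (not from the original source): the
 * first traceID seen, e.g. 0x10, gets index 0 and traceid_queues[] is
 * grown to one element; a second traceID, e.g. 0x12, gets index 1, and
 * so on. The intlist maps traceID -> index, while the flat array keeps
 * the queues themselves so that a hit only costs one extra dereference.
 */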
struct cs_etm_packet_queue
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (tidq)
		return &tidq->packet_queue;

	return NULL;
}

static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, " %s", pkt_string);
	else
		color_fprintf(stdout, color, " %s\n", pkt_string);

	fflush(stdout);
}

static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx,
					  u32 etmidr)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
}

static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm)
{
	int i;
	u32 etmidr;
	u64 architecture;

	for (i = 0; i < etm->num_cpu; i++) {
		architecture = etm->metadata[i][CS_ETM_MAGIC];

		switch (architecture) {
		case __perf_cs_etmv3_magic:
			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, i);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode)
{
	int ret = -EINVAL;

	if (!(mode < CS_ETM_OPERATION_MAX))
		goto out;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	d_params->formatted = true;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	ret = 0;
out:
	return ret;
}
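/*
 * Note on the two operation modes used in this file (a summary, not from
 * the original source): CS_ETM_OPERATION_PRINT is used by
 * cs_etm__dump_event() below with etmq == NULL, so decoded packets are
 * only routed to the packet_printer callback. CS_ETM_OPERATION_DECODE is
 * used by cs_etm__alloc_queue() with a real etmq as the callback data,
 * so packets land in the per-traceID packet queues for sample synthesis.
 */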
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_decoder *decoder;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		      ". ... CoreSight ETM Trace data: size %zu bytes\n",
		      buffer->size);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		return;

	if (cs_etm__init_trace_params(t_params, etm))
		goto out_free;

	/* Set decoder parameters to simply print the trace packets */
	if (cs_etm__init_decoder_params(&d_params, NULL,
					CS_ETM_OPERATION_PRINT))
		goto out_free;

	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	if (!decoder)
		goto out_free;
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__free(decoder);

out_free:
	zfree(&t_params);
}
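/*
 * Illustration of the consumption loop above (not from the original
 * source): cs_etm_decoder__process_data_block() may return before the
 * whole block is consumed, reporting how many bytes it did take through
 * 'consumed'. With a 4096 byte buffer a first call might consume 2048
 * bytes, so the next call starts at data[2048] with 2048 bytes left,
 * and the loop exits once buffer_used reaches buffer->size.
 */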
static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	int ret;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

	if (etm->timeless_decoding)
		return cs_etm__process_timeless_queues(etm, -1);

	return cs_etm__process_queues(etm);
}

static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and deletes the memory associated with it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}

static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	cs_etm_decoder__free(etmq->decoder);
	cs_etm__free_traceid_queues(etmq);
	free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/metadata nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	thread__zput(aux->unknown_thread);
	zfree(&aux->metadata);
	zfree(&aux);
}

static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
	struct machine *machine;

	machine = etmq->etm->machine;

	if (address >= etmq->etm->kernel_start) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else if (perf_guest)
			return PERF_RECORD_MISC_GUEST_USER;
		else
			return PERF_RECORD_MISC_HYPERVISOR;
	}
}

static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct thread *thread;
	struct machine *machine;
	struct addr_location al;
	struct cs_etm_traceid_queue *tidq;

	if (!etmq)
		return 0;

	machine = etmq->etm->machine;
	cpumode = cs_etm__cpu_mode(etmq, address);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		return 0;

	thread = tidq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return 0;
		thread = etmq->etm->unknown_thread;
	}

	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
		return 0;

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
		return 0;

	offset = al.map->map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);

	if (len <= 0)
		return 0;

	return len;
}
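/*
 * Illustration of the address-space split above (not from the original
 * source): on a host machine with kernel_start = 0xffff000000000000, an
 * address of 0xffff000008080000 resolves to PERF_RECORD_MISC_KERNEL
 * while 0x0000aaaabbcc0000 resolves to PERF_RECORD_MISC_USER. The
 * cpumode then tells thread__find_map() which address space to search
 * when cs_etm__mem_access() reads instructions for the decoder.
 */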
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params = NULL;
	struct cs_etm_queue *etmq;

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etm))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	zfree(&t_params);
	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 timestamp;
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		goto out;

	etmq = cs_etm__alloc_queue(etm);

	if (!etmq) {
		ret = -ENOMEM;
		goto out;
	}

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->offset = 0;

	if (etm->timeless_decoding)
		goto out;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know which of all the etmqs to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block. The decoder will stop when
		 * encountering a timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
out:
	return ret;
}
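/*
 * Illustration of the min heap priming above (not from the original
 * source): if queue 0 first yields a timestamp of 500 and queue 1 a
 * timestamp of 300, the heap orders them as (1, 300), (0, 500), so
 * cs_etm__process_queues() pops queue 1 first and the synthesized
 * samples come out in chronological order across CPUs.
 */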
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
{
	unsigned int i;
	int ret;

	if (!etm->kernel_start)
		etm->kernel_start = machine__kernel_start(etm->machine);

	for (i = 0; i < etm->queues.nr_queues; i++) {
		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
{
	if (etm->queues.new_data) {
		etm->queues.new_data = false;
		return cs_etm__setup_queues(etm);
	}

	return 0;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}

static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
	tidq->last_branch_pos = 0;
	tidq->last_branch_rb->nr = 0;
}
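/*
 * Worked example for the two-step copy above (not from the original
 * source): with last_branch_sz = 4, last_branch_pos = 1 and a wrapped
 * buffer (bs_src->nr >= 4), step one copies entries[1..3] (3 records,
 * newest first) into bs_dst->entries[0..2]; step two copies entries[0]
 * (the oldest record) into bs_dst->entries[3]. The destination is thus
 * linear, ordered from most recent to oldest branch.
 */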
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr,
			   ARRAY_SIZE(instrBytes), instrBytes);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->end_addr - packet->last_instr_size;
}

static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	if (packet->isa == CS_ETM_ISA_T32) {
		u64 addr = packet->start_addr;

		while (offset > 0) {
			addr += cs_etm__t32_instr_size(etmq,
						       trace_chan_id, addr);
			offset--;
		}
		return addr;
	}

	/* Assume a 4 byte instruction size (A32/A64) */
	return packet->start_addr + offset * 4;
}
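/*
 * Worked example for the T32 walk above (not from the original source):
 * instrBytes[1] holds the high byte of the little-endian halfword, so
 * (instrBytes[1] & 0xF8) >= 0xE8 is true exactly when bits[15:11] are
 * 0b11101, 0b11110 or 0b11111. If a packet starts at 0x8000 with a
 * 16-bit instruction at 0x8000 and a 32-bit one at 0x8002, then
 * cs_etm__instr_addr(..., offset = 2) returns 0x8000 + 2 + 4 = 0x8006.
 */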
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down. After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}


static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}

static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
				    struct cs_etm_traceid_queue *tidq)
{
	if ((!tidq->thread) && (tidq->tid != -1))
		tidq->thread = machine__find_thread(etm->machine, -1,
						    tidq->tid);

	if (tidq->thread)
		tidq->pid = tidq->thread->pid_;
}

int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
			 pid_t tid, u8 trace_chan_id)
{
	int cpu, err = -EINVAL;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		return err;

	if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
		return err;

	err = machine__set_current_tid(etm->machine, cpu, tid, tid);
	if (err)
		return err;

	tidq->tid = tid;
	thread__zput(tidq->thread);

	cs_etm__set_pid_tid_cpu(etm, tidq);
	return 0;
}

bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
{
	return !!etmq->etm->timeless_decoding;
}
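/*
 * Summary of cs_etm__get_trace() return values above (restating the
 * code, not from the original source): 0 means the queue is exhausted
 * and the previous buffer has been dropped; a negative value means the
 * buffer data could not be mapped in; a positive value is the length of
 * the freshly fetched buffer, already published in etmq->buf/buf_len
 * with buf_used reset to 0.
 */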
static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
	 * packet, so directly bail out with 'insn_len' = 0.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
	 * cs_etm__t32_instr_size().
	 */
	if (packet->isa == CS_ETM_ISA_T32)
		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
							  sample->ip);
	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
	else
		sample->insn_len = 4;

	cs_etm__mem_access(etmq, trace_chan_id, sample->ip,
			   sample->insn_len, (void *)sample->insn);
}

static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = addr;
	sample.pid = tidq->pid;
	sample.tid = tidq->tid;
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch) {
		cs_etm__copy_last_branch_rb(etmq, tidq);
		sample.branch_stack = tidq->last_branch;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return ret;
}
/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch. Generate sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
				       struct cs_etm_traceid_queue *tidq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = tidq->event_buf;
	struct dummy_branch_stack {
		u64 nr;
		struct branch_entry entries;
	} dummy_bs;
	u64 ip;

	ip = cs_etm__last_executed_instr(tidq->prev_packet);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = ip;
	sample.pid = tidq->pid;
	sample.tid = tidq->tid;
	sample.addr = cs_etm__first_executed_instr(tidq->packet);
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
			  &sample);

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver branch event, error %d\n",
			ret);

	return ret;
}

struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *cs_etm_synth =
		      container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(cs_etm_synth->session,
						 event, NULL);
}

static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct cs_etm_synth cs_etm_synth;

	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
	cs_etm_synth.session = session;

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}
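/*
 * Note on the pattern above (a summary, not from the original source):
 * perf_event__synthesize_attr() only hands the callback a perf_tool
 * pointer, so cs_etm__synth_event() wraps the tool and the session in a
 * struct cs_etm_synth; cs_etm__event_synth() then recovers the session
 * with container_of() on the embedded dummy_tool member.
 */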
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_branches = true;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_instructions = true;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
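/*
 * Illustration of the id scheme above (not from the original source):
 * if the CoreSight evsel's first id is 42, the synthesized branch
 * events get id 1000000042 and the synthesized instruction events get
 * 1000000043. The fixed offset keeps the synthetic ids well clear of
 * the ids perf allocated at record time while remaining deterministic.
 */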
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_executed = tidq->packet->instr_count;

	tidq->period_instructions += instrs_executed;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->sample_instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/* Get number of instructions executed after the sample point */
		u64 instrs_over = tidq->period_instructions -
			etm->instructions_sample_period;

		/*
		 * Calculate the address of the sampled instruction (-1 as
		 * sample is reported as though instruction has just been
		 * executed, but PC has not advanced to next instruction)
		 */
		u64 offset = (instrs_executed - instrs_over - 1);
		u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
					      tidq->packet, offset);

		ret = cs_etm__synth_instruction_sample(
			etmq, tidq, addr, etm->instructions_sample_period);
		if (ret)
			return ret;

		/* Carry remaining instructions into next sample period */
		tidq->period_instructions = instrs_over;
	}

	if (etm->sample_branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = tidq->packet;
		tidq->packet = tidq->prev_packet;
		tidq->prev_packet = tmp;
	}

	return 0;
}

static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When the exception packet is inserted, whether or not the last
	 * instruction in the previous range packet was a taken branch, we
	 * need to force 'prev_packet->last_instr_taken_branch' to true. This
	 * ensures that a branch sample is generated for the instruction range
	 * before the exception is trapped to the kernel or before the
	 * exception returns.
	 *
	 * The exception packet includes dummy address values, so don't
	 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET usable
	 * for generating instruction and branch samples.
	 */
	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
		tidq->prev_packet->last_instr_taken_branch = true;

	return 0;
}
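/*
 * Worked example for the period arithmetic above (not from the original
 * source): with instructions_sample_period = 10000, a carried-over
 * period_instructions of 9000 and a packet of instr_count = 1500, the
 * total reaches 10500, so instrs_over = 500 and the sample lands at
 * offset = 1500 - 500 - 1 = 999 within the packet. The leftover 500
 * instructions are carried into the next sample period.
 */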
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->sample_branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = tidq->packet;
		tidq->packet = tidq->prev_packet;
		tidq->prev_packet = tmp;
	}

	return err;
}

static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * No new packet is coming and 'tidq->packet' contains the stale
	 * packet which was set during the previous packet swap; so skip
	 * generating a branch sample to avoid using the stale packet.
	 *
	 * For this case only flush the branch stack and generate a last
	 * branch event for the branches left in the circular buffer at the
	 * end of the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 *			   if need be.
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	return etmq->buf_len;
}
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15         b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, SVC is only defined for T32
		 * as a 16-bit instruction and has no 32-bit form; so below we
		 * only read 2 bytes as the instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr,
				   sizeof(instr16), (u8 *)&instr16);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |         imm24           |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr,
				   sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31               b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr,
				   sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}
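/*
 * Worked example for the encodings above (not from the original
 * source): T32 "SVC #5" encodes as 0xDF05, and (0xDF05 & 0xFF00) ==
 * 0xDF00 matches. A64 "SVC #0" encodes as 0xD4000001; masking with
 * 0xFFE0001F keeps the opcode bits and drops imm16, so any immediate
 * still compares equal to 0xD4000001.
 */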
static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SVC)
			return true;

	/*
	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
	 * HVC cases; need to check if it's SVC instruction based on
	 * packet address.
	 */
	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					 prev_packet->end_addr))
			return true;
	}

	return false;
}

static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
				       u64 magic)
{
	struct cs_etm_packet *packet = tidq->packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
		    packet->exception_number == CS_ETMV3_EXC_FIQ)
			return true;

	if (magic == __perf_cs_etmv4_magic)
		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
		    packet->exception_number == CS_ETMV4_EXC_FIQ)
			return true;

	return false;
}
static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
			return true;

	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
			return true;

		/*
		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
		 * (SMC, HVC) are taken as sync exceptions.
		 */
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					  prev_packet->end_addr))
			return true;

		/*
		 * ETMv4 has 5 bits for exception number; if the numbers
		 * are in the range (CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END]
		 * they are implementation defined exceptions.
		 *
		 * For this case, simply take it as sync exception.
		 */
		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
		    packet->exception_number <= CS_ETMV4_EXC_END)
			return true;
	}

	return false;
}

static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag: a normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;
		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows an SVC instruction,
		 * we need to calibrate the previous packet sample flags
		 * as PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * an instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for the previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since the exception
		 * packet is not used standalone for generating samples and is
		 * affiliated with the previous instruction range packet, set
		 * the previous range packet flags to tell perf it is an
		 * exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since the
		 * exception return packet is not used standalone for
		 * generating samples and is affiliated with the previous
		 * instruction range packet, set the previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either a system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info, so we cannot decide
		 * the exception type purely based on the exception return
		 * packet. If we recorded the exception number from the
		 * exception packet and reused it for the exception return
		 * packet, this would not be reliable because the trace can be
		 * discontinuous or the interrupt can be nested, in which case
		 * the recorded exception number cannot be used.
		 *
		 * For the exception return packet, we only need to
		 * distinguish whether the packet is for a system call or for
		 * other types. Thus the decision can be deferred until we
		 * receive the next packet, which contains the return address;
		 * based on the return address we can read out the previous
		 * instruction, check if it's a system call instruction and
		 * then calibrate the sample flag as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}

static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
{
	int ret = 0;
	size_t processed = 0;

	/*
	 * Packets are decoded and added to the decoder's packet queue
	 * until the decoder packet processing callback has requested that
	 * processing stops or there is nothing left in the buffer. Normal
	 * operations that stop processing are a timestamp packet or a full
	 * decoder buffer queue.
	 */
	ret = cs_etm_decoder__process_data_block(etmq->decoder,
						 etmq->offset,
						 &etmq->buf[etmq->buf_used],
						 etmq->buf_len,
						 &processed);
	if (ret)
		goto out;

	etmq->offset += processed;
	etmq->buf_used += processed;
	etmq->buf_len -= processed;

out:
	return ret;
}

static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
					 struct cs_etm_traceid_queue *tidq)
{
	int ret;
	struct cs_etm_packet_queue *packet_queue;

	packet_queue = &tidq->packet_queue;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue,
						 tidq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Since packet addresses are swapped in the packet
		 * handling within the switch() statement below, the
		 * sample flags must be set before the switch()
		 * statement so that the address information is used
		 * before the packets are swapped.
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If an exception packet is coming,
			 * make sure the previous instruction
			 * range packet is handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk; if an error
			 * occurs there is nothing to do other than hope the
			 * next chunk will be better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		tidq = cs_etm__etmq_get_traceid_queue(etmq,
						CS_ETM_PER_THREAD_TRACEID);

		if (!tidq)
			continue;

		if ((tid == -1) || (tidq->tid == tid)) {
			cs_etm__set_pid_tid_cpu(etm, tidq);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr;
	u8 trace_chan_id;
	u64 timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	while (1) {
		if (!etm->heap.heap_cnt)
			goto out;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this
			 * traceID, which means something somewhere went very
			 * wrong.  There is no choice other than to bail out.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new
		 * auxtrace_buffer if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more trace to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more trace to
			 * process in this auxtrace_buffer; as such, empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
	}

out:
	return ret;
}
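
/*
 * Worked example of the heap key encoding used above (values invented
 * for illustration): with queue_nr = 2 and trace_chan_id = 0x10,
 * TO_CS_QUEUE_NR() yields (2 << 16) | 0x10 = 0x20010.  The min heap
 * orders these keys by timestamp, so popping the top entry always
 * resumes the queue/traceID pair with the oldest pending trace, and
 * TO_QUEUE_NR()/TO_TRACE_CHAN_ID() recover the two halves of the key.
 */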
static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
					union perf_event *event)
{
	struct thread *th;

	if (etm->timeless_decoding)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->itrace_start.pid,
				     event->itrace_start.tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
					   union perf_event *event)
{
	struct thread *th;
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	/*
	 * Context switches in per-thread mode are irrelevant since perf
	 * will start/stop tracing as the process is scheduled.
	 */
	if (etm->timeless_decoding)
		return 0;

	/*
	 * SWITCH_IN events carry the next process to be switched out while
	 * SWITCH_OUT events carry the process to be switched in.  As such
	 * we don't care about IN events.
	 */
	if (!out)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->context_switch.next_prev_pid,
				     event->context_switch.next_prev_tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}
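
/*
 * Why the two helpers above pre-populate the machine's thread list: in
 * per-CPU (timed) mode the decoder only sees hardware contextID values
 * in the trace, not pids.  Registering each pid/tid as it is scheduled
 * in means that, when a contextID later surfaces mid-trace, the lookup
 * can resolve it to an existing struct thread instead of attributing
 * the samples to the unknown thread.
 */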
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid);

	if (event->header.type == PERF_RECORD_ITRACE_START)
		return cs_etm__process_itrace_start(etm, event);
	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		return cs_etm__process_switch_cpu_wide(etm, event);

	if (!etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_AUX)
		return cs_etm__process_queues(etm);

	return 0;
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct evsel *evsel;
	struct evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events: a single event with the time
	 * bit set is enough to select timed decoding for the session.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}
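
/*
 * Summary of the two decoding modes selected above (terminology as
 * used in this file): "timeless" decoding applies when no event
 * records timestamps, typically a per-thread session; queues are then
 * run to completion when the traced process exits.  Timed decoding
 * applies when PERF_SAMPLE_TIME is set, typically a per-CPU session;
 * queues are then interleaved on the min heap so that samples from
 * different CPUs come out in timestamp order.
 */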
static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};

static void cs_etm__print_auxtrace_info(__u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure, return */
			return;
	}
}
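
/*
 * Layout of the auxtrace_info priv area parsed below (sizes in u64
 * words; the example figures are invented for illustration): a global
 * header of CS_HEADER_VERSION_0_MAX words is followed by one metadata
 * block per CPU, each starting with a magic word that selects its
 * format (ETMv3/PTM or ETMv4).  For instance, with two ETMv4 CPUs the
 * area holds CS_HEADER_VERSION_0_MAX + 2 * CS_ETMV4_PRIV_MAX words,
 * which is what the "i * 8 != priv_size" check at the end of the
 * parsing loop verifies.
 */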
int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				   0xffffffff);

	/*
	 * Create an RB tree for the traceID-metadata tuples.  The
	 * conversion has to be done for every packet that gets decoded, so
	 * optimizing access with something better than a sequential array
	 * search is worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = -ENOMEM;
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not have been taken already.
		 * Back out if that's the case.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the metadata pointer */
		inode->priv = metadata[j];
	}

	/*
	 * CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and CS_ETMV4_PRIV_MAX
	 * give the number of double words in the global metadata and in
	 * each cpu's metadata respectively.  The following tests that the
	 * correct number of double words was present in the auxtrace info
	 * section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}
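
	/*
	 * The pid/tid of 999999999 is a placeholder: samples that cannot
	 * be matched to a real thread (e.g. no contextID in the trace)
	 * are attributed to this "unknown" thread rather than dropped.
	 */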
	/*
	 * Initialize the list node so that thread__zput() can avoid a
	 * segmentation fault in list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return err;
}
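
/*
 * For reference, a typical way to exercise this path (command lines
 * assumed from the perf CoreSight documentation, not taken from this
 * file): record with the cs_etm PMU event and then let the report
 * stage call cs_etm__process_auxtrace_info() to set up decoding:
 *
 *   perf record -e cs_etm// -- <workload>
 *   perf report --stdio
 *
 * Instruction/branch sample synthesis can be tuned with --itrace,
 * e.g. "perf report --itrace=i100i" for one instruction sample every
 * 100 instructions.
 */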