1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/bitops.h> 10 #include <linux/err.h> 11 #include <linux/kernel.h> 12 #include <linux/log2.h> 13 #include <linux/types.h> 14 #include <linux/zalloc.h> 15 16 #include <opencsd/ocsd_if_types.h> 17 #include <stdlib.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "cs-etm.h" 22 #include "cs-etm-decoder/cs-etm-decoder.h" 23 #include "debug.h" 24 #include "evlist.h" 25 #include "intlist.h" 26 #include "machine.h" 27 #include "map.h" 28 #include "perf.h" 29 #include "symbol.h" 30 #include "thread.h" 31 #include "thread_map.h" 32 #include "thread-stack.h" 33 #include <tools/libc_compat.h> 34 #include "util.h" 35 36 #define MAX_TIMESTAMP (~0ULL) 37 38 struct cs_etm_auxtrace { 39 struct auxtrace auxtrace; 40 struct auxtrace_queues queues; 41 struct auxtrace_heap heap; 42 struct itrace_synth_opts synth_opts; 43 struct perf_session *session; 44 struct machine *machine; 45 struct thread *unknown_thread; 46 47 u8 timeless_decoding; 48 u8 snapshot_mode; 49 u8 data_queued; 50 u8 sample_branches; 51 u8 sample_instructions; 52 53 int num_cpu; 54 u32 auxtrace_type; 55 u64 branches_sample_type; 56 u64 branches_id; 57 u64 instructions_sample_type; 58 u64 instructions_sample_period; 59 u64 instructions_id; 60 u64 **metadata; 61 u64 kernel_start; 62 unsigned int pmu_type; 63 }; 64 65 struct cs_etm_traceid_queue { 66 u8 trace_chan_id; 67 pid_t pid, tid; 68 u64 period_instructions; 69 size_t last_branch_pos; 70 union perf_event *event_buf; 71 struct thread *thread; 72 struct branch_stack *last_branch; 73 struct branch_stack *last_branch_rb; 74 struct cs_etm_packet *prev_packet; 75 struct cs_etm_packet *packet; 76 struct cs_etm_packet_queue packet_queue; 77 }; 78 79 struct cs_etm_queue { 80 struct cs_etm_auxtrace *etm; 81 struct cs_etm_decoder *decoder; 82 struct auxtrace_buffer *buffer; 83 unsigned int queue_nr; 84 u8 pending_timestamp; 85 u64 offset; 86 const unsigned char *buf; 87 size_t buf_len, buf_used; 88 /* Conversion between traceID and index in traceid_queues array */ 89 struct intlist *traceid_queues_list; 90 struct cs_etm_traceid_queue **traceid_queues; 91 }; 92 93 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 94 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); 95 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 96 pid_t tid); 97 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 98 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 99 100 /* PTMs ETMIDR [11:8] set to b0011 */ 101 #define ETMIDR_PTM_VERSION 0x00000300 102 103 /* 104 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 105 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 106 * encode the etm queue number as the upper 16 bit and the channel as 107 * the lower 16 bit. 
108 */ 109 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 110 (queue_nr << 16 | trace_chan_id) 111 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 112 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 113 114 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 115 { 116 etmidr &= ETMIDR_PTM_VERSION; 117 118 if (etmidr == ETMIDR_PTM_VERSION) 119 return CS_ETM_PROTO_PTM; 120 121 return CS_ETM_PROTO_ETMV3; 122 } 123 124 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) 125 { 126 struct int_node *inode; 127 u64 *metadata; 128 129 inode = intlist__find(traceid_list, trace_chan_id); 130 if (!inode) 131 return -EINVAL; 132 133 metadata = inode->priv; 134 *magic = metadata[CS_ETM_MAGIC]; 135 return 0; 136 } 137 138 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) 139 { 140 struct int_node *inode; 141 u64 *metadata; 142 143 inode = intlist__find(traceid_list, trace_chan_id); 144 if (!inode) 145 return -EINVAL; 146 147 metadata = inode->priv; 148 *cpu = (int)metadata[CS_ETM_CPU]; 149 return 0; 150 } 151 152 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 153 u8 trace_chan_id) 154 { 155 /* 156 * When a timestamp packet is encountered the backend code 157 * is stopped so that the front end has time to process packets 158 * that were accumulated in the traceID queue. Since there can 159 * be more than one channel per cs_etm_queue, we need to specify 160 * what traceID queue needs servicing. 161 */ 162 etmq->pending_timestamp = trace_chan_id; 163 } 164 165 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 166 u8 *trace_chan_id) 167 { 168 struct cs_etm_packet_queue *packet_queue; 169 170 if (!etmq->pending_timestamp) 171 return 0; 172 173 if (trace_chan_id) 174 *trace_chan_id = etmq->pending_timestamp; 175 176 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 177 etmq->pending_timestamp); 178 if (!packet_queue) 179 return 0; 180 181 /* Acknowledge pending status */ 182 etmq->pending_timestamp = 0; 183 184 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 185 return packet_queue->timestamp; 186 } 187 188 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 189 { 190 int i; 191 192 queue->head = 0; 193 queue->tail = 0; 194 queue->packet_count = 0; 195 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 196 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 197 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 198 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 199 queue->packet_buffer[i].instr_count = 0; 200 queue->packet_buffer[i].last_instr_taken_branch = false; 201 queue->packet_buffer[i].last_instr_size = 0; 202 queue->packet_buffer[i].last_instr_type = 0; 203 queue->packet_buffer[i].last_instr_subtype = 0; 204 queue->packet_buffer[i].last_instr_cond = 0; 205 queue->packet_buffer[i].flags = 0; 206 queue->packet_buffer[i].exception_number = UINT32_MAX; 207 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 208 queue->packet_buffer[i].cpu = INT_MIN; 209 } 210 } 211 212 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 213 { 214 int idx; 215 struct int_node *inode; 216 struct cs_etm_traceid_queue *tidq; 217 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 218 219 intlist__for_each_entry(inode, traceid_queues_list) { 220 idx = (int)(intptr_t)inode->priv; 221 tidq = etmq->traceid_queues[idx]; 222 cs_etm__clear_packet_queue(&tidq->packet_queue); 223 } 224 } 225 226 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 227 struct
cs_etm_traceid_queue *tidq, 228 u8 trace_chan_id) 229 { 230 int rc = -ENOMEM; 231 struct auxtrace_queue *queue; 232 struct cs_etm_auxtrace *etm = etmq->etm; 233 234 cs_etm__clear_packet_queue(&tidq->packet_queue); 235 236 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 237 tidq->tid = queue->tid; 238 tidq->pid = -1; 239 tidq->trace_chan_id = trace_chan_id; 240 241 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 242 if (!tidq->packet) 243 goto out; 244 245 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 246 if (!tidq->prev_packet) 247 goto out_free; 248 249 if (etm->synth_opts.last_branch) { 250 size_t sz = sizeof(struct branch_stack); 251 252 sz += etm->synth_opts.last_branch_sz * 253 sizeof(struct branch_entry); 254 tidq->last_branch = zalloc(sz); 255 if (!tidq->last_branch) 256 goto out_free; 257 tidq->last_branch_rb = zalloc(sz); 258 if (!tidq->last_branch_rb) 259 goto out_free; 260 } 261 262 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 263 if (!tidq->event_buf) 264 goto out_free; 265 266 return 0; 267 268 out_free: 269 zfree(&tidq->last_branch_rb); 270 zfree(&tidq->last_branch); 271 zfree(&tidq->prev_packet); 272 zfree(&tidq->packet); 273 out: 274 return rc; 275 } 276 277 static struct cs_etm_traceid_queue 278 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 279 { 280 int idx; 281 struct int_node *inode; 282 struct intlist *traceid_queues_list; 283 struct cs_etm_traceid_queue *tidq, **traceid_queues; 284 struct cs_etm_auxtrace *etm = etmq->etm; 285 286 if (etm->timeless_decoding) 287 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 288 289 traceid_queues_list = etmq->traceid_queues_list; 290 291 /* 292 * Check if the traceid_queue exist for this traceID by looking 293 * in the queue list. 294 */ 295 inode = intlist__find(traceid_queues_list, trace_chan_id); 296 if (inode) { 297 idx = (int)(intptr_t)inode->priv; 298 return etmq->traceid_queues[idx]; 299 } 300 301 /* We couldn't find a traceid_queue for this traceID, allocate one */ 302 tidq = malloc(sizeof(*tidq)); 303 if (!tidq) 304 return NULL; 305 306 memset(tidq, 0, sizeof(*tidq)); 307 308 /* Get a valid index for the new traceid_queue */ 309 idx = intlist__nr_entries(traceid_queues_list); 310 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 311 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 312 if (!inode) 313 goto out_free; 314 315 /* Associate this traceID with this index */ 316 inode->priv = (void *)(intptr_t)idx; 317 318 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 319 goto out_free; 320 321 /* Grow the traceid_queues array by one unit */ 322 traceid_queues = etmq->traceid_queues; 323 traceid_queues = reallocarray(traceid_queues, 324 idx + 1, 325 sizeof(*traceid_queues)); 326 327 /* 328 * On failure reallocarray() returns NULL and the original block of 329 * memory is left untouched. 330 */ 331 if (!traceid_queues) 332 goto out_free; 333 334 traceid_queues[idx] = tidq; 335 etmq->traceid_queues = traceid_queues; 336 337 return etmq->traceid_queues[idx]; 338 339 out_free: 340 /* 341 * Function intlist__remove() removes the inode from the list 342 * and delete the memory associated to it. 
343 */ 344 intlist__remove(traceid_queues_list, inode); 345 free(tidq); 346 347 return NULL; 348 } 349 350 struct cs_etm_packet_queue 351 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 352 { 353 struct cs_etm_traceid_queue *tidq; 354 355 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 356 if (tidq) 357 return &tidq->packet_queue; 358 359 return NULL; 360 } 361 362 static void cs_etm__packet_dump(const char *pkt_string) 363 { 364 const char *color = PERF_COLOR_BLUE; 365 int len = strlen(pkt_string); 366 367 if (len && (pkt_string[len-1] == '\n')) 368 color_fprintf(stdout, color, " %s", pkt_string); 369 else 370 color_fprintf(stdout, color, " %s\n", pkt_string); 371 372 fflush(stdout); 373 } 374 375 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 376 struct cs_etm_auxtrace *etm, int idx, 377 u32 etmidr) 378 { 379 u64 **metadata = etm->metadata; 380 381 t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 382 t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; 383 t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; 384 } 385 386 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 387 struct cs_etm_auxtrace *etm, int idx) 388 { 389 u64 **metadata = etm->metadata; 390 391 t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; 392 t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; 393 t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; 394 t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; 395 t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; 396 t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; 397 t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; 398 } 399 400 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 401 struct cs_etm_auxtrace *etm) 402 { 403 int i; 404 u32 etmidr; 405 u64 architecture; 406 407 for (i = 0; i < etm->num_cpu; i++) { 408 architecture = etm->metadata[i][CS_ETM_MAGIC]; 409 410 switch (architecture) { 411 case __perf_cs_etmv3_magic: 412 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; 413 cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); 414 break; 415 case __perf_cs_etmv4_magic: 416 cs_etm__set_trace_param_etmv4(t_params, etm, i); 417 break; 418 default: 419 return -EINVAL; 420 } 421 } 422 423 return 0; 424 } 425 426 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 427 struct cs_etm_queue *etmq, 428 enum cs_etm_decoder_operation mode) 429 { 430 int ret = -EINVAL; 431 432 if (!(mode < CS_ETM_OPERATION_MAX)) 433 goto out; 434 435 d_params->packet_printer = cs_etm__packet_dump; 436 d_params->operation = mode; 437 d_params->data = etmq; 438 d_params->formatted = true; 439 d_params->fsyncs = false; 440 d_params->hsyncs = false; 441 d_params->frame_aligned = true; 442 443 ret = 0; 444 out: 445 return ret; 446 } 447 448 static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, 449 struct auxtrace_buffer *buffer) 450 { 451 int ret; 452 const char *color = PERF_COLOR_BLUE; 453 struct cs_etm_decoder_params d_params; 454 struct cs_etm_trace_params *t_params; 455 struct cs_etm_decoder *decoder; 456 size_t buffer_used = 0; 457 458 fprintf(stdout, "\n"); 459 color_fprintf(stdout, color, 460 ". ... 
CoreSight ETM Trace data: size %zu bytes\n", 461 buffer->size); 462 463 /* Use metadata to fill in trace parameters for trace decoder */ 464 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 465 466 if (!t_params) 467 return; 468 469 if (cs_etm__init_trace_params(t_params, etm)) 470 goto out_free; 471 472 /* Set decoder parameters to simply print the trace packets */ 473 if (cs_etm__init_decoder_params(&d_params, NULL, 474 CS_ETM_OPERATION_PRINT)) 475 goto out_free; 476 477 decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 478 479 if (!decoder) 480 goto out_free; 481 do { 482 size_t consumed; 483 484 ret = cs_etm_decoder__process_data_block( 485 decoder, buffer->offset, 486 &((u8 *)buffer->data)[buffer_used], 487 buffer->size - buffer_used, &consumed); 488 if (ret) 489 break; 490 491 buffer_used += consumed; 492 } while (buffer_used < buffer->size); 493 494 cs_etm_decoder__free(decoder); 495 496 out_free: 497 zfree(&t_params); 498 } 499 500 static int cs_etm__flush_events(struct perf_session *session, 501 struct perf_tool *tool) 502 { 503 int ret; 504 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 505 struct cs_etm_auxtrace, 506 auxtrace); 507 if (dump_trace) 508 return 0; 509 510 if (!tool->ordered_events) 511 return -EINVAL; 512 513 ret = cs_etm__update_queues(etm); 514 515 if (ret < 0) 516 return ret; 517 518 if (etm->timeless_decoding) 519 return cs_etm__process_timeless_queues(etm, -1); 520 521 return cs_etm__process_queues(etm); 522 } 523 524 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 525 { 526 int idx; 527 uintptr_t priv; 528 struct int_node *inode, *tmp; 529 struct cs_etm_traceid_queue *tidq; 530 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 531 532 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 533 priv = (uintptr_t)inode->priv; 534 idx = priv; 535 536 /* Free this traceid_queue from the array */ 537 tidq = etmq->traceid_queues[idx]; 538 thread__zput(tidq->thread); 539 zfree(&tidq->event_buf); 540 zfree(&tidq->last_branch); 541 zfree(&tidq->last_branch_rb); 542 zfree(&tidq->prev_packet); 543 zfree(&tidq->packet); 544 zfree(&tidq); 545 546 /* 547 * Function intlist__remove() removes the inode from the list 548 * and delete the memory associated to it. 
549 */ 550 intlist__remove(traceid_queues_list, inode); 551 } 552 553 /* Then the RB tree itself */ 554 intlist__delete(traceid_queues_list); 555 etmq->traceid_queues_list = NULL; 556 557 /* finally free the traceid_queues array */ 558 zfree(&etmq->traceid_queues); 559 } 560 561 static void cs_etm__free_queue(void *priv) 562 { 563 struct cs_etm_queue *etmq = priv; 564 565 if (!etmq) 566 return; 567 568 cs_etm_decoder__free(etmq->decoder); 569 cs_etm__free_traceid_queues(etmq); 570 free(etmq); 571 } 572 573 static void cs_etm__free_events(struct perf_session *session) 574 { 575 unsigned int i; 576 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 577 struct cs_etm_auxtrace, 578 auxtrace); 579 struct auxtrace_queues *queues = &aux->queues; 580 581 for (i = 0; i < queues->nr_queues; i++) { 582 cs_etm__free_queue(queues->queue_array[i].priv); 583 queues->queue_array[i].priv = NULL; 584 } 585 586 auxtrace_queues__free(queues); 587 } 588 589 static void cs_etm__free(struct perf_session *session) 590 { 591 int i; 592 struct int_node *inode, *tmp; 593 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 594 struct cs_etm_auxtrace, 595 auxtrace); 596 cs_etm__free_events(session); 597 session->auxtrace = NULL; 598 599 /* First remove all traceID/metadata nodes for the RB tree */ 600 intlist__for_each_entry_safe(inode, tmp, traceid_list) 601 intlist__remove(traceid_list, inode); 602 /* Then the RB tree itself */ 603 intlist__delete(traceid_list); 604 605 for (i = 0; i < aux->num_cpu; i++) 606 zfree(&aux->metadata[i]); 607 608 thread__zput(aux->unknown_thread); 609 zfree(&aux->metadata); 610 zfree(&aux); 611 } 612 613 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) 614 { 615 struct machine *machine; 616 617 machine = etmq->etm->machine; 618 619 if (address >= etmq->etm->kernel_start) { 620 if (machine__is_host(machine)) 621 return PERF_RECORD_MISC_KERNEL; 622 else 623 return PERF_RECORD_MISC_GUEST_KERNEL; 624 } else { 625 if (machine__is_host(machine)) 626 return PERF_RECORD_MISC_USER; 627 else if (perf_guest) 628 return PERF_RECORD_MISC_GUEST_USER; 629 else 630 return PERF_RECORD_MISC_HYPERVISOR; 631 } 632 } 633 634 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 635 u64 address, size_t size, u8 *buffer) 636 { 637 u8 cpumode; 638 u64 offset; 639 int len; 640 struct thread *thread; 641 struct machine *machine; 642 struct addr_location al; 643 struct cs_etm_traceid_queue *tidq; 644 645 if (!etmq) 646 return 0; 647 648 machine = etmq->etm->machine; 649 cpumode = cs_etm__cpu_mode(etmq, address); 650 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 651 if (!tidq) 652 return 0; 653 654 thread = tidq->thread; 655 if (!thread) { 656 if (cpumode != PERF_RECORD_MISC_KERNEL) 657 return 0; 658 thread = etmq->etm->unknown_thread; 659 } 660 661 if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) 662 return 0; 663 664 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 665 dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) 666 return 0; 667 668 offset = al.map->map_ip(al.map, address); 669 670 map__load(al.map); 671 672 len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); 673 674 if (len <= 0) 675 return 0; 676 677 return len; 678 } 679 680 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) 681 { 682 struct cs_etm_decoder_params d_params; 683 struct cs_etm_trace_params *t_params = NULL; 684 struct cs_etm_queue *etmq; 685 686 etmq = zalloc(sizeof(*etmq)); 
687 if (!etmq) 688 return NULL; 689 690 etmq->traceid_queues_list = intlist__new(NULL); 691 if (!etmq->traceid_queues_list) 692 goto out_free; 693 694 /* Use metadata to fill in trace parameters for trace decoder */ 695 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 696 697 if (!t_params) 698 goto out_free; 699 700 if (cs_etm__init_trace_params(t_params, etm)) 701 goto out_free; 702 703 /* Set decoder parameters to decode trace packets */ 704 if (cs_etm__init_decoder_params(&d_params, etmq, 705 CS_ETM_OPERATION_DECODE)) 706 goto out_free; 707 708 etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 709 710 if (!etmq->decoder) 711 goto out_free; 712 713 /* 714 * Register a function to handle all memory accesses required by 715 * the trace decoder library. 716 */ 717 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 718 0x0L, ((u64) -1L), 719 cs_etm__mem_access)) 720 goto out_free_decoder; 721 722 zfree(&t_params); 723 return etmq; 724 725 out_free_decoder: 726 cs_etm_decoder__free(etmq->decoder); 727 out_free: 728 intlist__delete(etmq->traceid_queues_list); 729 free(etmq); 730 731 return NULL; 732 } 733 734 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 735 struct auxtrace_queue *queue, 736 unsigned int queue_nr) 737 { 738 int ret = 0; 739 unsigned int cs_queue_nr; 740 u8 trace_chan_id; 741 u64 timestamp; 742 struct cs_etm_queue *etmq = queue->priv; 743 744 if (list_empty(&queue->head) || etmq) 745 goto out; 746 747 etmq = cs_etm__alloc_queue(etm); 748 749 if (!etmq) { 750 ret = -ENOMEM; 751 goto out; 752 } 753 754 queue->priv = etmq; 755 etmq->etm = etm; 756 etmq->queue_nr = queue_nr; 757 etmq->offset = 0; 758 759 if (etm->timeless_decoding) 760 goto out; 761 762 /* 763 * We are under a CPU-wide trace scenario. As such we need to know 764 * when the code that generated the traces started to execute so that 765 * it can be correlated with execution on other CPUs. So we get a 766 * handle on the beginning of traces and decode until we find a 767 * timestamp. The timestamp is then added to the auxtrace min heap 768 * in order to know what nibble (of all the etmqs) to decode first. 769 */ 770 while (1) { 771 /* 772 * Fetch an aux_buffer from this etmq. Bail if no more 773 * blocks or an error has been encountered. 774 */ 775 ret = cs_etm__get_data_block(etmq); 776 if (ret <= 0) 777 goto out; 778 779 /* 780 * Run decoder on the trace block. The decoder will stop when 781 * encountering a timestamp, a full packet queue or the end of 782 * trace for that block. 783 */ 784 ret = cs_etm__decode_data_block(etmq); 785 if (ret) 786 goto out; 787 788 /* 789 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 790 * the timestamp calculation for us. 791 */ 792 timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 793 794 /* We found a timestamp, no need to continue. */ 795 if (timestamp) 796 break; 797 798 /* 799 * We didn't find a timestamp so empty all the traceid packet 800 * queues before looking for another timestamp packet, either 801 * in the current data block or a new one. Packets that were 802 * just decoded are useless since no timestamp has been 803 * associated with them. As such simply discard them. 804 */ 805 cs_etm__clear_all_packet_queues(etmq); 806 } 807 808 /* 809 * We have a timestamp. Add it to the min heap to reflect when 810 * instructions conveyed by the range packets of this traceID queue 811 * started to execute. 
Once the same has been done for all the traceID 812 * queues of each etmq, rendering and decoding can start in 813 * chronological order. 814 * 815 * Note that packets decoded above are still in the traceID's packet 816 * queue and will be processed in cs_etm__process_queues(). 817 */ 818 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 819 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); 820 out: 821 return ret; 822 } 823 824 static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) 825 { 826 unsigned int i; 827 int ret; 828 829 if (!etm->kernel_start) 830 etm->kernel_start = machine__kernel_start(etm->machine); 831 832 for (i = 0; i < etm->queues.nr_queues; i++) { 833 ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); 834 if (ret) 835 return ret; 836 } 837 838 return 0; 839 } 840 841 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) 842 { 843 if (etm->queues.new_data) { 844 etm->queues.new_data = false; 845 return cs_etm__setup_queues(etm); 846 } 847 848 return 0; 849 } 850 851 static inline 852 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, 853 struct cs_etm_traceid_queue *tidq) 854 { 855 struct branch_stack *bs_src = tidq->last_branch_rb; 856 struct branch_stack *bs_dst = tidq->last_branch; 857 size_t nr = 0; 858 859 /* 860 * Set the number of records before early exit: ->nr is used to 861 * determine how many branches to copy from ->entries. 862 */ 863 bs_dst->nr = bs_src->nr; 864 865 /* 866 * Early exit when there is nothing to copy. 867 */ 868 if (!bs_src->nr) 869 return; 870 871 /* 872 * As bs_src->entries is a circular buffer, we need to copy from it in 873 * two steps. First, copy the branches from the most recently inserted 874 * branch ->last_branch_pos until the end of bs_src->entries buffer. 875 */ 876 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 877 memcpy(&bs_dst->entries[0], 878 &bs_src->entries[tidq->last_branch_pos], 879 sizeof(struct branch_entry) * nr); 880 881 /* 882 * If we wrapped around at least once, the branches from the beginning 883 * of the bs_src->entries buffer and until the ->last_branch_pos element 884 * are older valid branches: copy them over. The total number of 885 * branches copied over will be equal to the number of branches asked by 886 * the user in last_branch_sz. 887 */ 888 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 889 memcpy(&bs_dst->entries[nr], 890 &bs_src->entries[0], 891 sizeof(struct branch_entry) * tidq->last_branch_pos); 892 } 893 } 894 895 static inline 896 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 897 { 898 tidq->last_branch_pos = 0; 899 tidq->last_branch_rb->nr = 0; 900 } 901 902 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 903 u8 trace_chan_id, u64 addr) 904 { 905 u8 instrBytes[2]; 906 907 cs_etm__mem_access(etmq, trace_chan_id, addr, 908 ARRAY_SIZE(instrBytes), instrBytes); 909 /* 910 * T32 instruction size is indicated by bits[15:11] of the first 911 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 912 * denote a 32-bit instruction. 913 */ 914 return ((instrBytes[1] & 0xF8) >= 0xE8) ?
4 : 2; 915 } 916 917 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 918 { 919 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 920 if (packet->sample_type == CS_ETM_DISCONTINUITY) 921 return 0; 922 923 return packet->start_addr; 924 } 925 926 static inline 927 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 928 { 929 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 930 if (packet->sample_type == CS_ETM_DISCONTINUITY) 931 return 0; 932 933 return packet->end_addr - packet->last_instr_size; 934 } 935 936 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 937 u64 trace_chan_id, 938 const struct cs_etm_packet *packet, 939 u64 offset) 940 { 941 if (packet->isa == CS_ETM_ISA_T32) { 942 u64 addr = packet->start_addr; 943 944 while (offset > 0) { 945 addr += cs_etm__t32_instr_size(etmq, 946 trace_chan_id, addr); 947 offset--; 948 } 949 return addr; 950 } 951 952 /* Assume a 4 byte instruction size (A32/A64) */ 953 return packet->start_addr + offset * 4; 954 } 955 956 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 957 struct cs_etm_traceid_queue *tidq) 958 { 959 struct branch_stack *bs = tidq->last_branch_rb; 960 struct branch_entry *be; 961 962 /* 963 * The branches are recorded in a circular buffer in reverse 964 * chronological order: we start recording from the last element of the 965 * buffer down. After writing the first element of the stack, move the 966 * insert position back to the end of the buffer. 967 */ 968 if (!tidq->last_branch_pos) 969 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 970 971 tidq->last_branch_pos -= 1; 972 973 be = &bs->entries[tidq->last_branch_pos]; 974 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 975 be->to = cs_etm__first_executed_instr(tidq->packet); 976 /* No support for mispredict */ 977 be->flags.mispred = 0; 978 be->flags.predicted = 1; 979 980 /* 981 * Increment bs->nr until reaching the number of last branches asked by 982 * the user on the command line. 
983 */ 984 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 985 bs->nr += 1; 986 } 987 988 static int cs_etm__inject_event(union perf_event *event, 989 struct perf_sample *sample, u64 type) 990 { 991 event->header.size = perf_event__sample_event_size(sample, type, 0); 992 return perf_event__synthesize_sample(event, type, 0, sample); 993 } 994 995 996 static int 997 cs_etm__get_trace(struct cs_etm_queue *etmq) 998 { 999 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1000 struct auxtrace_buffer *old_buffer = aux_buffer; 1001 struct auxtrace_queue *queue; 1002 1003 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1004 1005 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1006 1007 /* If no more data, drop the previous auxtrace_buffer and return */ 1008 if (!aux_buffer) { 1009 if (old_buffer) 1010 auxtrace_buffer__drop_data(old_buffer); 1011 etmq->buf_len = 0; 1012 return 0; 1013 } 1014 1015 etmq->buffer = aux_buffer; 1016 1017 /* If the aux_buffer doesn't have data associated, try to load it */ 1018 if (!aux_buffer->data) { 1019 /* get the file desc associated with the perf data file */ 1020 int fd = perf_data__fd(etmq->etm->session->data); 1021 1022 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1023 if (!aux_buffer->data) 1024 return -ENOMEM; 1025 } 1026 1027 /* If valid, drop the previous buffer */ 1028 if (old_buffer) 1029 auxtrace_buffer__drop_data(old_buffer); 1030 1031 etmq->buf_used = 0; 1032 etmq->buf_len = aux_buffer->size; 1033 etmq->buf = aux_buffer->data; 1034 1035 return etmq->buf_len; 1036 } 1037 1038 static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 1039 struct cs_etm_traceid_queue *tidq) 1040 { 1041 if ((!tidq->thread) && (tidq->tid != -1)) 1042 tidq->thread = machine__find_thread(etm->machine, -1, 1043 tidq->tid); 1044 1045 if (tidq->thread) 1046 tidq->pid = tidq->thread->pid_; 1047 } 1048 1049 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, 1050 pid_t tid, u8 trace_chan_id) 1051 { 1052 int cpu, err = -EINVAL; 1053 struct cs_etm_auxtrace *etm = etmq->etm; 1054 struct cs_etm_traceid_queue *tidq; 1055 1056 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1057 if (!tidq) 1058 return err; 1059 1060 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 1061 return err; 1062 1063 err = machine__set_current_tid(etm->machine, cpu, tid, tid); 1064 if (err) 1065 return err; 1066 1067 tidq->tid = tid; 1068 thread__zput(tidq->thread); 1069 1070 cs_etm__set_pid_tid_cpu(etm, tidq); 1071 return 0; 1072 } 1073 1074 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1075 { 1076 return !!etmq->etm->timeless_decoding; 1077 } 1078 1079 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1080 struct cs_etm_traceid_queue *tidq, 1081 u64 addr, u64 period) 1082 { 1083 int ret = 0; 1084 struct cs_etm_auxtrace *etm = etmq->etm; 1085 union perf_event *event = tidq->event_buf; 1086 struct perf_sample sample = {.ip = 0,}; 1087 1088 event->sample.header.type = PERF_RECORD_SAMPLE; 1089 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); 1090 event->sample.header.size = sizeof(struct perf_event_header); 1091 1092 sample.ip = addr; 1093 sample.pid = tidq->pid; 1094 sample.tid = tidq->tid; 1095 sample.id = etmq->etm->instructions_id; 1096 sample.stream_id = etmq->etm->instructions_id; 1097 sample.period = period; 1098 sample.cpu = tidq->packet->cpu; 1099 sample.flags = tidq->prev_packet->flags; 1100 sample.insn_len = 1; 1101 sample.cpumode = event->sample.header.misc; 1102 1103 if (etm->synth_opts.last_branch) { 1104 
cs_etm__copy_last_branch_rb(etmq, tidq); 1105 sample.branch_stack = tidq->last_branch; 1106 } 1107 1108 if (etm->synth_opts.inject) { 1109 ret = cs_etm__inject_event(event, &sample, 1110 etm->instructions_sample_type); 1111 if (ret) 1112 return ret; 1113 } 1114 1115 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1116 1117 if (ret) 1118 pr_err( 1119 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1120 ret); 1121 1122 if (etm->synth_opts.last_branch) 1123 cs_etm__reset_last_branch_rb(tidq); 1124 1125 return ret; 1126 } 1127 1128 /* 1129 * The cs etm packet encodes an instruction range between a branch target 1130 * and the next taken branch. Generate sample accordingly. 1131 */ 1132 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1133 struct cs_etm_traceid_queue *tidq) 1134 { 1135 int ret = 0; 1136 struct cs_etm_auxtrace *etm = etmq->etm; 1137 struct perf_sample sample = {.ip = 0,}; 1138 union perf_event *event = tidq->event_buf; 1139 struct dummy_branch_stack { 1140 u64 nr; 1141 struct branch_entry entries; 1142 } dummy_bs; 1143 u64 ip; 1144 1145 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1146 1147 event->sample.header.type = PERF_RECORD_SAMPLE; 1148 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); 1149 event->sample.header.size = sizeof(struct perf_event_header); 1150 1151 sample.ip = ip; 1152 sample.pid = tidq->pid; 1153 sample.tid = tidq->tid; 1154 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1155 sample.id = etmq->etm->branches_id; 1156 sample.stream_id = etmq->etm->branches_id; 1157 sample.period = 1; 1158 sample.cpu = tidq->packet->cpu; 1159 sample.flags = tidq->prev_packet->flags; 1160 sample.cpumode = event->sample.header.misc; 1161 1162 /* 1163 * perf report cannot handle events without a branch stack 1164 */ 1165 if (etm->synth_opts.last_branch) { 1166 dummy_bs = (struct dummy_branch_stack){ 1167 .nr = 1, 1168 .entries = { 1169 .from = sample.ip, 1170 .to = sample.addr, 1171 }, 1172 }; 1173 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1174 } 1175 1176 if (etm->synth_opts.inject) { 1177 ret = cs_etm__inject_event(event, &sample, 1178 etm->branches_sample_type); 1179 if (ret) 1180 return ret; 1181 } 1182 1183 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1184 1185 if (ret) 1186 pr_err( 1187 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1188 ret); 1189 1190 return ret; 1191 } 1192 1193 struct cs_etm_synth { 1194 struct perf_tool dummy_tool; 1195 struct perf_session *session; 1196 }; 1197 1198 static int cs_etm__event_synth(struct perf_tool *tool, 1199 union perf_event *event, 1200 struct perf_sample *sample __maybe_unused, 1201 struct machine *machine __maybe_unused) 1202 { 1203 struct cs_etm_synth *cs_etm_synth = 1204 container_of(tool, struct cs_etm_synth, dummy_tool); 1205 1206 return perf_session__deliver_synth_event(cs_etm_synth->session, 1207 event, NULL); 1208 } 1209 1210 static int cs_etm__synth_event(struct perf_session *session, 1211 struct perf_event_attr *attr, u64 id) 1212 { 1213 struct cs_etm_synth cs_etm_synth; 1214 1215 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); 1216 cs_etm_synth.session = session; 1217 1218 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, 1219 &id, cs_etm__event_synth); 1220 } 1221 1222 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1223 struct perf_session *session) 1224 { 1225 struct perf_evlist *evlist = session->evlist; 1226 struct perf_evsel *evsel; 1227 
struct perf_event_attr attr; 1228 bool found = false; 1229 u64 id; 1230 int err; 1231 1232 evlist__for_each_entry(evlist, evsel) { 1233 if (evsel->attr.type == etm->pmu_type) { 1234 found = true; 1235 break; 1236 } 1237 } 1238 1239 if (!found) { 1240 pr_debug("No selected events with CoreSight Trace data\n"); 1241 return 0; 1242 } 1243 1244 memset(&attr, 0, sizeof(struct perf_event_attr)); 1245 attr.size = sizeof(struct perf_event_attr); 1246 attr.type = PERF_TYPE_HARDWARE; 1247 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; 1248 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1249 PERF_SAMPLE_PERIOD; 1250 if (etm->timeless_decoding) 1251 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1252 else 1253 attr.sample_type |= PERF_SAMPLE_TIME; 1254 1255 attr.exclude_user = evsel->attr.exclude_user; 1256 attr.exclude_kernel = evsel->attr.exclude_kernel; 1257 attr.exclude_hv = evsel->attr.exclude_hv; 1258 attr.exclude_host = evsel->attr.exclude_host; 1259 attr.exclude_guest = evsel->attr.exclude_guest; 1260 attr.sample_id_all = evsel->attr.sample_id_all; 1261 attr.read_format = evsel->attr.read_format; 1262 1263 /* create new id val to be a fixed offset from evsel id */ 1264 id = evsel->id[0] + 1000000000; 1265 1266 if (!id) 1267 id = 1; 1268 1269 if (etm->synth_opts.branches) { 1270 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1271 attr.sample_period = 1; 1272 attr.sample_type |= PERF_SAMPLE_ADDR; 1273 err = cs_etm__synth_event(session, &attr, id); 1274 if (err) 1275 return err; 1276 etm->sample_branches = true; 1277 etm->branches_sample_type = attr.sample_type; 1278 etm->branches_id = id; 1279 id += 1; 1280 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1281 } 1282 1283 if (etm->synth_opts.last_branch) 1284 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1285 1286 if (etm->synth_opts.instructions) { 1287 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1288 attr.sample_period = etm->synth_opts.period; 1289 etm->instructions_sample_period = attr.sample_period; 1290 err = cs_etm__synth_event(session, &attr, id); 1291 if (err) 1292 return err; 1293 etm->sample_instructions = true; 1294 etm->instructions_sample_type = attr.sample_type; 1295 etm->instructions_id = id; 1296 id += 1; 1297 } 1298 1299 return 0; 1300 } 1301 1302 static int cs_etm__sample(struct cs_etm_queue *etmq, 1303 struct cs_etm_traceid_queue *tidq) 1304 { 1305 struct cs_etm_auxtrace *etm = etmq->etm; 1306 struct cs_etm_packet *tmp; 1307 int ret; 1308 u8 trace_chan_id = tidq->trace_chan_id; 1309 u64 instrs_executed = tidq->packet->instr_count; 1310 1311 tidq->period_instructions += instrs_executed; 1312 1313 /* 1314 * Record a branch when the last instruction in 1315 * PREV_PACKET is a branch. 
1316 */ 1317 if (etm->synth_opts.last_branch && 1318 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1319 tidq->prev_packet->last_instr_taken_branch) 1320 cs_etm__update_last_branch_rb(etmq, tidq); 1321 1322 if (etm->sample_instructions && 1323 tidq->period_instructions >= etm->instructions_sample_period) { 1324 /* 1325 * Emit instruction sample periodically 1326 * TODO: allow period to be defined in cycles and clock time 1327 */ 1328 1329 /* Get number of instructions executed after the sample point */ 1330 u64 instrs_over = tidq->period_instructions - 1331 etm->instructions_sample_period; 1332 1333 /* 1334 * Calculate the address of the sampled instruction (-1 as 1335 * sample is reported as though instruction has just been 1336 * executed, but PC has not advanced to next instruction) 1337 */ 1338 u64 offset = (instrs_executed - instrs_over - 1); 1339 u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1340 tidq->packet, offset); 1341 1342 ret = cs_etm__synth_instruction_sample( 1343 etmq, tidq, addr, etm->instructions_sample_period); 1344 if (ret) 1345 return ret; 1346 1347 /* Carry remaining instructions into next sample period */ 1348 tidq->period_instructions = instrs_over; 1349 } 1350 1351 if (etm->sample_branches) { 1352 bool generate_sample = false; 1353 1354 /* Generate sample for tracing on packet */ 1355 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1356 generate_sample = true; 1357 1358 /* Generate sample for branch taken packet */ 1359 if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1360 tidq->prev_packet->last_instr_taken_branch) 1361 generate_sample = true; 1362 1363 if (generate_sample) { 1364 ret = cs_etm__synth_branch_sample(etmq, tidq); 1365 if (ret) 1366 return ret; 1367 } 1368 } 1369 1370 if (etm->sample_branches || etm->synth_opts.last_branch) { 1371 /* 1372 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1373 * the next incoming packet. 1374 */ 1375 tmp = tidq->packet; 1376 tidq->packet = tidq->prev_packet; 1377 tidq->prev_packet = tmp; 1378 } 1379 1380 return 0; 1381 } 1382 1383 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1384 { 1385 /* 1386 * When the exception packet is inserted, whether the last instruction 1387 * in previous range packet is taken branch or not, we need to force 1388 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1389 * to generate branch sample for the instruction range before the 1390 * exception is trapped to kernel or before the exception returning. 1391 * 1392 * The exception packet includes the dummy address values, so don't 1393 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1394 * for generating instruction and branch samples. 1395 */ 1396 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1397 tidq->prev_packet->last_instr_taken_branch = true; 1398 1399 return 0; 1400 } 1401 1402 static int cs_etm__flush(struct cs_etm_queue *etmq, 1403 struct cs_etm_traceid_queue *tidq) 1404 { 1405 int err = 0; 1406 struct cs_etm_auxtrace *etm = etmq->etm; 1407 struct cs_etm_packet *tmp; 1408 1409 /* Handle start tracing packet */ 1410 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1411 goto swap_packet; 1412 1413 if (etmq->etm->synth_opts.last_branch && 1414 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1415 /* 1416 * Generate a last branch event for the branches left in the 1417 * circular buffer at the end of the trace. 
1418 * 1419 * Use the address of the end of the last reported execution 1420 * range 1421 */ 1422 u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1423 1424 err = cs_etm__synth_instruction_sample( 1425 etmq, tidq, addr, 1426 tidq->period_instructions); 1427 if (err) 1428 return err; 1429 1430 tidq->period_instructions = 0; 1431 1432 } 1433 1434 if (etm->sample_branches && 1435 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1436 err = cs_etm__synth_branch_sample(etmq, tidq); 1437 if (err) 1438 return err; 1439 } 1440 1441 swap_packet: 1442 if (etm->sample_branches || etm->synth_opts.last_branch) { 1443 /* 1444 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1445 * the next incoming packet. 1446 */ 1447 tmp = tidq->packet; 1448 tidq->packet = tidq->prev_packet; 1449 tidq->prev_packet = tmp; 1450 } 1451 1452 return err; 1453 } 1454 1455 static int cs_etm__end_block(struct cs_etm_queue *etmq, 1456 struct cs_etm_traceid_queue *tidq) 1457 { 1458 int err; 1459 1460 /* 1461 * It has no new packet coming and 'etmq->packet' contains the stale 1462 * packet which was set at the previous time with packets swapping; 1463 * so skip to generate branch sample to avoid stale packet. 1464 * 1465 * For this case only flush branch stack and generate a last branch 1466 * event for the branches left in the circular buffer at the end of 1467 * the trace. 1468 */ 1469 if (etmq->etm->synth_opts.last_branch && 1470 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1471 /* 1472 * Use the address of the end of the last reported execution 1473 * range. 1474 */ 1475 u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1476 1477 err = cs_etm__synth_instruction_sample( 1478 etmq, tidq, addr, 1479 tidq->period_instructions); 1480 if (err) 1481 return err; 1482 1483 tidq->period_instructions = 0; 1484 } 1485 1486 return 0; 1487 } 1488 /* 1489 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 1490 * if need be. 1491 * Returns: < 0 if error 1492 * = 0 if no more auxtrace_buffer to read 1493 * > 0 if the current buffer isn't empty yet 1494 */ 1495 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 1496 { 1497 int ret; 1498 1499 if (!etmq->buf_len) { 1500 ret = cs_etm__get_trace(etmq); 1501 if (ret <= 0) 1502 return ret; 1503 /* 1504 * We cannot assume consecutive blocks in the data file 1505 * are contiguous, reset the decoder to force re-sync. 1506 */ 1507 ret = cs_etm_decoder__reset(etmq->decoder); 1508 if (ret) 1509 return ret; 1510 } 1511 1512 return etmq->buf_len; 1513 } 1514 1515 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, 1516 struct cs_etm_packet *packet, 1517 u64 end_addr) 1518 { 1519 /* Initialise to keep compiler happy */ 1520 u16 instr16 = 0; 1521 u32 instr32 = 0; 1522 u64 addr; 1523 1524 switch (packet->isa) { 1525 case CS_ETM_ISA_T32: 1526 /* 1527 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 1528 * 1529 * b'15 b'8 1530 * +-----------------+--------+ 1531 * | 1 1 0 1 1 1 1 1 | imm8 | 1532 * +-----------------+--------+ 1533 * 1534 * According to the specifiction, it only defines SVC for T32 1535 * with 16 bits instruction and has no definition for 32bits; 1536 * so below only read 2 bytes as instruction size for T32. 
1537 */ 1538 addr = end_addr - 2; 1539 cs_etm__mem_access(etmq, trace_chan_id, addr, 1540 sizeof(instr16), (u8 *)&instr16); 1541 if ((instr16 & 0xFF00) == 0xDF00) 1542 return true; 1543 1544 break; 1545 case CS_ETM_ISA_A32: 1546 /* 1547 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 1548 * 1549 * b'31 b'28 b'27 b'24 1550 * +---------+---------+-------------------------+ 1551 * | !1111 | 1 1 1 1 | imm24 | 1552 * +---------+---------+-------------------------+ 1553 */ 1554 addr = end_addr - 4; 1555 cs_etm__mem_access(etmq, trace_chan_id, addr, 1556 sizeof(instr32), (u8 *)&instr32); 1557 if ((instr32 & 0x0F000000) == 0x0F000000 && 1558 (instr32 & 0xF0000000) != 0xF0000000) 1559 return true; 1560 1561 break; 1562 case CS_ETM_ISA_A64: 1563 /* 1564 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 1565 * 1566 * b'31 b'21 b'4 b'0 1567 * +-----------------------+---------+-----------+ 1568 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 1569 * +-----------------------+---------+-----------+ 1570 */ 1571 addr = end_addr - 4; 1572 cs_etm__mem_access(etmq, trace_chan_id, addr, 1573 sizeof(instr32), (u8 *)&instr32); 1574 if ((instr32 & 0xFFE0001F) == 0xd4000001) 1575 return true; 1576 1577 break; 1578 case CS_ETM_ISA_UNKNOWN: 1579 default: 1580 break; 1581 } 1582 1583 return false; 1584 } 1585 1586 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 1587 struct cs_etm_traceid_queue *tidq, u64 magic) 1588 { 1589 u8 trace_chan_id = tidq->trace_chan_id; 1590 struct cs_etm_packet *packet = tidq->packet; 1591 struct cs_etm_packet *prev_packet = tidq->prev_packet; 1592 1593 if (magic == __perf_cs_etmv3_magic) 1594 if (packet->exception_number == CS_ETMV3_EXC_SVC) 1595 return true; 1596 1597 /* 1598 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 1599 * HVC cases; need to check if it's SVC instruction based on 1600 * packet address. 
1601 */ 1602 if (magic == __perf_cs_etmv4_magic) { 1603 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1604 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1605 prev_packet->end_addr)) 1606 return true; 1607 } 1608 1609 return false; 1610 } 1611 1612 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 1613 u64 magic) 1614 { 1615 struct cs_etm_packet *packet = tidq->packet; 1616 1617 if (magic == __perf_cs_etmv3_magic) 1618 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 1619 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 1620 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 1621 packet->exception_number == CS_ETMV3_EXC_IRQ || 1622 packet->exception_number == CS_ETMV3_EXC_FIQ) 1623 return true; 1624 1625 if (magic == __perf_cs_etmv4_magic) 1626 if (packet->exception_number == CS_ETMV4_EXC_RESET || 1627 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 1628 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 1629 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 1630 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 1631 packet->exception_number == CS_ETMV4_EXC_IRQ || 1632 packet->exception_number == CS_ETMV4_EXC_FIQ) 1633 return true; 1634 1635 return false; 1636 } 1637 1638 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 1639 struct cs_etm_traceid_queue *tidq, 1640 u64 magic) 1641 { 1642 u8 trace_chan_id = tidq->trace_chan_id; 1643 struct cs_etm_packet *packet = tidq->packet; 1644 struct cs_etm_packet *prev_packet = tidq->prev_packet; 1645 1646 if (magic == __perf_cs_etmv3_magic) 1647 if (packet->exception_number == CS_ETMV3_EXC_SMC || 1648 packet->exception_number == CS_ETMV3_EXC_HYP || 1649 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 1650 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 1651 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 1652 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 1653 packet->exception_number == CS_ETMV3_EXC_GENERIC) 1654 return true; 1655 1656 if (magic == __perf_cs_etmv4_magic) { 1657 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 1658 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 1659 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 1660 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 1661 return true; 1662 1663 /* 1664 * For CS_ETMV4_EXC_CALL, except SVC other instructions 1665 * (SMC, HVC) are taken as sync exceptions. 1666 */ 1667 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1668 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1669 prev_packet->end_addr)) 1670 return true; 1671 1672 /* 1673 * ETMv4 has 5 bits for exception number; if the numbers 1674 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 1675 * they are implementation defined exceptions. 1676 * 1677 * For this case, simply take it as sync exception. 
1678 */ 1679 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 1680 packet->exception_number <= CS_ETMV4_EXC_END) 1681 return true; 1682 } 1683 1684 return false; 1685 } 1686 1687 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 1688 struct cs_etm_traceid_queue *tidq) 1689 { 1690 struct cs_etm_packet *packet = tidq->packet; 1691 struct cs_etm_packet *prev_packet = tidq->prev_packet; 1692 u8 trace_chan_id = tidq->trace_chan_id; 1693 u64 magic; 1694 int ret; 1695 1696 switch (packet->sample_type) { 1697 case CS_ETM_RANGE: 1698 /* 1699 * Immediate branch instruction with neither link nor 1700 * return flag; it's a normal branch instruction within 1701 * a function. 1702 */ 1703 if (packet->last_instr_type == OCSD_INSTR_BR && 1704 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 1705 packet->flags = PERF_IP_FLAG_BRANCH; 1706 1707 if (packet->last_instr_cond) 1708 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 1709 } 1710 1711 /* 1712 * Immediate branch instruction with link (e.g. BL), this is 1713 * a branch instruction for a function call. 1714 */ 1715 if (packet->last_instr_type == OCSD_INSTR_BR && 1716 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1717 packet->flags = PERF_IP_FLAG_BRANCH | 1718 PERF_IP_FLAG_CALL; 1719 1720 /* 1721 * Indirect branch instruction with link (e.g. BLR), this is 1722 * a branch instruction for a function call. 1723 */ 1724 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1725 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1726 packet->flags = PERF_IP_FLAG_BRANCH | 1727 PERF_IP_FLAG_CALL; 1728 1729 /* 1730 * Indirect branch instruction with subtype of 1731 * OCSD_S_INSTR_V7_IMPLIED_RET, this is an explicit hint of a 1732 * function return for A32/T32. 1733 */ 1734 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1735 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 1736 packet->flags = PERF_IP_FLAG_BRANCH | 1737 PERF_IP_FLAG_RETURN; 1738 1739 /* 1740 * Indirect branch instruction without link (e.g. BR), usually 1741 * this is used for a function return, especially for functions 1742 * within a dynamically linked library. 1743 */ 1744 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1745 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 1746 packet->flags = PERF_IP_FLAG_BRANCH | 1747 PERF_IP_FLAG_RETURN; 1748 1749 /* Return instruction for function return. */ 1750 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1751 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 1752 packet->flags = PERF_IP_FLAG_BRANCH | 1753 PERF_IP_FLAG_RETURN; 1754 1755 /* 1756 * The decoder might insert a discontinuity in the middle of 1757 * instruction packets; fix up prev_packet with the flag 1758 * PERF_IP_FLAG_TRACE_BEGIN to indicate a restarting trace. 1759 */ 1760 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1761 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1762 PERF_IP_FLAG_TRACE_BEGIN; 1763 1764 /* 1765 * If the previous packet is an exception return packet 1766 * and the return address just follows an SVC instruction, 1767 * calibrate the previous packet sample flags 1768 * to PERF_IP_FLAG_SYSCALLRET.
1769 */ 1770 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 1771 PERF_IP_FLAG_RETURN | 1772 PERF_IP_FLAG_INTERRUPT) && 1773 cs_etm__is_svc_instr(etmq, trace_chan_id, 1774 packet, packet->start_addr)) 1775 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1776 PERF_IP_FLAG_RETURN | 1777 PERF_IP_FLAG_SYSCALLRET; 1778 break; 1779 case CS_ETM_DISCONTINUITY: 1780 /* 1781 * The trace is discontinuous; if the previous packet is 1782 * an instruction packet, set the flag PERF_IP_FLAG_TRACE_END 1783 * for the previous packet. 1784 */ 1785 if (prev_packet->sample_type == CS_ETM_RANGE) 1786 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1787 PERF_IP_FLAG_TRACE_END; 1788 break; 1789 case CS_ETM_EXCEPTION: 1790 ret = cs_etm__get_magic(packet->trace_chan_id, &magic); 1791 if (ret) 1792 return ret; 1793 1794 /* The exception is for a system call. */ 1795 if (cs_etm__is_syscall(etmq, tidq, magic)) 1796 packet->flags = PERF_IP_FLAG_BRANCH | 1797 PERF_IP_FLAG_CALL | 1798 PERF_IP_FLAG_SYSCALLRET; 1799 /* 1800 * The exceptions are triggered by external signals from the bus, 1801 * interrupt controller, debug module, PE reset or halt. 1802 */ 1803 else if (cs_etm__is_async_exception(tidq, magic)) 1804 packet->flags = PERF_IP_FLAG_BRANCH | 1805 PERF_IP_FLAG_CALL | 1806 PERF_IP_FLAG_ASYNC | 1807 PERF_IP_FLAG_INTERRUPT; 1808 /* 1809 * Otherwise, the exception is caused by a trap, an instruction or 1810 * data fault, or alignment errors. 1811 */ 1812 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 1813 packet->flags = PERF_IP_FLAG_BRANCH | 1814 PERF_IP_FLAG_CALL | 1815 PERF_IP_FLAG_INTERRUPT; 1816 1817 /* 1818 * When the exception packet is inserted, the exception 1819 * packet is not used standalone for generating samples; 1820 * it is affiliated with the previous instruction range 1821 * packet, so set the previous range packet flags to tell perf 1822 * it is an exception taken branch. 1823 */ 1824 if (prev_packet->sample_type == CS_ETM_RANGE) 1825 prev_packet->flags = packet->flags; 1826 break; 1827 case CS_ETM_EXCEPTION_RET: 1828 /* 1829 * When the exception return packet is inserted, the 1830 * exception return packet is not used standalone for 1831 * generating samples; it is affiliated with the previous 1832 * instruction range packet, so set the previous range packet 1833 * flags to tell perf it is an exception return branch. 1834 * 1835 * The exception return can be for either a system call or 1836 * other exception types; unfortunately the packet doesn't 1837 * contain exception type related info, so we cannot decide 1838 * the exception type purely based on the exception return packet. 1839 * If we record the exception number from the exception packet and 1840 * reuse it for the exception return packet, this is not reliable 1841 * because the trace can be discontinuous or the interrupt can 1842 * be nested, thus the recorded exception number cannot be 1843 * used for the exception return packet in these two cases. 1844 * 1845 * For the exception return packet, we only need to distinguish 1846 * whether the packet is for a system call or for other types. Thus the 1847 * decision can be deferred until we receive the next packet, which 1848 * contains the return address; based on the return address we 1849 * can read out the previous instruction and check if it's a 1850 * system call instruction and then calibrate the sample flag 1851 * as needed.
1852 */ 1853 if (prev_packet->sample_type == CS_ETM_RANGE) 1854 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1855 PERF_IP_FLAG_RETURN | 1856 PERF_IP_FLAG_INTERRUPT; 1857 break; 1858 case CS_ETM_EMPTY: 1859 default: 1860 break; 1861 } 1862 1863 return 0; 1864 } 1865 1866 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 1867 { 1868 int ret = 0; 1869 size_t processed = 0; 1870 1871 /* 1872 * Packets are decoded and added to the decoder's packet queue 1873 * until the decoder packet processing callback has requested that 1874 * processing stops or there is nothing left in the buffer. Normal 1875 * operations that stop processing are a timestamp packet or a full 1876 * decoder buffer queue. 1877 */ 1878 ret = cs_etm_decoder__process_data_block(etmq->decoder, 1879 etmq->offset, 1880 &etmq->buf[etmq->buf_used], 1881 etmq->buf_len, 1882 &processed); 1883 if (ret) 1884 goto out; 1885 1886 etmq->offset += processed; 1887 etmq->buf_used += processed; 1888 etmq->buf_len -= processed; 1889 1890 out: 1891 return ret; 1892 } 1893 1894 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 1895 struct cs_etm_traceid_queue *tidq) 1896 { 1897 int ret; 1898 struct cs_etm_packet_queue *packet_queue; 1899 1900 packet_queue = &tidq->packet_queue; 1901 1902 /* Process each packet in this chunk */ 1903 while (1) { 1904 ret = cs_etm_decoder__get_packet(packet_queue, 1905 tidq->packet); 1906 if (ret <= 0) 1907 /* 1908 * Stop processing this chunk on 1909 * end of data or error 1910 */ 1911 break; 1912 1913 /* 1914 * Since packet addresses are swapped in packet 1915 * handling within below switch() statements, 1916 * thus setting sample flags must be called 1917 * prior to switch() statement to use address 1918 * information before packets swapping. 1919 */ 1920 ret = cs_etm__set_sample_flags(etmq, tidq); 1921 if (ret < 0) 1922 break; 1923 1924 switch (tidq->packet->sample_type) { 1925 case CS_ETM_RANGE: 1926 /* 1927 * If the packet contains an instruction 1928 * range, generate instruction sequence 1929 * events. 1930 */ 1931 cs_etm__sample(etmq, tidq); 1932 break; 1933 case CS_ETM_EXCEPTION: 1934 case CS_ETM_EXCEPTION_RET: 1935 /* 1936 * If the exception packet is coming, 1937 * make sure the previous instruction 1938 * range packet to be handled properly. 1939 */ 1940 cs_etm__exception(tidq); 1941 break; 1942 case CS_ETM_DISCONTINUITY: 1943 /* 1944 * Discontinuity in trace, flush 1945 * previous branch stack 1946 */ 1947 cs_etm__flush(etmq, tidq); 1948 break; 1949 case CS_ETM_EMPTY: 1950 /* 1951 * Should not receive empty packet, 1952 * report error. 1953 */ 1954 pr_err("CS ETM Trace: empty packet\n"); 1955 return -EINVAL; 1956 default: 1957 break; 1958 } 1959 } 1960 1961 return ret; 1962 } 1963 1964 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 1965 { 1966 int idx; 1967 struct int_node *inode; 1968 struct cs_etm_traceid_queue *tidq; 1969 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 1970 1971 intlist__for_each_entry(inode, traceid_queues_list) { 1972 idx = (int)(intptr_t)inode->priv; 1973 tidq = etmq->traceid_queues[idx]; 1974 1975 /* Ignore return value */ 1976 cs_etm__process_traceid_queue(etmq, tidq); 1977 1978 /* 1979 * Generate an instruction sample with the remaining 1980 * branchstack entries. 
		 */
		cs_etm__flush(etmq, tidq);
	}
}

static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk; if an error
			 * occurs there is nothing to do other than hope the
			 * next chunk is better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		tidq = cs_etm__etmq_get_traceid_queue(etmq,
						CS_ETM_PER_THREAD_TRACEID);

		if (!tidq)
			continue;

		if ((tid == -1) || (tidq->tid == tid)) {
			cs_etm__set_pid_tid_cpu(etm, tidq);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr;
	u8 trace_chan_id;
	u64 timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	while (1) {
		if (!etm->heap.heap_cnt)
			goto out;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this
			 * traceID, which means something somewhere went very
			 * wrong.  There is no choice but to bail out.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed; time to
		 * move on to the next timestamp, fetching a new
		 * auxtrace_buffer if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;
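
		/*
		 * Note on the return convention, inferred from the call
		 * sites here and in cs_etm__run_decoder():
		 * cs_etm__get_data_block() returns a negative errno on
		 * failure, zero when no more auxtrace buffers are queued for
		 * this etmq, and a positive value once a new buffer is ready
		 * for decoding.
		 */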

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!timestamp) {
			/*
			 * cs_etm__decode_data_block() returns when there are
			 * no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there are no more traces
			 * to process in this auxtrace_buffer; empty and flush
			 * all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
	}

out:
	return ret;
}

static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
					union perf_event *event)
{
	struct thread *th;

	if (etm->timeless_decoding)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->itrace_start.pid,
				     event->itrace_start.tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
					   union perf_event *event)
{
	struct thread *th;
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	/*
	 * Context switches in per-thread mode are irrelevant since perf
	 * starts/stops tracing as the process is scheduled.
	 */
	if (etm->timeless_decoding)
		return 0;

	/*
	 * SWITCH_IN events carry the next process to be switched out while
	 * SWITCH_OUT events carry the process to be switched in.  As such
	 * we don't care about IN events.
	 */
	if (!out)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->context_switch.next_prev_pid,
				     event->context_switch.next_prev_tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid);

	if (event->header.type == PERF_RECORD_ITRACE_START)
		return cs_etm__process_itrace_start(etm, event);
	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		return cs_etm__process_switch_cpu_wide(etm, event);

	if (!etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_AUX)
		return cs_etm__process_queues(etm);

	return 0;
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

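	/*
	 * If none of the events in the session has PERF_SAMPLE_TIME set,
	 * decoding is done in "timeless" mode: queues cannot be ordered on
	 * the auxtrace heap and are instead decoded in one go, per thread,
	 * from cs_etm__process_timeless_queues() (see cs_etm__process_event()
	 * above).  Otherwise decoding proceeds in timestamp order through
	 * cs_etm__process_queues().
	 */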
	/*
	 * Cycle through the list of events and see if any of them has the
	 * time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};

static void cs_etm__print_auxtrace_info(u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure... return */
			return;
	}
}

int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				   0xffffffff);

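	/*
	 * At this point the layout of auxtrace_info->priv, as consumed by
	 * the loop below, is as follows (the indices are illustrative and
	 * assume a hypothetical two-CPU ETMv4 system):
	 *
	 *   [0]       header version (0)
	 *   [1]       PMU type (upper 32 bits) / number of CPUs (lower 32)
	 *   [2]       snapshot mode flag
	 *   [3..11]   CPU0 block: __perf_cs_etmv4_magic, CPU, TRCCONFIGR,
	 *             TRCTRACEIDR, TRCIDR0, TRCIDR1, TRCIDR2, TRCIDR8,
	 *             TRCAUTHSTATUS
	 *   [12..20]  CPU1 block, same layout
	 *
	 * An ETMv3/PTM CPU instead starts its block with __perf_cs_etmv3_magic
	 * and provides CS_ETM_PRIV_MAX words.  Blocks of different versions
	 * can be mixed, which is why the loop below checks the magic number
	 * for every CPU.
	 */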

	/*
	 * Create an RB tree for traceID-metadata tuples.  Since the
	 * conversion has to be made for each packet that gets decoded,
	 * optimizing access in anything other than a sequential array is
	 * worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = -ENOMEM;
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not be taken.
		 * Back out if that's the case.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the metadata pointer */
		inode->priv = metadata[j];
	}

	/*
	 * CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and CS_ETMV4_PRIV_MAX mark
	 * how many double words are in the global metadata and in each CPU's
	 * metadata respectively.  The following tests whether the correct
	 * number of double words was present in the auxtrace info section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

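	/*
	 * The pid/tid of 999999999 is an arbitrary sentinel; this thread
	 * appears to act as a catch-all so that decoded addresses which
	 * cannot be attributed to a real thread still have a thread (and
	 * map groups) to be resolved against.
	 */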
	/*
	 * Initialize list node so that at thread__zput() we can avoid
	 * segmentation fault at list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return err;
}
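
/*
 * Rough usage sketch (not taken from this file): on a CoreSight-enabled
 * arm64 system with a perf built against OpenCSD, the decode path above is
 * typically exercised with something like:
 *
 *	perf record -e cs_etm/@tmc_etr0/u --per-thread -- <workload>
 *	perf report --stdio
 *	perf script
 *
 * where "tmc_etr0" stands in for whatever trace sink the system exposes
 * under /sys/bus/event_source/devices/cs_etm/sinks/.
 */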