// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/kernel.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "dso.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "session.h"
#include "map_symbol.h"
#include "branch.h"
#include "symbol.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
#include "util/util.h"

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless decoding has no timestamps in the trace, so overlapping
	 * mmap lookups are less accurate but the trace data is smaller. We
	 * use context IDs in the trace instead of matching timestamps with
	 * fork records, so timestamps aren't really needed in the general
	 * case. Overlapping mmaps happen in cases like between a fork and
	 * an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread decoding ignores the trace channel ID and instead
	 * assumes that everything in a buffer comes from the same process
	 * regardless of which CPU it ran on. It also implies no context IDs,
	 * so the TID is taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;
};

struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;
	struct thread *thread;
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};

struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	u8 pending_timestamp_chan_id;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
};

/* RB tree for quick conversion between traceID and metadata pointers */
static struct intlist *traceid_list;

static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);

/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number in the upper 16 bits and the channel in
 * the lower 16 bits.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
	(queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
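
/*
 * Illustrative only: with queue_nr = 2 and trace_chan_id = 0x10, the
 * combined heap key is TO_CS_QUEUE_NR(2, 0x10) == 0x20010, and the two
 * halves are recovered with TO_QUEUE_NR(0x20010) == 2 and
 * TO_TRACE_CHAN_ID(0x20010) == 0x10.
 */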

static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	etmidr &= ETMIDR_PTM_VERSION;

	if (etmidr == ETMIDR_PTM_VERSION)
		return CS_ETM_PROTO_PTM;

	return CS_ETM_PROTO_ETMV3;
}

static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*magic = metadata[CS_ETM_MAGIC];
	return 0;
}

int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*cpu = (int)metadata[CS_ETM_CPU];
	return 0;
}

/*
 * The returned PID format is presented as an enum:
 *
 *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
 *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
 *   CS_ETM_PIDFMT_NONE: No context IDs traced.
 *
 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
 * are enabled at the same time when the session runs on an EL2 kernel.
 * This means both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be recorded in
 * the trace data; in that case the tool selectively uses CONTEXTIDR_EL2
 * as the PID.
 *
 * The result is cached in etm->pid_fmt so this function only needs to be
 * called when processing the aux info.
 */
static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
	u64 val;

	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
		val = metadata[CS_ETM_ETMCR];
		/* CONTEXTIDR is traced */
		if (val & BIT(ETM_OPT_CTXTID))
			return CS_ETM_PIDFMT_CTXTID;
	} else {
		val = metadata[CS_ETMV4_TRCCONFIGR];
		/* CONTEXTIDR_EL2 is traced */
		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
			return CS_ETM_PIDFMT_CTXTID2;
		/* CONTEXTIDR_EL1 is traced */
		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
			return CS_ETM_PIDFMT_CTXTID;
	}

	return CS_ETM_PIDFMT_NONE;
}

enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
{
	return etmq->etm->pid_fmt;
}
static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	struct int_node *inode;

	/* Get an RB node for this CPU */
	inode = intlist__findnew(traceid_list, trace_chan_id);

	/* Something went wrong, no need to continue */
	if (!inode)
		return -ENOMEM;

	/*
	 * The node for that CPU should not have been taken already.
	 * Back out if that's the case.
	 */
	if (inode->priv)
		return -EINVAL;

	/* All good, associate the traceID with the metadata pointer */
	inode->priv = cpu_metadata;

	return 0;
}

static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

	switch (cs_etm_magic) {
	case __perf_cs_etmv3_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		break;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

/*
 * Update the metadata trace ID with the value found in the AUX_HW_INFO packet.
 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
 */
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

	switch (cs_etm_magic) {
	case __perf_cs_etmv3_magic:
		cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
		break;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
		break;

	default:
		return -EINVAL;
	}
	return 0;
}

/*
 * Get the metadata for a specific CPU from the metadata array.
 */
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
{
	int i;
	u64 *metadata = NULL;

	for (i = 0; i < etm->num_cpu; i++) {
		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
			metadata = etm->metadata[i];
			break;
		}
	}

	return metadata;
}

/*
 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
 *
 * The payload associates the Trace ID and the CPU.
 * The routine is tolerant of seeing multiple packets with the same
 * association, but a CPU / Trace ID association changing during a session
 * is an error.
 */
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
					    union perf_event *event)
{
	struct cs_etm_auxtrace *etm;
	struct perf_sample sample;
	struct int_node *inode;
	struct evsel *evsel;
	u64 *cpu_data;
	u64 hw_id;
	int cpu, version, err;
	u8 trace_chan_id, curr_chan_id;

	/* extract and parse the HW ID */
	hw_id = event->aux_output_hw_id.hw_id;
	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
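
	/*
	 * Illustrative only: the version and trace ID are packed into
	 * separate bit fields of the hw_id word, and FIELD_GET() above
	 * extracts each field according to the masks; see
	 * linux/coresight-pmu.h for the authoritative definitions.
	 */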

	/* check that we can handle this version */
	if (version > CS_AUX_HW_ID_CURR_VERSION)
		return -EINVAL;

	/* get access to the etm metadata */
	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
	if (!etm || !etm->metadata)
		return -EINVAL;

	/* parse the sample to get the CPU */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	err = evsel__parse_sample(evsel, event, &sample);
	if (err)
		return err;
	cpu = sample.cpu;
	if (cpu == -1) {
		/* no CPU in the sample - possibly recorded with an old version of perf */
		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.\n");
		return -EINVAL;
	}

	/* See if the ID is mapped to a CPU, and it matches the current CPU */
	inode = intlist__find(traceid_list, trace_chan_id);
	if (inode) {
		cpu_data = inode->priv;
		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
			return -EINVAL;
		}

		/* check that the mapped ID matches */
		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
		if (err)
			return err;
		if (curr_chan_id != trace_chan_id) {
			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
			return -EINVAL;
		}

		/* mapped and matched - return OK */
		return 0;
	}

	cpu_data = get_cpu_data(etm, cpu);
	if (cpu_data == NULL)
		return -EINVAL;

	/* not one we've seen before - let's map it */
	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
	if (err)
		return err;

	/*
	 * If we are picking up the association from the packet, we need to
	 * plug the correct trace ID into the metadata for setting up decoders
	 * later.
	 */
	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
	return err;
}

void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue. Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}

static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
				      u8 *trace_chan_id)
{
	struct cs_etm_packet_queue *packet_queue;

	if (!etmq->pending_timestamp_chan_id)
		return 0;

	if (trace_chan_id)
		*trace_chan_id = etmq->pending_timestamp_chan_id;

	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
						     etmq->pending_timestamp_chan_id);
	if (!packet_queue)
		return 0;

	/* Acknowledge pending status */
	etmq->pending_timestamp_chan_id = 0;

	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
	return packet_queue->cs_timestamp;
}

static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
{
	int i;

	queue->head = 0;
	queue->tail = 0;
	queue->packet_count = 0;
	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].instr_count = 0;
		queue->packet_buffer[i].last_instr_taken_branch = false;
		queue->packet_buffer[i].last_instr_size = 0;
		queue->packet_buffer[i].last_instr_type = 0;
		queue->packet_buffer[i].last_instr_subtype = 0;
		queue->packet_buffer[i].last_instr_cond = 0;
		queue->packet_buffer[i].flags = 0;
		queue->packet_buffer[i].exception_number = UINT32_MAX;
		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
		queue->packet_buffer[i].cpu = INT_MIN;
	}
}

static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];
		cs_etm__clear_packet_queue(&tidq->packet_queue);
	}
}

static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}

static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	int idx;
	struct int_node *inode;
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue *tidq, **traceid_queues;
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->per_thread_decoding)
		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

	traceid_queues_list = etmq->traceid_queues_list;

	/*
	 * Check if a traceid_queue already exists for this traceID by
	 * looking in the queue list.
	 */
	inode = intlist__find(traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* We couldn't find a traceid_queue for this traceID, allocate one */
	tidq = malloc(sizeof(*tidq));
	if (!tidq)
		return NULL;

	memset(tidq, 0, sizeof(*tidq));

	/* Get a valid index for the new traceid_queue */
	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
	if (!inode)
		goto out_free;

	/* Associate this traceID with this index */
	inode->priv = (void *)(intptr_t)idx;

	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
		goto out_free;

	/* Grow the traceid_queues array by one unit */
	traceid_queues = etmq->traceid_queues;
	traceid_queues = reallocarray(traceid_queues,
				      idx + 1,
				      sizeof(*traceid_queues));

	/*
	 * On failure reallocarray() returns NULL and the original block of
	 * memory is left untouched.
	 */
	if (!traceid_queues)
		goto out_free;

	traceid_queues[idx] = tidq;
	etmq->traceid_queues = traceid_queues;

	return etmq->traceid_queues[idx];

out_free:
	/*
	 * Function intlist__remove() removes the inode from the list
	 * and deletes the memory associated with it.
	 */
	intlist__remove(traceid_queues_list, inode);
	free(tidq);

	return NULL;
}

struct cs_etm_packet_queue
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (tidq)
		return &tidq->packet_queue;

	return NULL;
}

static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
				struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *tmp;

	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
	    etm->synth_opts.instructions) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 *
		 * Threads and exception levels are also tracked for both the
		 * previous and current packets. This is because the previous
		 * packet is used for the 'from' IP for branch samples, so the
		 * thread at that time must also be assigned to that sample.
		 * Across discontinuity packets the thread can change, so by
		 * tracking the thread for the previous packet the branch sample
		 * will have the correct info.
		 */
		tmp = tidq->packet;
		tidq->packet = tidq->prev_packet;
		tidq->prev_packet = tmp;
		tidq->prev_packet_el = tidq->el;
		thread__put(tidq->prev_packet_thread);
		tidq->prev_packet_thread = thread__get(tidq->thread);
	}
}

static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, " %s", pkt_string);
	else
		color_fprintf(stdout, color, " %s\n", pkt_string);

	fflush(stdout);
}

static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx,
					  u32 etmidr)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
}

static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
}

static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
					struct cs_etm_auxtrace *etm, int idx)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = CS_ETM_PROTO_ETE;
	t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
	t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
	t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
	t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
	t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
	t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
	t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm,
				     int decoders)
{
	int i;
	u32 etmidr;
	u64 architecture;

	for (i = 0; i < decoders; i++) {
		architecture = etm->metadata[i][CS_ETM_MAGIC];

		switch (architecture) {
		case __perf_cs_etmv3_magic:
			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, i);
			break;
		case __perf_cs_ete_magic:
			cs_etm__set_trace_param_ete(t_params, etm, i);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode,
				       bool formatted)
{
	int ret = -EINVAL;

	if (!(mode < CS_ETM_OPERATION_MAX))
		goto out;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	d_params->formatted = formatted;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	ret = 0;
out:
	return ret;
}

static void cs_etm__dump_event(struct cs_etm_queue *etmq,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		      ". ... CoreSight %s Trace data: size %#zx bytes\n",
		      cs_etm_decoder__get_name(etmq->decoder), buffer->size);

	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				etmq->decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__reset(etmq->decoder);
}

static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (etm->timeless_decoding) {
		/*
		 * Pass tid = -1 to process all queues. But likely they will
		 * have already been processed on PERF_RECORD_EXIT anyway.
		 */
		return cs_etm__process_timeless_queues(etm, -1);
	}

	return cs_etm__process_timestamped_queues(etm);
}

static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and deletes the memory associated with it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}

static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	cs_etm_decoder__free(etmq->decoder);
	cs_etm__free_traceid_queues(etmq);
	free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/metadata nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	zfree(&aux->metadata);
	zfree(&aux);
}

static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	return evsel->core.attr.type == aux->pmu_type;
}

static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1, assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likelihood of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}

static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
			   ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (address >= machine__kernel_start(machine)) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else {
			/*
			 * Can't really happen at the moment because
			 * cs_etm__get_machine() will always return
			 * machines.host for any non EL1 trace.
			 */
			return PERF_RECORD_MISC_GUEST_USER;
		}
	}
}

static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked the EL alongside the PID in
	 * cs_etm__set_thread() so double check that it matches what OpenCSD
	 * thinks as well. It doesn't distinguish between EL0 and EL1 for this
	 * mem access callback so we had to do the extra tracking. Skip
	 * validation if it's any of the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	if (dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso->auxtrace_warned) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
			       address,
			       dso->long_name ? dso->long_name : "Unknown");
			dso->auxtrace_warned = true;
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}

static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
						bool formatted)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params = NULL;
	struct cs_etm_queue *etmq;
	/*
	 * Each queue can only contain data from one CPU when unformatted, so
	 * only one decoder is needed.
	 */
	int decoders = formatted ? etm->num_cpu : 1;

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etm, decoders))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE,
					formatted))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	zfree(&t_params);
	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr,
			       bool formatted)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		return 0;

	etmq = cs_etm__alloc_queue(etm, formatted);

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->offset = 0;

	return 0;
}

static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block. The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end
		 * of trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
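
/*
 * Illustrative only: with last_branch_sz = 4, last_branch_pos = 1 and
 * bs_src->nr >= 4, the first memcpy() above copies entries[1..3] and the
 * second copies entries[0], yielding four branches in bs_dst ordered from
 * most to least recently inserted.
 */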

static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
	tidq->last_branch_pos = 0;
	tidq->last_branch_rb->nr = 0;
}

static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
			   instrBytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
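
/*
 * Illustrative only: for the 32-bit T32 encoding of PUSH.W the first
 * halfword is 0xe92d, so in little-endian memory instrBytes[1] is 0xe9 and
 * (0xe9 & 0xf8) == 0xe8 >= 0xe8, giving a 4-byte size. For a 16-bit MOVS
 * (e.g. 0x2001), instrBytes[1] is 0x20 and the size is 2 bytes.
 */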

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->end_addr - packet->last_instr_size;
}

static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	if (packet->isa == CS_ETM_ISA_T32) {
		u64 addr = packet->start_addr;

		while (offset) {
			addr += cs_etm__t32_instr_size(etmq,
						       trace_chan_id, addr);
			offset--;
		}
		return addr;
	}

	/* Assume a 4 byte instruction size (A32/A64) */
	return packet->start_addr + offset * 4;
}
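
/*
 * Illustrative only: for an A64 range packet starting at 0x400000,
 * cs_etm__instr_addr(etmq, chan, packet, 3) returns 0x40000c, while for
 * T32 the walk above adds 2 or 4 bytes per instruction as appropriate.
 */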
1279
cs_etm__update_last_branch_rb(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1280 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1281 struct cs_etm_traceid_queue *tidq)
1282 {
1283 struct branch_stack *bs = tidq->last_branch_rb;
1284 struct branch_entry *be;
1285
1286 /*
1287 * The branches are recorded in a circular buffer in reverse
1288 * chronological order: we start recording from the last element of the
1289 * buffer down. After writing the first element of the stack, move the
1290 * insert position back to the end of the buffer.
1291 */
1292 if (!tidq->last_branch_pos)
1293 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1294
1295 tidq->last_branch_pos -= 1;
1296
1297 be = &bs->entries[tidq->last_branch_pos];
1298 be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1299 be->to = cs_etm__first_executed_instr(tidq->packet);
1300 /* No support for mispredict */
1301 be->flags.mispred = 0;
1302 be->flags.predicted = 1;
1303
1304 /*
1305 * Increment bs->nr until reaching the number of last branches asked by
1306 * the user on the command line.
1307 */
1308 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1309 bs->nr += 1;
1310 }
1311
cs_etm__inject_event(union perf_event * event,struct perf_sample * sample,u64 type)1312 static int cs_etm__inject_event(union perf_event *event,
1313 struct perf_sample *sample, u64 type)
1314 {
1315 event->header.size = perf_event__sample_event_size(sample, type, 0);
1316 return perf_event__synthesize_sample(event, type, 0, sample);
1317 }
1318
1319
1320 static int
cs_etm__get_trace(struct cs_etm_queue * etmq)1321 cs_etm__get_trace(struct cs_etm_queue *etmq)
1322 {
1323 struct auxtrace_buffer *aux_buffer = etmq->buffer;
1324 struct auxtrace_buffer *old_buffer = aux_buffer;
1325 struct auxtrace_queue *queue;
1326
1327 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1328
1329 aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1330
1331 /* If no more data, drop the previous auxtrace_buffer and return */
1332 if (!aux_buffer) {
1333 if (old_buffer)
1334 auxtrace_buffer__drop_data(old_buffer);
1335 etmq->buf_len = 0;
1336 return 0;
1337 }
1338
1339 etmq->buffer = aux_buffer;
1340
1341 /* If the aux_buffer doesn't have data associated, try to load it */
1342 if (!aux_buffer->data) {
1343 /* get the file desc associated with the perf data file */
1344 int fd = perf_data__fd(etmq->etm->session->data);
1345
1346 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1347 if (!aux_buffer->data)
1348 return -ENOMEM;
1349 }
1350
1351 /* If valid, drop the previous buffer */
1352 if (old_buffer)
1353 auxtrace_buffer__drop_data(old_buffer);
1354
1355 etmq->buf_used = 0;
1356 etmq->buf_len = aux_buffer->size;
1357 etmq->buf = aux_buffer->data;
1358
1359 return etmq->buf_len;
1360 }
1361
cs_etm__set_thread(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,pid_t tid,ocsd_ex_level el)1362 static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1363 struct cs_etm_traceid_queue *tidq, pid_t tid,
1364 ocsd_ex_level el)
1365 {
1366 struct machine *machine = cs_etm__get_machine(etmq, el);
1367
1368 if (tid != -1) {
1369 thread__zput(tidq->thread);
1370 tidq->thread = machine__find_thread(machine, -1, tid);
1371 }
1372
1373 /* Couldn't find a known thread */
1374 if (!tidq->thread)
1375 tidq->thread = machine__idle_thread(machine);
1376
1377 tidq->el = el;
1378 }
1379
cs_etm__etmq_set_tid_el(struct cs_etm_queue * etmq,pid_t tid,u8 trace_chan_id,ocsd_ex_level el)1380 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1381 u8 trace_chan_id, ocsd_ex_level el)
1382 {
1383 struct cs_etm_traceid_queue *tidq;
1384
1385 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1386 if (!tidq)
1387 return -EINVAL;
1388
1389 cs_etm__set_thread(etmq, tidq, tid, el);
1390 return 0;
1391 }
1392
cs_etm__etmq_is_timeless(struct cs_etm_queue * etmq)1393 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1394 {
1395 return !!etmq->etm->timeless_decoding;
1396 }
1397
cs_etm__copy_insn(struct cs_etm_queue * etmq,u64 trace_chan_id,const struct cs_etm_packet * packet,struct perf_sample * sample)1398 static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1399 u64 trace_chan_id,
1400 const struct cs_etm_packet *packet,
1401 struct perf_sample *sample)
1402 {
1403 /*
1404 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1405 * packet, so directly bail out with 'insn_len' = 0.
1406 */
1407 if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1408 sample->insn_len = 0;
1409 return;
1410 }
1411
1412 /*
1413 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1414 * cs_etm__t32_instr_size().
1415 */
1416 if (packet->isa == CS_ETM_ISA_T32)
1417 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1418 sample->ip);
1419 /* Otherwise, A64 and A32 instruction size are always 32-bit. */
1420 else
1421 sample->insn_len = 4;
1422
1423 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1424 (void *)sample->insn, 0);
1425 }
1426
cs_etm__convert_sample_time(struct cs_etm_queue * etmq,u64 cs_timestamp)1427 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1428 {
1429 struct cs_etm_auxtrace *etm = etmq->etm;
1430
1431 if (etm->has_virtual_ts)
1432 return tsc_to_perf_time(cs_timestamp, &etm->tc);
1433 else
1434 return cs_timestamp;
1435 }
1436
cs_etm__resolve_sample_time(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1437 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1438 struct cs_etm_traceid_queue *tidq)
1439 {
1440 struct cs_etm_auxtrace *etm = etmq->etm;
1441 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1442
1443 if (!etm->timeless_decoding && etm->has_virtual_ts)
1444 return packet_queue->cs_timestamp;
1445 else
1446 return etm->latest_kernel_timestamp;
1447 }
1448
cs_etm__synth_instruction_sample(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,u64 addr,u64 period)1449 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1450 struct cs_etm_traceid_queue *tidq,
1451 u64 addr, u64 period)
1452 {
1453 int ret = 0;
1454 struct cs_etm_auxtrace *etm = etmq->etm;
1455 union perf_event *event = tidq->event_buf;
1456 struct perf_sample sample = {.ip = 0,};
1457
1458 event->sample.header.type = PERF_RECORD_SAMPLE;
1459 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1460 event->sample.header.size = sizeof(struct perf_event_header);
1461
1462 /* Set time field based on etm auxtrace config. */
1463 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1464
1465 sample.ip = addr;
1466 sample.pid = thread__pid(tidq->thread);
1467 sample.tid = thread__tid(tidq->thread);
1468 sample.id = etmq->etm->instructions_id;
1469 sample.stream_id = etmq->etm->instructions_id;
1470 sample.period = period;
1471 sample.cpu = tidq->packet->cpu;
1472 sample.flags = tidq->prev_packet->flags;
1473 sample.cpumode = event->sample.header.misc;
1474
1475 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1476
1477 if (etm->synth_opts.last_branch)
1478 sample.branch_stack = tidq->last_branch;
1479
1480 if (etm->synth_opts.inject) {
1481 ret = cs_etm__inject_event(event, &sample,
1482 etm->instructions_sample_type);
1483 if (ret)
1484 return ret;
1485 }
1486
1487 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1488
1489 if (ret)
1490 pr_err(
1491 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1492 ret);
1493
1494 return ret;
1495 }
1496
1497 /*
1498 * The cs etm packet encodes an instruction range between a branch target
1499 * and the next taken branch. Generate sample accordingly.
1500 */
cs_etm__synth_branch_sample(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1501 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1502 struct cs_etm_traceid_queue *tidq)
1503 {
1504 int ret = 0;
1505 struct cs_etm_auxtrace *etm = etmq->etm;
1506 struct perf_sample sample = {.ip = 0,};
1507 union perf_event *event = tidq->event_buf;
1508 struct dummy_branch_stack {
1509 u64 nr;
1510 u64 hw_idx;
1511 struct branch_entry entries;
1512 } dummy_bs;
1513 u64 ip;
1514
1515 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1516
1517 event->sample.header.type = PERF_RECORD_SAMPLE;
1518 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1519 tidq->prev_packet_el);
1520 event->sample.header.size = sizeof(struct perf_event_header);
1521
1522 /* Set time field based on etm auxtrace config. */
1523 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1524
1525 sample.ip = ip;
1526 sample.pid = thread__pid(tidq->prev_packet_thread);
1527 sample.tid = thread__tid(tidq->prev_packet_thread);
1528 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1529 sample.id = etmq->etm->branches_id;
1530 sample.stream_id = etmq->etm->branches_id;
1531 sample.period = 1;
1532 sample.cpu = tidq->packet->cpu;
1533 sample.flags = tidq->prev_packet->flags;
1534 sample.cpumode = event->sample.header.misc;
1535
1536 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1537 &sample);
1538
1539 /*
1540 * perf report cannot handle events without a branch stack
1541 */
1542 if (etm->synth_opts.last_branch) {
1543 dummy_bs = (struct dummy_branch_stack){
1544 .nr = 1,
1545 .hw_idx = -1ULL,
1546 .entries = {
1547 .from = sample.ip,
1548 .to = sample.addr,
1549 },
1550 };
1551 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1552 }
1553
1554 if (etm->synth_opts.inject) {
1555 ret = cs_etm__inject_event(event, &sample,
1556 etm->branches_sample_type);
1557 if (ret)
1558 return ret;
1559 }
1560
1561 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1562
1563 if (ret)
1564 pr_err(
1565 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1566 ret);
1567
1568 return ret;
1569 }
1570
1571 struct cs_etm_synth {
1572 struct perf_tool dummy_tool;
1573 struct perf_session *session;
1574 };
1575
cs_etm__event_synth(struct perf_tool * tool,union perf_event * event,struct perf_sample * sample __maybe_unused,struct machine * machine __maybe_unused)1576 static int cs_etm__event_synth(struct perf_tool *tool,
1577 union perf_event *event,
1578 struct perf_sample *sample __maybe_unused,
1579 struct machine *machine __maybe_unused)
1580 {
1581 struct cs_etm_synth *cs_etm_synth =
1582 container_of(tool, struct cs_etm_synth, dummy_tool);
1583
1584 return perf_session__deliver_synth_event(cs_etm_synth->session,
1585 event, NULL);
1586 }
1587
cs_etm__synth_event(struct perf_session * session,struct perf_event_attr * attr,u64 id)1588 static int cs_etm__synth_event(struct perf_session *session,
1589 struct perf_event_attr *attr, u64 id)
1590 {
1591 struct cs_etm_synth cs_etm_synth;
1592
1593 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1594 cs_etm_synth.session = session;
1595
1596 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1597 &id, cs_etm__event_synth);
1598 }
1599
cs_etm__synth_events(struct cs_etm_auxtrace * etm,struct perf_session * session)1600 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1601 struct perf_session *session)
1602 {
1603 struct evlist *evlist = session->evlist;
1604 struct evsel *evsel;
1605 struct perf_event_attr attr;
1606 bool found = false;
1607 u64 id;
1608 int err;
1609
1610 evlist__for_each_entry(evlist, evsel) {
1611 if (evsel->core.attr.type == etm->pmu_type) {
1612 found = true;
1613 break;
1614 }
1615 }
1616
1617 if (!found) {
1618 pr_debug("No selected events with CoreSight Trace data\n");
1619 return 0;
1620 }
1621
1622 memset(&attr, 0, sizeof(struct perf_event_attr));
1623 attr.size = sizeof(struct perf_event_attr);
1624 attr.type = PERF_TYPE_HARDWARE;
1625 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1626 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1627 PERF_SAMPLE_PERIOD;
1628 if (etm->timeless_decoding)
1629 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1630 else
1631 attr.sample_type |= PERF_SAMPLE_TIME;
1632
1633 attr.exclude_user = evsel->core.attr.exclude_user;
1634 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1635 attr.exclude_hv = evsel->core.attr.exclude_hv;
1636 attr.exclude_host = evsel->core.attr.exclude_host;
1637 attr.exclude_guest = evsel->core.attr.exclude_guest;
1638 attr.sample_id_all = evsel->core.attr.sample_id_all;
1639 attr.read_format = evsel->core.attr.read_format;
1640
1641 /* create new id val to be a fixed offset from evsel id */
1642 id = evsel->core.id[0] + 1000000000;
1643
1644 if (!id)
1645 id = 1;
1646
1647 if (etm->synth_opts.branches) {
1648 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1649 attr.sample_period = 1;
1650 attr.sample_type |= PERF_SAMPLE_ADDR;
1651 err = cs_etm__synth_event(session, &attr, id);
1652 if (err)
1653 return err;
1654 etm->branches_sample_type = attr.sample_type;
1655 etm->branches_id = id;
1656 id += 1;
1657 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1658 }
1659
1660 if (etm->synth_opts.last_branch) {
1661 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1662 /*
1663 * We don't use the hardware index, but the sample generation
1664 * code uses the new format branch_stack with this field,
1665 * so the event attributes must indicate that it's present.
1666 */
1667 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1668 }
1669
1670 if (etm->synth_opts.instructions) {
1671 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1672 attr.sample_period = etm->synth_opts.period;
1673 etm->instructions_sample_period = attr.sample_period;
1674 err = cs_etm__synth_event(session, &attr, id);
1675 if (err)
1676 return err;
1677 etm->instructions_sample_type = attr.sample_type;
1678 etm->instructions_id = id;
1679 id += 1;
1680 }
1681
1682 return 0;
1683 }
1684
cs_etm__sample(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1685 static int cs_etm__sample(struct cs_etm_queue *etmq,
1686 struct cs_etm_traceid_queue *tidq)
1687 {
1688 struct cs_etm_auxtrace *etm = etmq->etm;
1689 int ret;
1690 u8 trace_chan_id = tidq->trace_chan_id;
1691 u64 instrs_prev;
1692
1693 /* Get instructions remainder from previous packet */
1694 instrs_prev = tidq->period_instructions;
1695
1696 tidq->period_instructions += tidq->packet->instr_count;
1697
1698 /*
1699 * Record a branch when the last instruction in
1700 * PREV_PACKET is a branch.
1701 */
1702 if (etm->synth_opts.last_branch &&
1703 tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1704 tidq->prev_packet->last_instr_taken_branch)
1705 cs_etm__update_last_branch_rb(etmq, tidq);
1706
1707 if (etm->synth_opts.instructions &&
1708 tidq->period_instructions >= etm->instructions_sample_period) {
1709 /*
1710 * Emit instruction sample periodically
1711 * TODO: allow period to be defined in cycles and clock time
1712 */
1713
1714 /*
1715 * Below diagram demonstrates the instruction samples
1716 * generation flows:
1717 *
1718 * Instrs Instrs Instrs Instrs
1719 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
1720 * | | | |
1721 * V V V V
1722 * --------------------------------------------------
1723 * ^ ^
1724 * | |
1725 * Period Period
1726 * instructions(Pi) instructions(Pi')
1727 *
1728 * | |
1729 * \---------------- -----------------/
1730 * V
1731 * tidq->packet->instr_count
1732 *
1733 * Instrs Sample(n...) are the synthesised samples occurring
1734 * every etm->instructions_sample_period instructions - as
1735 * defined on the perf command line. Sample(n) is the last
1736 * sample before the current etm packet; samples n+1 to n+3
1737 * are generated from the current etm packet.
1738 *
1739 * tidq->packet->instr_count represents the number of
1740 * instructions in the current etm packet.
1741 *
1742 * Period instructions (Pi) is the number of
1743 * instructions executed after the sample point(n) from the
1744 * previous etm packet. This will always be less than
1745 * etm->instructions_sample_period.
1746 *
1747 * When generating new samples, sample(n+1) combines two parts:
1748 * the tail of the old packet and the head of the newly arrived
1749 * packet. Samples (n+2) and (n+3) then each consume a full
1750 * sample period of instructions. After sample(n+3), the
1751 * remaining instructions belong to a later packet and are
1752 * carried over in tidq->period_instructions for the next
1753 * round of calculation.
1754 */
1755
1756 /*
1757 * Get the initial offset into the current packet instructions;
1758 * entry conditions ensure that instrs_prev is less than
1759 * etm->instructions_sample_period.
1760 */
1761 u64 offset = etm->instructions_sample_period - instrs_prev;
1762 u64 addr;
1763
1764 /* Prepare last branches for instruction sample */
1765 if (etm->synth_opts.last_branch)
1766 cs_etm__copy_last_branch_rb(etmq, tidq);
1767
1768 while (tidq->period_instructions >=
1769 etm->instructions_sample_period) {
1770 /*
1771 * Calculate the address of the sampled instruction (-1
1772 * as sample is reported as though instruction has just
1773 * been executed, but PC has not advanced to next
1774 * instruction)
1775 */
1776 addr = cs_etm__instr_addr(etmq, trace_chan_id,
1777 tidq->packet, offset - 1);
1778 ret = cs_etm__synth_instruction_sample(
1779 etmq, tidq, addr,
1780 etm->instructions_sample_period);
1781 if (ret)
1782 return ret;
1783
1784 offset += etm->instructions_sample_period;
1785 tidq->period_instructions -=
1786 etm->instructions_sample_period;
1787 }
1788 }
1789
1790 if (etm->synth_opts.branches) {
1791 bool generate_sample = false;
1792
1793 /* Generate sample for tracing on packet */
1794 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1795 generate_sample = true;
1796
1797 /* Generate sample for branch taken packet */
1798 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1799 tidq->prev_packet->last_instr_taken_branch)
1800 generate_sample = true;
1801
1802 if (generate_sample) {
1803 ret = cs_etm__synth_branch_sample(etmq, tidq);
1804 if (ret)
1805 return ret;
1806 }
1807 }
1808
1809 cs_etm__packet_swap(etm, tidq);
1810
1811 return 0;
1812 }
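/*
 * Illustrative sketch (guarded out, not part of the build): the periodic
 * sampling arithmetic above reduced to plain integers. Given the leftover
 * instruction count from the previous packet and the instruction count of
 * the current one, it emits one "sample" per period and returns the new
 * leftover. The helper name and the use of printf() are hypothetical and
 * exist only for this sketch (assumes <stdio.h>).
 */
#if 0
static u64 emit_periodic_samples(u64 leftover, u64 instr_count, u64 period)
{
	/* First sample lands 'period - leftover' instructions into the packet */
	u64 offset = period - leftover;
	u64 total = leftover + instr_count;

	while (total >= period) {
		/* offset - 1: report the instruction that has just executed */
		printf("sample at packet offset %llu\n",
		       (unsigned long long)(offset - 1));
		offset += period;
		total -= period;
	}

	/* Carried over, exactly as tidq->period_instructions is above */
	return total;
}
#endif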
1813
1814 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1815 {
1816 /*
1817 * When an exception packet is inserted, force
1818 * 'prev_packet->last_instr_taken_branch' to true regardless of whether
1819 * the last instruction in the previous range packet was a taken branch.
1820 * This ensures a branch sample is generated for the instruction range
1821 * executed before the exception is trapped to the kernel or returns.
1822 *
1823 * The exception packet includes the dummy address values, so don't
1824 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1825 * for generating instruction and branch samples.
1826 */
1827 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1828 tidq->prev_packet->last_instr_taken_branch = true;
1829
1830 return 0;
1831 }
1832
1833 static int cs_etm__flush(struct cs_etm_queue *etmq,
1834 struct cs_etm_traceid_queue *tidq)
1835 {
1836 int err = 0;
1837 struct cs_etm_auxtrace *etm = etmq->etm;
1838
1839 /* Handle start tracing packet */
1840 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1841 goto swap_packet;
1842
1843 if (etmq->etm->synth_opts.last_branch &&
1844 etmq->etm->synth_opts.instructions &&
1845 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1846 u64 addr;
1847
1848 /* Prepare last branches for instruction sample */
1849 cs_etm__copy_last_branch_rb(etmq, tidq);
1850
1851 /*
1852 * Generate a last branch event for the branches left in the
1853 * circular buffer at the end of the trace.
1854 *
1855 * Use the address of the end of the last reported execution
1856 * range.
1857 */
1858 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1859
1860 err = cs_etm__synth_instruction_sample(
1861 etmq, tidq, addr,
1862 tidq->period_instructions);
1863 if (err)
1864 return err;
1865
1866 tidq->period_instructions = 0;
1867
1868 }
1869
1870 if (etm->synth_opts.branches &&
1871 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1872 err = cs_etm__synth_branch_sample(etmq, tidq);
1873 if (err)
1874 return err;
1875 }
1876
1877 swap_packet:
1878 cs_etm__packet_swap(etm, tidq);
1879
1880 /* Reset last branches after flushing the trace */
1881 if (etm->synth_opts.last_branch)
1882 cs_etm__reset_last_branch_rb(tidq);
1883
1884 return err;
1885 }
1886
1887 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1888 struct cs_etm_traceid_queue *tidq)
1889 {
1890 int err;
1891
1892 /*
1893 * No new packet is coming and 'etmq->packet' still contains the stale
1894 * packet left over from the previous packet swap, so skip generating
1895 * a branch sample to avoid using stale data.
1896 *
1897 * For this case only flush branch stack and generate a last branch
1898 * event for the branches left in the circular buffer at the end of
1899 * the trace.
1900 */
1901 if (etmq->etm->synth_opts.last_branch &&
1902 etmq->etm->synth_opts.instructions &&
1903 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1904 u64 addr;
1905
1906 /* Prepare last branches for instruction sample */
1907 cs_etm__copy_last_branch_rb(etmq, tidq);
1908
1909 /*
1910 * Use the address of the end of the last reported execution
1911 * range.
1912 */
1913 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1914
1915 err = cs_etm__synth_instruction_sample(
1916 etmq, tidq, addr,
1917 tidq->period_instructions);
1918 if (err)
1919 return err;
1920
1921 tidq->period_instructions = 0;
1922 }
1923
1924 return 0;
1925 }
1926 /*
1927 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1928 * if need be.
1929 * Returns: < 0 if error
1930 * = 0 if no more auxtrace_buffer to read
1931 * > 0 if the current buffer isn't empty yet
1932 */
1933 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1934 {
1935 int ret;
1936
1937 if (!etmq->buf_len) {
1938 ret = cs_etm__get_trace(etmq);
1939 if (ret <= 0)
1940 return ret;
1941 /*
1942 * We cannot assume consecutive blocks in the data file
1943 * are contiguous; reset the decoder to force re-sync.
1944 */
1945 ret = cs_etm_decoder__reset(etmq->decoder);
1946 if (ret)
1947 return ret;
1948 }
1949
1950 return etmq->buf_len;
1951 }
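/*
 * Illustrative sketch (guarded out): how the tri-state return value of
 * cs_etm__get_data_block() is meant to be consumed, mirroring the
 * timeless decoders further below.
 */
#if 0
	while (1) {
		int ret = cs_etm__get_data_block(etmq);

		if (ret < 0)
			return ret;	/* hard error */
		if (!ret)
			break;		/* no more auxtrace buffers to read */
		/* ret > 0: etmq->buf_len bytes are ready for the decoder */
	}
#endif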
1952
1953 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1954 struct cs_etm_packet *packet,
1955 u64 end_addr)
1956 {
1957 /* Initialise to keep compiler happy */
1958 u16 instr16 = 0;
1959 u32 instr32 = 0;
1960 u64 addr;
1961
1962 switch (packet->isa) {
1963 case CS_ETM_ISA_T32:
1964 /*
1965 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1966 *
1967 * b'15 b'8
1968 * +-----------------+--------+
1969 * | 1 1 0 1 1 1 1 1 | imm8 |
1970 * +-----------------+--------+
1971 *
1972 * The specification only defines a 16-bit SVC encoding for T32
1973 * and has no 32-bit variant, so read just 2 bytes as the
1974 * instruction size for T32.
1975 */
1976 addr = end_addr - 2;
1977 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
1978 (u8 *)&instr16, 0);
1979 if ((instr16 & 0xFF00) == 0xDF00)
1980 return true;
1981
1982 break;
1983 case CS_ETM_ISA_A32:
1984 /*
1985 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
1986 *
1987 * b'31 b'28 b'27 b'24
1988 * +---------+---------+-------------------------+
1989 * | !1111 | 1 1 1 1 | imm24 |
1990 * +---------+---------+-------------------------+
1991 */
1992 addr = end_addr - 4;
1993 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
1994 (u8 *)&instr32, 0);
1995 if ((instr32 & 0x0F000000) == 0x0F000000 &&
1996 (instr32 & 0xF0000000) != 0xF0000000)
1997 return true;
1998
1999 break;
2000 case CS_ETM_ISA_A64:
2001 /*
2002 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2003 *
2004 * b'31 b'21 b'4 b'0
2005 * +-----------------------+---------+-----------+
2006 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2007 * +-----------------------+---------+-----------+
2008 */
2009 addr = end_addr - 4;
2010 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2011 (u8 *)&instr32, 0);
2012 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2013 return true;
2014
2015 break;
2016 case CS_ETM_ISA_UNKNOWN:
2017 default:
2018 break;
2019 }
2020
2021 return false;
2022 }
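/*
 * Illustrative sketch (guarded out): the A64 check above in isolation.
 * SVC #imm16 encodes as 0xD4000001 | (imm16 << 5), so masking with
 * 0xFFE0001F keeps only the fixed bits. The helper name is hypothetical.
 */
#if 0
static bool is_a64_svc(u32 instr)
{
	return (instr & 0xFFE0001F) == 0xD4000001;
}

/*
 * is_a64_svc(0xD4000001) -> true  (SVC #0)
 * is_a64_svc(0xD4000021) -> true  (SVC #1)
 * is_a64_svc(0xD4400000) -> false (different opcode group)
 */
#endif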
2023
2024 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2025 struct cs_etm_traceid_queue *tidq, u64 magic)
2026 {
2027 u8 trace_chan_id = tidq->trace_chan_id;
2028 struct cs_etm_packet *packet = tidq->packet;
2029 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2030
2031 if (magic == __perf_cs_etmv3_magic)
2032 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2033 return true;
2034
2035 /*
2036 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2037 * HVC; we need to check whether it is an SVC instruction
2038 * based on the packet address.
2039 */
2040 if (magic == __perf_cs_etmv4_magic) {
2041 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2042 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2043 prev_packet->end_addr))
2044 return true;
2045 }
2046
2047 return false;
2048 }
2049
2050 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2051 u64 magic)
2052 {
2053 struct cs_etm_packet *packet = tidq->packet;
2054
2055 if (magic == __perf_cs_etmv3_magic)
2056 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2057 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2058 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2059 packet->exception_number == CS_ETMV3_EXC_IRQ ||
2060 packet->exception_number == CS_ETMV3_EXC_FIQ)
2061 return true;
2062
2063 if (magic == __perf_cs_etmv4_magic)
2064 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2065 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2066 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2067 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2068 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2069 packet->exception_number == CS_ETMV4_EXC_IRQ ||
2070 packet->exception_number == CS_ETMV4_EXC_FIQ)
2071 return true;
2072
2073 return false;
2074 }
2075
2076 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2077 struct cs_etm_traceid_queue *tidq,
2078 u64 magic)
2079 {
2080 u8 trace_chan_id = tidq->trace_chan_id;
2081 struct cs_etm_packet *packet = tidq->packet;
2082 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2083
2084 if (magic == __perf_cs_etmv3_magic)
2085 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2086 packet->exception_number == CS_ETMV3_EXC_HYP ||
2087 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2088 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2089 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2090 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2091 packet->exception_number == CS_ETMV3_EXC_GENERIC)
2092 return true;
2093
2094 if (magic == __perf_cs_etmv4_magic) {
2095 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2096 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2097 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2098 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2099 return true;
2100
2101 /*
2102 * For CS_ETMV4_EXC_CALL, all instructions other than SVC
2103 * (i.e. SMC and HVC) are taken as sync exceptions.
2104 */
2105 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2106 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2107 prev_packet->end_addr))
2108 return true;
2109
2110 /*
2111 * ETMv4 has 5 bits for the exception number; numbers in
2112 * the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] are
2113 * implementation-defined exceptions.
2114 *
2115 * In this case, simply treat them as sync exceptions.
2116 */
2117 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2118 packet->exception_number <= CS_ETMV4_EXC_END)
2119 return true;
2120 }
2121
2122 return false;
2123 }
2124
2125 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2126 struct cs_etm_traceid_queue *tidq)
2127 {
2128 struct cs_etm_packet *packet = tidq->packet;
2129 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2130 u8 trace_chan_id = tidq->trace_chan_id;
2131 u64 magic;
2132 int ret;
2133
2134 switch (packet->sample_type) {
2135 case CS_ETM_RANGE:
2136 /*
2137 * An immediate branch instruction with neither a link
2138 * nor a return flag is a normal branch instruction
2139 * within the function.
2140 */
2141 if (packet->last_instr_type == OCSD_INSTR_BR &&
2142 packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2143 packet->flags = PERF_IP_FLAG_BRANCH;
2144
2145 if (packet->last_instr_cond)
2146 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2147 }
2148
2149 /*
2150 * An immediate branch instruction with link (e.g. BL) is a
2151 * branch instruction for a function call.
2152 */
2153 if (packet->last_instr_type == OCSD_INSTR_BR &&
2154 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2155 packet->flags = PERF_IP_FLAG_BRANCH |
2156 PERF_IP_FLAG_CALL;
2157
2158 /*
2159 * An indirect branch instruction with link (e.g. BLR) is a
2160 * branch instruction for a function call.
2161 */
2162 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2163 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2164 packet->flags = PERF_IP_FLAG_BRANCH |
2165 PERF_IP_FLAG_CALL;
2166
2167 /*
2168 * An indirect branch instruction with subtype
2169 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint of a
2170 * function return for A32/T32.
2171 */
2172 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2173 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2174 packet->flags = PERF_IP_FLAG_BRANCH |
2175 PERF_IP_FLAG_RETURN;
2176
2177 /*
2178 * An indirect branch instruction without link (e.g. BR) is
2179 * usually used for a function return, especially for
2180 * functions within a dynamically linked library.
2181 */
2182 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2183 packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2184 packet->flags = PERF_IP_FLAG_BRANCH |
2185 PERF_IP_FLAG_RETURN;
2186
2187 /* Return instruction for function return. */
2188 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2189 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2190 packet->flags = PERF_IP_FLAG_BRANCH |
2191 PERF_IP_FLAG_RETURN;
2192
2193 /*
2194 * The decoder might insert a discontinuity in the middle of
2195 * instruction packets; fix up prev_packet with the flag
2196 * PERF_IP_FLAG_TRACE_BEGIN to indicate the trace restarted.
2197 */
2198 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2199 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2200 PERF_IP_FLAG_TRACE_BEGIN;
2201
2202 /*
2203 * If the previous packet is an exception return packet
2204 * and the return address immediately follows an SVC
2205 * instruction, calibrate the previous packet's sample
2206 * flags to PERF_IP_FLAG_SYSCALLRET.
2207 */
2208 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2209 PERF_IP_FLAG_RETURN |
2210 PERF_IP_FLAG_INTERRUPT) &&
2211 cs_etm__is_svc_instr(etmq, trace_chan_id,
2212 packet, packet->start_addr))
2213 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2214 PERF_IP_FLAG_RETURN |
2215 PERF_IP_FLAG_SYSCALLRET;
2216 break;
2217 case CS_ETM_DISCONTINUITY:
2218 /*
2219 * The trace is discontinuous; if the previous packet is an
2220 * instruction packet, set the PERF_IP_FLAG_TRACE_END flag
2221 * on it.
2222 */
2223 if (prev_packet->sample_type == CS_ETM_RANGE)
2224 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2225 PERF_IP_FLAG_TRACE_END;
2226 break;
2227 case CS_ETM_EXCEPTION:
2228 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2229 if (ret)
2230 return ret;
2231
2232 /* The exception is for system call. */
2233 if (cs_etm__is_syscall(etmq, tidq, magic))
2234 packet->flags = PERF_IP_FLAG_BRANCH |
2235 PERF_IP_FLAG_CALL |
2236 PERF_IP_FLAG_SYSCALLRET;
2237 /*
2238 * The exceptions are triggered by external signals from bus,
2239 * interrupt controller, debug module, PE reset or halt.
2240 */
2241 else if (cs_etm__is_async_exception(tidq, magic))
2242 packet->flags = PERF_IP_FLAG_BRANCH |
2243 PERF_IP_FLAG_CALL |
2244 PERF_IP_FLAG_ASYNC |
2245 PERF_IP_FLAG_INTERRUPT;
2246 /*
2247 * Otherwise, exception is caused by trap, instruction &
2248 * data fault, or alignment errors.
2249 */
2250 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2251 packet->flags = PERF_IP_FLAG_BRANCH |
2252 PERF_IP_FLAG_CALL |
2253 PERF_IP_FLAG_INTERRUPT;
2254
2255 /*
2256 * When an exception packet is inserted, it is not used
2257 * standalone for generating samples but is affiliated
2258 * with the previous instruction range packet; so set the
2259 * previous range packet's flags to tell perf it is an
2260 * exception taken branch.
2261 */
2262 if (prev_packet->sample_type == CS_ETM_RANGE)
2263 prev_packet->flags = packet->flags;
2264 break;
2265 case CS_ETM_EXCEPTION_RET:
2266 /*
2267 * When an exception return packet is inserted, it is not
2268 * used standalone for generating samples but is affiliated
2269 * with the previous instruction range packet; so set the
2270 * previous range packet's flags to tell perf it is an
2271 * exception return branch.
2272 *
2273 * The exception return can be for either a system call or
2274 * another exception type; unfortunately the packet carries
2275 * no information about the exception type, so we cannot
2276 * decide it purely from the exception return packet.
2277 * Recording the exception number from the exception packet
2278 * and reusing it for the exception return packet is not
2279 * reliable either: the trace can be discontinuous or the
2280 * interrupt can be nested, and in both cases the recorded
2281 * exception number cannot be applied to the return packet.
2282 *
2283 * For an exception return packet we only need to distinguish
2284 * whether it is for a system call or for another type. The
2285 * decision can therefore be deferred until the next packet,
2286 * which contains the return address; based on that address
2287 * we can read back the previous instruction, check whether
2288 * it is a system call instruction, and calibrate the sample
2289 * flags as needed.
2290 */
2291 if (prev_packet->sample_type == CS_ETM_RANGE)
2292 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2293 PERF_IP_FLAG_RETURN |
2294 PERF_IP_FLAG_INTERRUPT;
2295 break;
2296 case CS_ETM_EMPTY:
2297 default:
2298 break;
2299 }
2300
2301 return 0;
2302 }
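/*
 * Illustrative sketch (guarded out): the CS_ETM_RANGE rules above restated
 * as a lookup table for reference; the PERF_IP_FLAG_CONDITIONAL refinement
 * for conditional immediate branches is omitted. This is a condensed
 * restatement, not how the function is implemented.
 */
#if 0
static const struct {
	int type;	/* packet->last_instr_type (OCSD_INSTR_*) */
	int subtype;	/* packet->last_instr_subtype (OCSD_S_INSTR_*) */
	u64 flags;	/* resulting PERF_IP_FLAG_* value */
} range_flag_map[] = {
	{ OCSD_INSTR_BR,	  OCSD_S_INSTR_NONE,
	  PERF_IP_FLAG_BRANCH },
	{ OCSD_INSTR_BR,	  OCSD_S_INSTR_BR_LINK,
	  PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL },
	{ OCSD_INSTR_BR_INDIRECT, OCSD_S_INSTR_BR_LINK,
	  PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL },
	{ OCSD_INSTR_BR_INDIRECT, OCSD_S_INSTR_V7_IMPLIED_RET,
	  PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN },
	{ OCSD_INSTR_BR_INDIRECT, OCSD_S_INSTR_NONE,
	  PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN },
	{ OCSD_INSTR_BR_INDIRECT, OCSD_S_INSTR_V8_RET,
	  PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN },
};
#endif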
2303
2304 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2305 {
2306 int ret = 0;
2307 size_t processed = 0;
2308
2309 /*
2310 * Packets are decoded and added to the decoder's packet queue
2311 * until the decoder packet processing callback has requested that
2312 * processing stops or there is nothing left in the buffer. Normal
2313 * operations that stop processing are a timestamp packet or a full
2314 * decoder buffer queue.
2315 */
2316 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2317 etmq->offset,
2318 &etmq->buf[etmq->buf_used],
2319 etmq->buf_len,
2320 &processed);
2321 if (ret)
2322 goto out;
2323
2324 etmq->offset += processed;
2325 etmq->buf_used += processed;
2326 etmq->buf_len -= processed;
2327
2328 out:
2329 return ret;
2330 }
2331
2332 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2333 struct cs_etm_traceid_queue *tidq)
2334 {
2335 int ret;
2336 struct cs_etm_packet_queue *packet_queue;
2337
2338 packet_queue = &tidq->packet_queue;
2339
2340 /* Process each packet in this chunk */
2341 while (1) {
2342 ret = cs_etm_decoder__get_packet(packet_queue,
2343 tidq->packet);
2344 if (ret <= 0)
2345 /*
2346 * Stop processing this chunk on
2347 * end of data or error
2348 */
2349 break;
2350
2351 /*
2352 * Packet addresses are swapped during packet
2353 * handling within the switch() statement below,
2354 * so the sample flags must be set beforehand
2355 * while the address information is still valid,
2356 * i.e. before the packets are swapped.
2357 */
2358 ret = cs_etm__set_sample_flags(etmq, tidq);
2359 if (ret < 0)
2360 break;
2361
2362 switch (tidq->packet->sample_type) {
2363 case CS_ETM_RANGE:
2364 /*
2365 * If the packet contains an instruction
2366 * range, generate instruction sequence
2367 * events.
2368 */
2369 cs_etm__sample(etmq, tidq);
2370 break;
2371 case CS_ETM_EXCEPTION:
2372 case CS_ETM_EXCEPTION_RET:
2373 /*
2374 * On an exception packet, make sure
2375 * the previous instruction range
2376 * packet is handled properly.
2377 */
2378 cs_etm__exception(tidq);
2379 break;
2380 case CS_ETM_DISCONTINUITY:
2381 /*
2382 * Discontinuity in trace, flush
2383 * previous branch stack
2384 */
2385 cs_etm__flush(etmq, tidq);
2386 break;
2387 case CS_ETM_EMPTY:
2388 /*
2389 * We should never receive an empty
2390 * packet; report an error.
2391 */
2392 pr_err("CS ETM Trace: empty packet\n");
2393 return -EINVAL;
2394 default:
2395 break;
2396 }
2397 }
2398
2399 return ret;
2400 }
2401
2402 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2403 {
2404 int idx;
2405 struct int_node *inode;
2406 struct cs_etm_traceid_queue *tidq;
2407 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2408
2409 intlist__for_each_entry(inode, traceid_queues_list) {
2410 idx = (int)(intptr_t)inode->priv;
2411 tidq = etmq->traceid_queues[idx];
2412
2413 /* Ignore return value */
2414 cs_etm__process_traceid_queue(etmq, tidq);
2415 }
2416 }
2417
2418 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2419 {
2420 int err = 0;
2421 struct cs_etm_traceid_queue *tidq;
2422
2423 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2424 if (!tidq)
2425 return -EINVAL;
2426
2427 /* Go through each buffer in the queue and decode them one by one */
2428 while (1) {
2429 err = cs_etm__get_data_block(etmq);
2430 if (err <= 0)
2431 return err;
2432
2433 /* Run trace decoder until buffer consumed or end of trace */
2434 do {
2435 err = cs_etm__decode_data_block(etmq);
2436 if (err)
2437 return err;
2438
2439 /*
2440 * Process each packet in this chunk; if an error occurs
2441 * there is nothing to do other than hope the next one
2442 * will be better.
2443 */
2444 err = cs_etm__process_traceid_queue(etmq, tidq);
2445
2446 } while (etmq->buf_len);
2447
2448 if (err == 0)
2449 /* Flush any remaining branch stack entries */
2450 err = cs_etm__end_block(etmq, tidq);
2451 }
2452
2453 return err;
2454 }
2455
2456 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2457 {
2458 int idx, err = 0;
2459 struct cs_etm_traceid_queue *tidq;
2460 struct int_node *inode;
2461
2462 /* Go through each buffer in the queue and decode them one by one */
2463 while (1) {
2464 err = cs_etm__get_data_block(etmq);
2465 if (err <= 0)
2466 return err;
2467
2468 /* Run trace decoder until buffer consumed or end of trace */
2469 do {
2470 err = cs_etm__decode_data_block(etmq);
2471 if (err)
2472 return err;
2473
2474 /*
2475 * cs_etm__run_per_thread_timeless_decoder() runs on a
2476 * single traceID queue because each TID has a separate
2477 * buffer. But here in per-cpu mode we need to iterate
2478 * over each channel instead.
2479 */
2480 intlist__for_each_entry(inode,
2481 etmq->traceid_queues_list) {
2482 idx = (int)(intptr_t)inode->priv;
2483 tidq = etmq->traceid_queues[idx];
2484 cs_etm__process_traceid_queue(etmq, tidq);
2485 }
2486 } while (etmq->buf_len);
2487
2488 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2489 idx = (int)(intptr_t)inode->priv;
2490 tidq = etmq->traceid_queues[idx];
2491 /* Flush any remaining branch stack entries */
2492 err = cs_etm__end_block(etmq, tidq);
2493 if (err)
2494 return err;
2495 }
2496 }
2497
2498 return err;
2499 }
2500
2501 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2502 pid_t tid)
2503 {
2504 unsigned int i;
2505 struct auxtrace_queues *queues = &etm->queues;
2506
2507 for (i = 0; i < queues->nr_queues; i++) {
2508 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2509 struct cs_etm_queue *etmq = queue->priv;
2510 struct cs_etm_traceid_queue *tidq;
2511
2512 if (!etmq)
2513 continue;
2514
2515 if (etm->per_thread_decoding) {
2516 tidq = cs_etm__etmq_get_traceid_queue(
2517 etmq, CS_ETM_PER_THREAD_TRACEID);
2518
2519 if (!tidq)
2520 continue;
2521
2522 if (tid == -1 || thread__tid(tidq->thread) == tid)
2523 cs_etm__run_per_thread_timeless_decoder(etmq);
2524 } else
2525 cs_etm__run_per_cpu_timeless_decoder(etmq);
2526 }
2527
2528 return 0;
2529 }
2530
2531 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2532 {
2533 int ret = 0;
2534 unsigned int cs_queue_nr, queue_nr, i;
2535 u8 trace_chan_id;
2536 u64 cs_timestamp;
2537 struct auxtrace_queue *queue;
2538 struct cs_etm_queue *etmq;
2539 struct cs_etm_traceid_queue *tidq;
2540
2541 /*
2542 * Pre-populate the heap with one entry from each queue so that we can
2543 * start processing in time order across all queues.
2544 */
2545 for (i = 0; i < etm->queues.nr_queues; i++) {
2546 etmq = etm->queues.queue_array[i].priv;
2547 if (!etmq)
2548 continue;
2549
2550 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2551 if (ret)
2552 return ret;
2553 }
2554
2555 while (1) {
2556 if (!etm->heap.heap_cnt)
2557 break;
2558
2559 /* Take the entry at the top of the min heap */
2560 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2561 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2562 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2563 queue = &etm->queues.queue_array[queue_nr];
2564 etmq = queue->priv;
2565
2566 /*
2567 * Remove the top entry from the heap since we are about
2568 * to process it.
2569 */
2570 auxtrace_heap__pop(&etm->heap);
2571
2572 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2573 if (!tidq) {
2574 /*
2575 * No traceID queue has been allocated for this traceID,
2576 * which means something somewhere went very wrong. There
2577 * is no choice other than to simply exit.
2578 */
2579 ret = -EINVAL;
2580 goto out;
2581 }
2582
2583 /*
2584 * Packets associated with this timestamp are already in
2585 * the etmq's traceID queue, so process them.
2586 */
2587 ret = cs_etm__process_traceid_queue(etmq, tidq);
2588 if (ret < 0)
2589 goto out;
2590
2591 /*
2592 * Packets for this timestamp have been processed, time to
2593 * move on to the next timestamp, fetching a new auxtrace_buffer
2594 * if need be.
2595 */
2596 refetch:
2597 ret = cs_etm__get_data_block(etmq);
2598 if (ret < 0)
2599 goto out;
2600
2601 /*
2602 * No more auxtrace_buffers to process in this etmq, simply
2603 * move on to another entry in the auxtrace_heap.
2604 */
2605 if (!ret)
2606 continue;
2607
2608 ret = cs_etm__decode_data_block(etmq);
2609 if (ret)
2610 goto out;
2611
2612 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2613
2614 if (!cs_timestamp) {
2615 /*
2616 * Function cs_etm__decode_data_block() returns when
2617 * there is no more trace to decode in the current
2618 * auxtrace_buffer OR when a timestamp has been
2619 * encountered on any of the traceID queues. Since we
2620 * did not get a timestamp, there is no more trace to
2621 * process in this auxtrace_buffer. As such, empty and
2622 * flush all traceID queues.
2623 */
2624 cs_etm__clear_all_traceid_queues(etmq);
2625
2626 /* Fetch another auxtrace_buffer for this etmq */
2627 goto refetch;
2628 }
2629
2630 /*
2631 * Add to the min heap the timestamp for packets that have
2632 * just been decoded. They will be processed and synthesized
2633 * during the next call to cs_etm__process_traceid_queue() for
2634 * this queue/traceID.
2635 */
2636 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2637 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2638 }
2639
2640 for (i = 0; i < etm->queues.nr_queues; i++) {
2641 struct int_node *inode;
2642
2643 etmq = etm->queues.queue_array[i].priv;
2644 if (!etmq)
2645 continue;
2646
2647 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2648 int idx = (int)(intptr_t)inode->priv;
2649
2650 /* Flush any remaining branch stack entries */
2651 tidq = etmq->traceid_queues[idx];
2652 ret = cs_etm__end_block(etmq, tidq);
2653 if (ret)
2654 return ret;
2655 }
2656 }
2657 out:
2658 return ret;
2659 }
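/*
 * Illustrative sketch (guarded out): one plausible shape for the
 * TO_CS_QUEUE_NR()/TO_QUEUE_NR()/TO_TRACE_CHAN_ID() macros used above
 * (the real definitions live earlier in this file). The point is that a
 * single heap key identifies both the auxtrace queue and the 8-bit trace
 * channel ID within it; the exact bit packing here is an assumption.
 */
#if 0
static unsigned int to_cs_queue_nr(unsigned int queue_nr, u8 trace_chan_id)
{
	return (queue_nr << 16) | trace_chan_id;	/* assumed packing */
}

static unsigned int to_queue_nr(unsigned int cs_queue_nr)
{
	return cs_queue_nr >> 16;
}

static u8 to_trace_chan_id(unsigned int cs_queue_nr)
{
	return cs_queue_nr & 0xff;
}
#endif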
2660
2661 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2662 union perf_event *event)
2663 {
2664 struct thread *th;
2665
2666 if (etm->timeless_decoding)
2667 return 0;
2668
2669 /*
2670 * Add the tid/pid to the log so that we can get a match when we get a
2671 * contextID from the decoder. Only track for the host: only kernel
2672 * trace is supported for guests, which wouldn't need pids, so this
2673 * should be fine.
2674 */
2675 th = machine__findnew_thread(&etm->session->machines.host,
2676 event->itrace_start.pid,
2677 event->itrace_start.tid);
2678 if (!th)
2679 return -ENOMEM;
2680
2681 thread__put(th);
2682
2683 return 0;
2684 }
2685
2686 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2687 union perf_event *event)
2688 {
2689 struct thread *th;
2690 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2691
2692 /*
2693 * Context switches in per-thread mode are irrelevant since perf
2694 * will start/stop tracing as the process is scheduled.
2695 */
2696 if (etm->timeless_decoding)
2697 return 0;
2698
2699 /*
2700 * SWITCH_IN events carry the next process to be switched out while
2701 * SWITCH_OUT events carry the process to be switched in. As such
2702 * we don't care about IN events.
2703 */
2704 if (!out)
2705 return 0;
2706
2707 /*
2708 * Add the tid/pid to the log so that we can get a match when we get a
2709 * contextID from the decoder. Only track for the host: only kernel
2710 * trace is supported for guests, which wouldn't need pids, so this
2711 * should be fine.
2712 */
2713 th = machine__findnew_thread(&etm->session->machines.host,
2714 event->context_switch.next_prev_pid,
2715 event->context_switch.next_prev_tid);
2716 if (!th)
2717 return -ENOMEM;
2718
2719 thread__put(th);
2720
2721 return 0;
2722 }
2723
2724 static int cs_etm__process_event(struct perf_session *session,
2725 union perf_event *event,
2726 struct perf_sample *sample,
2727 struct perf_tool *tool)
2728 {
2729 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2730 struct cs_etm_auxtrace,
2731 auxtrace);
2732
2733 if (dump_trace)
2734 return 0;
2735
2736 if (!tool->ordered_events) {
2737 pr_err("CoreSight ETM Trace requires ordered events\n");
2738 return -EINVAL;
2739 }
2740
2741 switch (event->header.type) {
2742 case PERF_RECORD_EXIT:
2743 /*
2744 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2745 * start the decode because we know there will be no more trace from
2746 * this thread. All this does is emit samples earlier than waiting for
2747 * the flush in other modes, but with timestamps it makes sense to wait
2748 * for flush so that events from different threads are interleaved
2749 * properly.
2750 */
2751 if (etm->per_thread_decoding && etm->timeless_decoding)
2752 return cs_etm__process_timeless_queues(etm,
2753 event->fork.tid);
2754 break;
2755
2756 case PERF_RECORD_ITRACE_START:
2757 return cs_etm__process_itrace_start(etm, event);
2758
2759 case PERF_RECORD_SWITCH_CPU_WIDE:
2760 return cs_etm__process_switch_cpu_wide(etm, event);
2761
2762 case PERF_RECORD_AUX:
2763 /*
2764 * Record the latest kernel timestamp available in the header
2765 * for samples so that synthesised samples occur from this point
2766 * onwards.
2767 */
2768 if (sample->time && (sample->time != (u64)-1))
2769 etm->latest_kernel_timestamp = sample->time;
2770 break;
2771
2772 default:
2773 break;
2774 }
2775
2776 return 0;
2777 }
2778
2779 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2780 struct perf_record_auxtrace *event)
2781 {
2782 struct auxtrace_buffer *buf;
2783 unsigned int i;
2784 /*
2785 * Find all buffers with same reference in the queues and dump them.
2786 * This is because the queues can contain multiple entries of the same
2787 * buffer that were split on aux records.
2788 */
2789 for (i = 0; i < etm->queues.nr_queues; ++i)
2790 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2791 if (buf->reference == event->reference)
2792 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2793 }
2794
2795 static int cs_etm__process_auxtrace_event(struct perf_session *session,
2796 union perf_event *event,
2797 struct perf_tool *tool __maybe_unused)
2798 {
2799 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2800 struct cs_etm_auxtrace,
2801 auxtrace);
2802 if (!etm->data_queued) {
2803 struct auxtrace_buffer *buffer;
2804 off_t data_offset;
2805 int fd = perf_data__fd(session->data);
2806 bool is_pipe = perf_data__is_pipe(session->data);
2807 int err;
2808 int idx = event->auxtrace.idx;
2809
2810 if (is_pipe)
2811 data_offset = 0;
2812 else {
2813 data_offset = lseek(fd, 0, SEEK_CUR);
2814 if (data_offset == -1)
2815 return -errno;
2816 }
2817
2818 err = auxtrace_queues__add_event(&etm->queues, session,
2819 event, data_offset, &buffer);
2820 if (err)
2821 return err;
2822
2823 /*
2824 * Knowing if the trace is formatted or not requires a lookup of
2825 * the aux record, so this only works in non-piped mode where data
2826 * is queued in cs_etm__queue_aux_records(). Always assume
2827 * formatted in piped mode (true).
2828 */
2829 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2830 idx, true);
2831 if (err)
2832 return err;
2833
2834 if (dump_trace)
2835 if (auxtrace_buffer__get_data(buffer, fd)) {
2836 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2837 auxtrace_buffer__put_data(buffer);
2838 }
2839 } else if (dump_trace)
2840 dump_queued_data(etm, &event->auxtrace);
2841
2842 return 0;
2843 }
2844
2845 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2846 {
2847 struct evsel *evsel;
2848 struct evlist *evlist = etm->session->evlist;
2849
2850 /* Override timeless mode with user input from --itrace=Z */
2851 if (etm->synth_opts.timeless_decoding) {
2852 etm->timeless_decoding = true;
2853 return 0;
2854 }
2855
2856 /*
2857 * Find the cs_etm evsel and look at what its timestamp setting was
2858 */
2859 evlist__for_each_entry(evlist, evsel)
2860 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2861 etm->timeless_decoding =
2862 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2863 return 0;
2864 }
2865
2866 pr_err("CS ETM: Couldn't find ETM evsel\n");
2867 return -EINVAL;
2868 }
2869
2870 /*
2871 * Read a single cpu parameter block from the auxtrace_info priv block.
2872 *
2873 * For version 1 there is a per-cpu nr_params entry. If we are handling
2874 * a version 1 file, this value may indicate fewer, the same, or more
2875 * params than the compile-time number we understand.
2876 *
2877 * For a version 0 info block, there are a fixed number, and we need to
2878 * fill out the nr_param value in the metadata we create.
2879 */
2880 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2881 int out_blk_size, int nr_params_v0)
2882 {
2883 u64 *metadata = NULL;
2884 int hdr_version;
2885 int nr_in_params, nr_out_params, nr_cmn_params;
2886 int i, k;
2887
2888 metadata = zalloc(sizeof(*metadata) * out_blk_size);
2889 if (!metadata)
2890 return NULL;
2891
2892 /* read block current index & version */
2893 i = *buff_in_offset;
2894 hdr_version = buff_in[CS_HEADER_VERSION];
2895
2896 if (!hdr_version) {
2897 /* read version 0 info block into a version 1 metadata block */
2898 nr_in_params = nr_params_v0;
2899 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2900 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2901 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2902 /* remaining block params at offset +1 from source */
2903 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2904 metadata[k + 1] = buff_in[i + k];
2905 /* version 0 has 2 common params */
2906 nr_cmn_params = 2;
2907 } else {
2908 /* read version 1 info block - input and output nr_params may differ */
2909 /* version 1 has 3 common params */
2910 nr_cmn_params = 3;
2911 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2912
2913 /* if input has more params than output - skip excess */
2914 nr_out_params = nr_in_params + nr_cmn_params;
2915 if (nr_out_params > out_blk_size)
2916 nr_out_params = out_blk_size;
2917
2918 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2919 metadata[k] = buff_in[i + k];
2920
2921 /* record the actual nr params we copied */
2922 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2923 }
2924
2925 /* adjust in offset by number of in params used */
2926 i += nr_in_params + nr_cmn_params;
2927 *buff_in_offset = i;
2928 return metadata;
2929 }
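/*
 * Layout sketch for the two info-block versions handled above, common
 * params first and the per-version trace params after:
 *
 *   v0: [MAGIC][CPU][PARAM_0 .. PARAM_n-1]                 n fixed at compile time
 *   v1: [MAGIC][CPU][NR_TRC_PARAMS][PARAM_0 .. PARAM_n-1]  n read from the block
 *
 * Either way the output metadata block is normalised to the v1 shape.
 */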
2930
2931 /**
2932 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2933 * on the bounds of aux_event, if it matches with the buffer that's at
2934 * file_offset.
2935 *
2936 * Normally, whole auxtrace buffers would be added to the queue. But we
2937 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2938 * is reset across each buffer, so splitting the buffers up in advance has
2939 * the same effect.
2940 */
2941 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2942 struct perf_record_aux *aux_event, struct perf_sample *sample)
2943 {
2944 int err;
2945 char buf[PERF_SAMPLE_MAX_SIZE];
2946 union perf_event *auxtrace_event_union;
2947 struct perf_record_auxtrace *auxtrace_event;
2948 union perf_event auxtrace_fragment;
2949 __u64 aux_offset, aux_size;
2950 __u32 idx;
2951 bool formatted;
2952
2953 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2954 struct cs_etm_auxtrace,
2955 auxtrace);
2956
2957 /*
2958 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2959 * from looping through the auxtrace index.
2960 */
2961 err = perf_session__peek_event(session, file_offset, buf,
2962 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2963 if (err)
2964 return err;
2965 auxtrace_event = &auxtrace_event_union->auxtrace;
2966 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2967 return -EINVAL;
2968
2969 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2970 auxtrace_event->header.size != sz) {
2971 return -EINVAL;
2972 }
2973
2974 /*
2975 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2976 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2977 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2978 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2979 * Return 'not found' if mismatch.
2980 */
2981 if (auxtrace_event->cpu == (__u32) -1) {
2982 etm->per_thread_decoding = true;
2983 if (auxtrace_event->tid != sample->tid)
2984 return 1;
2985 } else if (auxtrace_event->cpu != sample->cpu) {
2986 if (etm->per_thread_decoding) {
2987 /*
2988 * Found a per-cpu buffer after a per-thread one was
2989 * already found
2990 */
2991 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
2992 return -EINVAL;
2993 }
2994 return 1;
2995 }
2996
2997 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
2998 /*
2999 * Clamp size in snapshot mode. The buffer size is clamped in
3000 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3001 * the buffer size.
3002 */
3003 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3004
3005 /*
3006 * In this mode the head also points to the end of the buffer, so subtract
3007 * the size from aux_offset so that it points to the beginning, as in normal mode.
3008 */
3009 aux_offset = aux_event->aux_offset - aux_size;
3010 } else {
3011 aux_size = aux_event->aux_size;
3012 aux_offset = aux_event->aux_offset;
3013 }
3014
3015 if (aux_offset >= auxtrace_event->offset &&
3016 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3017 /*
3018 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3019 * based on the sizes of the aux event, and queue that fragment.
3020 */
3021 auxtrace_fragment.auxtrace = *auxtrace_event;
3022 auxtrace_fragment.auxtrace.size = aux_size;
3023 auxtrace_fragment.auxtrace.offset = aux_offset;
3024 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3025
3026 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3027 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3028 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3029 file_offset, NULL);
3030 if (err)
3031 return err;
3032
3033 idx = auxtrace_event->idx;
3034 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3035 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3036 idx, formatted);
3037 }
3038
3039 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3040 return 1;
3041 }
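/*
 * Worked example for the snapshot-mode bounds handling above (the numbers
 * are illustrative): with a 4096-byte auxtrace buffer at offset 0 and an
 * overwrite-mode AUX record reporting aux_offset = 4096 (the head, i.e.
 * the end of the data) and aux_size = 1024, the fragment becomes
 * aux_offset = 4096 - 1024 = 3072 and spans [3072, 4096) - inside the
 * buffer, so it is queued.
 */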
3042
3043 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3044 u64 offset __maybe_unused, void *data __maybe_unused)
3045 {
3046 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3047 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3048 (*(int *)data)++; /* increment found count */
3049 return cs_etm__process_aux_output_hw_id(session, event);
3050 }
3051 return 0;
3052 }
3053
3054 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3055 u64 offset __maybe_unused, void *data __maybe_unused)
3056 {
3057 struct perf_sample sample;
3058 int ret;
3059 struct auxtrace_index_entry *ent;
3060 struct auxtrace_index *auxtrace_index;
3061 struct evsel *evsel;
3062 size_t i;
3063
3064 /* Don't care about any other events, we're only queuing buffers for AUX events */
3065 if (event->header.type != PERF_RECORD_AUX)
3066 return 0;
3067
3068 if (event->header.size < sizeof(struct perf_record_aux))
3069 return -EINVAL;
3070
3071 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3072 if (!event->aux.aux_size)
3073 return 0;
3074
3075 /*
3076 * Parse the sample; we need the sample_id_all data that comes after the event so that the
3077 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3078 */
3079 evsel = evlist__event2evsel(session->evlist, event);
3080 if (!evsel)
3081 return -EINVAL;
3082 ret = evsel__parse_sample(evsel, event, &sample);
3083 if (ret)
3084 return ret;
3085
3086 /*
3087 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3088 */
3089 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3090 for (i = 0; i < auxtrace_index->nr; i++) {
3091 ent = &auxtrace_index->entries[i];
3092 ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3093 ent->sz, &event->aux, &sample);
3094 /*
3095 * Stop search on error or successful values. Continue search on
3096 * 1 ('not found')
3097 */
3098 if (ret != 1)
3099 return ret;
3100 }
3101 }
3102
3103 /*
3104 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3105 * don't exit with an error because it will still be possible to decode other aux records.
3106 */
3107 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3108 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3109 return 0;
3110 }
3111
3112 static int cs_etm__queue_aux_records(struct perf_session *session)
3113 {
3114 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3115 struct auxtrace_index, list);
3116 if (index && index->nr > 0)
3117 return perf_session__peek_events(session, session->header.data_offset,
3118 session->header.data_size,
3119 cs_etm__queue_aux_records_cb, NULL);
3120
3121 /*
3122 * We would get here if there are no entries in the index (either no auxtrace
3123 * buffers or no index at all). Fail silently as there is the possibility of
3124 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3125 * false.
3126 *
3127 * In that scenario, buffers will not be split by AUX records.
3128 */
3129 return 0;
3130 }
3131
3132 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3133 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
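/*
 * Reading of HAS_PARAM(): a metadata block stores only NR_TRC_PARAMS
 * entries after the common block, so a parameter whose index offset
 * (CS_<type>_<param> - CS_ETM_COMMON_BLK_MAX_V1) is greater than or equal
 * to that count was never written by the recording perf; HAS_PARAM() then
 * evaluates true, meaning "parameter missing".
 */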
3134
3135 /*
3136 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3137 * timestamps).
3138 */
3139 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3140 {
3141 int j;
3142
3143 for (j = 0; j < num_cpu; j++) {
3144 switch (metadata[j][CS_ETM_MAGIC]) {
3145 case __perf_cs_etmv4_magic:
3146 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3147 return false;
3148 break;
3149 case __perf_cs_ete_magic:
3150 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3151 return false;
3152 break;
3153 default:
3154 /* Unknown / unsupported magic number. */
3155 return false;
3156 }
3157 }
3158 return true;
3159 }
3160
3161 /* map trace ids to correct metadata block, from information in metadata */
3162 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3163 {
3164 u64 cs_etm_magic;
3165 u8 trace_chan_id;
3166 int i, err;
3167
3168 for (i = 0; i < num_cpu; i++) {
3169 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3170 switch (cs_etm_magic) {
3171 case __perf_cs_etmv3_magic:
3172 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3173 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3174 break;
3175 case __perf_cs_etmv4_magic:
3176 case __perf_cs_ete_magic:
3177 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3178 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3179 break;
3180 default:
3181 /* unknown magic number */
3182 return -EINVAL;
3183 }
3184 err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3185 if (err)
3186 return err;
3187 }
3188 return 0;
3189 }
3190
3191 /*
3192 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3193 * unused value to reduce the number of unneeded decoders created.
3194 */
3195 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3196 {
3197 u64 cs_etm_magic;
3198 int i;
3199
3200 for (i = 0; i < num_cpu; i++) {
3201 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3202 switch (cs_etm_magic) {
3203 case __perf_cs_etmv3_magic:
3204 if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3205 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3206 break;
3207 case __perf_cs_etmv4_magic:
3208 case __perf_cs_ete_magic:
3209 if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3210 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3211 break;
3212 default:
3213 /* unknown magic number */
3214 return -EINVAL;
3215 }
3216 }
3217 return 0;
3218 }
3219
3220 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3221 struct perf_session *session)
3222 {
3223 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3224 struct cs_etm_auxtrace *etm = NULL;
3225 struct perf_record_time_conv *tc = &session->time_conv;
3226 int event_header_size = sizeof(struct perf_event_header);
3227 int total_size = auxtrace_info->header.size;
3228 int priv_size = 0;
3229 int num_cpu;
3230 int err = 0;
3231 int aux_hw_id_found;
3232 int i, j;
3233 u64 *ptr = NULL;
3234 u64 **metadata = NULL;
3235
3236 /*
3237 * Create an RB tree for traceID-metadata tuple. Since the conversion
3238 * has to be made for each packet that gets decoded, optimizing access
3239 * with anything better than a sequential array is worth doing.
3240 */
3241 traceid_list = intlist__new(NULL);
3242 if (!traceid_list)
3243 return -ENOMEM;
3244
3245 /* First the global part */
3246 ptr = (u64 *) auxtrace_info->priv;
3247 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3248 metadata = zalloc(sizeof(*metadata) * num_cpu);
3249 if (!metadata) {
3250 err = -ENOMEM;
3251 goto err_free_traceid_list;
3252 }
3253
3254 /* Start parsing after the common part of the header */
3255 i = CS_HEADER_VERSION_MAX;
3256
3257 /*
3258 * The metadata is stored in the auxtrace_info section and encodes
3259 * the configuration of the ARM embedded trace macrocell which is
3260 * required by the trace decoder to properly decode the trace due
3261 * to its highly compressed nature.
3262 */
3263 for (j = 0; j < num_cpu; j++) {
3264 if (ptr[i] == __perf_cs_etmv3_magic) {
3265 metadata[j] =
3266 cs_etm__create_meta_blk(ptr, &i,
3267 CS_ETM_PRIV_MAX,
3268 CS_ETM_NR_TRC_PARAMS_V0);
3269 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3270 metadata[j] =
3271 cs_etm__create_meta_blk(ptr, &i,
3272 CS_ETMV4_PRIV_MAX,
3273 CS_ETMV4_NR_TRC_PARAMS_V0);
3274 } else if (ptr[i] == __perf_cs_ete_magic) {
3275 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3276 } else {
3277 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3278 ptr[i]);
3279 err = -EINVAL;
3280 goto err_free_metadata;
3281 }
3282
3283 if (!metadata[j]) {
3284 err = -ENOMEM;
3285 goto err_free_metadata;
3286 }
3287 }
3288
3289 /*
3290 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3291 * CS_ETMV4_PRIV_MAX marks how many double words are in the
3292 * global metadata and in each cpu's metadata respectively.
3293 * The following tests if the correct number of double words was
3294 * present in the auxtrace info section.
3295 */
3296 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3297 if (i * 8 != priv_size) {
3298 err = -EINVAL;
3299 goto err_free_metadata;
3300 }
3301
3302 etm = zalloc(sizeof(*etm));
3303
3304 if (!etm) {
3305 err = -ENOMEM;
3306 goto err_free_metadata;
3307 }
3308
3309 /*
3310 * As all the ETMs run at the same exception level, the system should
3311 * have the same PID format across CPUs. So cache the PID format
3312 * and reuse it for sequential decoding.
3313 */
3314 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3315
3316 err = auxtrace_queues__init(&etm->queues);
3317 if (err)
3318 goto err_free_etm;
3319
3320 if (session->itrace_synth_opts->set) {
3321 etm->synth_opts = *session->itrace_synth_opts;
3322 } else {
3323 itrace_synth_opts__set_default(&etm->synth_opts,
3324 session->itrace_synth_opts->default_no_sample);
3325 etm->synth_opts.callchain = false;
3326 }
3327
3328 etm->session = session;
3329
3330 etm->num_cpu = num_cpu;
3331 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3332 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3333 etm->metadata = metadata;
3334 etm->auxtrace_type = auxtrace_info->type;
3335
3336 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3337 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3338
3339 if (!etm->has_virtual_ts)
3340 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3341 "The time field of the samples will not be set accurately.\n\n");
3342
3343 etm->auxtrace.process_event = cs_etm__process_event;
3344 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3345 etm->auxtrace.flush_events = cs_etm__flush_events;
3346 etm->auxtrace.free_events = cs_etm__free_events;
3347 etm->auxtrace.free = cs_etm__free;
3348 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3349 session->auxtrace = &etm->auxtrace;
3350
3351 err = cs_etm__setup_timeless_decoding(etm);
3352 if (err)
3353 return err;
3354
3355 etm->tc.time_shift = tc->time_shift;
3356 etm->tc.time_mult = tc->time_mult;
3357 etm->tc.time_zero = tc->time_zero;
3358 if (event_contains(*tc, time_cycles)) {
3359 etm->tc.time_cycles = tc->time_cycles;
3360 etm->tc.time_mask = tc->time_mask;
3361 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3362 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3363 }
3364 err = cs_etm__synth_events(etm, session);
3365 if (err)
3366 goto err_free_queues;
3367
3368 /*
3369 * Map Trace ID values to CPU metadata.
3370 *
3371 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3372 * file has been recorded by a "new" perf updated to handle AUX_HW_ID, then the metadata
3373 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3374 *
3375 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3376 * the same IDs as the old algorithm as far as possible, unless there are clashes,
3377 * in which case a different value will be used. This means an older perf may still
3378 * be able to record and read files generated on a newer system.
3379 *
3380 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3381 * those packets. If they are there then the values will be mapped and plugged into
3382 * the metadata. We then set any remaining metadata values with the used flag to a
3383 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3384 *
3385 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3386 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3387 * flags if present.
3388 */
3389
3390 /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3391 aux_hw_id_found = 0;
3392 err = perf_session__peek_events(session, session->header.data_offset,
3393 session->header.data_size,
3394 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3395 if (err)
3396 goto err_free_queues;
3397
3398 /* if HW ID found then clear any unused metadata ID values */
3399 if (aux_hw_id_found)
3400 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3401 /* otherwise, this is a file with metadata values only, map from metadata */
3402 else
3403 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3404
3405 if (err)
3406 goto err_free_queues;
3407
3408 err = cs_etm__queue_aux_records(session);
3409 if (err)
3410 goto err_free_queues;
3411
3412 etm->data_queued = etm->queues.populated;
3413 return 0;
3414
3415 err_free_queues:
3416 auxtrace_queues__free(&etm->queues);
3417 session->auxtrace = NULL;
3418 err_free_etm:
3419 zfree(&etm);
3420 err_free_metadata:
3421 /* No need to check @metadata[j], free(NULL) is supported */
3422 for (j = 0; j < num_cpu; j++)
3423 zfree(&metadata[j]);
3424 zfree(&metadata);
3425 err_free_traceid_list:
3426 intlist__delete(traceid_list);
3427 return err;
3428 }
3429