xref: /openbmc/linux/tools/perf/util/arm-spe.c (revision c83eeec79ff64f777cbd59a8bd15d0a3fe1f92c0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
/*
 * All-ones 64-bit value.  arm_spe_flush() passes it (or MAX_TIMESTAMP - 1 in
 * the timeless case) as the time limit when draining the queues.
 */
#define MAX_TIMESTAMP (~0ULL)
38 
/*
 * Per-session Arm SPE decoding state.  It is allocated in
 * arm_spe_process_auxtrace_info() and recovered from session->auxtrace with
 * container_of() in the callbacks.
 */
struct arm_spe {
	/* Embedded auxtrace handle; session->auxtrace points at this member */
	struct auxtrace			auxtrace;
	/* Queues holding the AUX trace buffers */
	struct auxtrace_queues		queues;
	/* Heap of queue numbers keyed by the timestamp of their pending record */
	struct auxtrace_heap		heap;
	/* Copy of the session's itrace options, or the defaults if none were set */
	struct itrace_synth_opts        synth_opts;
	/* Copied from the auxtrace_info event's type field */
	u32				auxtrace_type;
	struct perf_session		*session;
	/* Always the host machine of the session */
	struct machine			*machine;
	/* PMU type read from auxtrace_info->priv[ARM_SPE_PMU_TYPE] */
	u32				pmu_type;

	/* Clock conversion parameters copied from session->time_conv */
	struct perf_tsc_conversion	tc;

	/* Set when no evsel in the session samples PERF_SAMPLE_TIME */
	u8				timeless_decoding;
	/* Set when the queues were already populated from the auxtrace index */
	u8				data_queued;

	/* Which sample classes to synthesize; set from synth_opts */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	/* Sample ids assigned to the synthesized events */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	/* Filled lazily from machine__kernel_start(); 0 until first decode */
	u64				kernel_start;

	/* NOTE(review): not referenced anywhere in the code visible here */
	unsigned long			num_events;
	/* Set once a decoded record carried a valid context id */
	u8				use_ctx_pkt_for_pid;
};
76 
/* Decoding state attached to one auxtrace queue through queue->priv. */
struct arm_spe_queue {
	/* Back pointer to the owning session state */
	struct arm_spe			*spe;
	/* Index of this queue in spe->queues.queue_array */
	unsigned int			queue_nr;
	/* Buffer most recently returned by auxtrace_buffer__next() */
	struct auxtrace_buffer		*buffer;
	/* Last non-empty buffer handed to the decoder; dropped when replaced */
	struct auxtrace_buffer		*old_buffer;
	/* PERF_SAMPLE_MAX_SIZE scratch buffer for synthesized events */
	union perf_event		*event_buf;
	/* True while this queue has an entry in spe->heap */
	bool				on_heap;
	/* NOTE(review): not referenced anywhere in the code visible here */
	bool				done;
	/* pid, tid and cpu start out as -1 */
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	/* Written by arm_spe_process_timeless_queues() */
	u64				time;
	/* Largest record timestamp seen so far on this queue */
	u64				timestamp;
	/* Cached thread reference, released with thread__zput() */
	struct thread			*thread;
};
93 
94 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
95 			 unsigned char *buf, size_t len)
96 {
97 	struct arm_spe_pkt packet;
98 	size_t pos = 0;
99 	int ret, pkt_len, i;
100 	char desc[ARM_SPE_PKT_DESC_MAX];
101 	const char *color = PERF_COLOR_BLUE;
102 
103 	color_fprintf(stdout, color,
104 		      ". ... ARM SPE data: size %#zx bytes\n",
105 		      len);
106 
107 	while (len) {
108 		ret = arm_spe_get_packet(buf, len, &packet);
109 		if (ret > 0)
110 			pkt_len = ret;
111 		else
112 			pkt_len = 1;
113 		printf(".");
114 		color_fprintf(stdout, color, "  %08x: ", pos);
115 		for (i = 0; i < pkt_len; i++)
116 			color_fprintf(stdout, color, " %02x", buf[i]);
117 		for (; i < 16; i++)
118 			color_fprintf(stdout, color, "   ");
119 		if (ret > 0) {
120 			ret = arm_spe_pkt_desc(&packet, desc,
121 					       ARM_SPE_PKT_DESC_MAX);
122 			if (!ret)
123 				color_fprintf(stdout, color, " %s\n", desc);
124 		} else {
125 			color_fprintf(stdout, color, " Bad packet!\n");
126 		}
127 		pos += pkt_len;
128 		buf += pkt_len;
129 		len -= pkt_len;
130 	}
131 }
132 
/* Emit a separator line, then dump @len bytes of trace data from @buf. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
139 
140 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
141 {
142 	struct arm_spe_queue *speq = data;
143 	struct auxtrace_buffer *buffer = speq->buffer;
144 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
145 	struct auxtrace_queue *queue;
146 
147 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
148 
149 	buffer = auxtrace_buffer__next(queue, buffer);
150 	/* If no more data, drop the previous auxtrace_buffer and return */
151 	if (!buffer) {
152 		if (old_buffer)
153 			auxtrace_buffer__drop_data(old_buffer);
154 		b->len = 0;
155 		return 0;
156 	}
157 
158 	speq->buffer = buffer;
159 
160 	/* If the aux_buffer doesn't have data associated, try to load it */
161 	if (!buffer->data) {
162 		/* get the file desc associated with the perf data file */
163 		int fd = perf_data__fd(speq->spe->session->data);
164 
165 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
166 		if (!buffer->data)
167 			return -ENOMEM;
168 	}
169 
170 	b->len = buffer->size;
171 	b->buf = buffer->data;
172 
173 	if (b->len) {
174 		if (old_buffer)
175 			auxtrace_buffer__drop_data(old_buffer);
176 		speq->old_buffer = buffer;
177 	} else {
178 		auxtrace_buffer__drop_data(buffer);
179 		return arm_spe_get_trace(b, data);
180 	}
181 
182 	return 0;
183 }
184 
185 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
186 		unsigned int queue_nr)
187 {
188 	struct arm_spe_params params = { .get_trace = 0, };
189 	struct arm_spe_queue *speq;
190 
191 	speq = zalloc(sizeof(*speq));
192 	if (!speq)
193 		return NULL;
194 
195 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
196 	if (!speq->event_buf)
197 		goto out_free;
198 
199 	speq->spe = spe;
200 	speq->queue_nr = queue_nr;
201 	speq->pid = -1;
202 	speq->tid = -1;
203 	speq->cpu = -1;
204 
205 	/* params set */
206 	params.get_trace = arm_spe_get_trace;
207 	params.data = speq;
208 
209 	/* create new decoder */
210 	speq->decoder = arm_spe_decoder_new(&params);
211 	if (!speq->decoder)
212 		goto out_free;
213 
214 	return speq;
215 
216 out_free:
217 	zfree(&speq->event_buf);
218 	free(speq);
219 
220 	return NULL;
221 }
222 
223 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
224 {
225 	return ip >= spe->kernel_start ?
226 		PERF_RECORD_MISC_KERNEL :
227 		PERF_RECORD_MISC_USER;
228 }
229 
230 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
231 				    struct auxtrace_queue *queue)
232 {
233 	struct arm_spe_queue *speq = queue->priv;
234 	pid_t tid;
235 
236 	tid = machine__get_current_tid(spe->machine, speq->cpu);
237 	if (tid != -1) {
238 		speq->tid = tid;
239 		thread__zput(speq->thread);
240 	} else
241 		speq->tid = queue->tid;
242 
243 	if ((!speq->thread) && (speq->tid != -1)) {
244 		speq->thread = machine__find_thread(spe->machine, -1,
245 						    speq->tid);
246 	}
247 
248 	if (speq->thread) {
249 		speq->pid = speq->thread->pid_;
250 		if (queue->cpu == -1)
251 			speq->cpu = speq->thread->cpu;
252 	}
253 }
254 
255 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
256 {
257 	struct arm_spe *spe = speq->spe;
258 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
259 
260 	if (err)
261 		return err;
262 
263 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
264 
265 	return 0;
266 }
267 
268 static void arm_spe_prep_sample(struct arm_spe *spe,
269 				struct arm_spe_queue *speq,
270 				union perf_event *event,
271 				struct perf_sample *sample)
272 {
273 	struct arm_spe_record *record = &speq->decoder->record;
274 
275 	if (!spe->timeless_decoding)
276 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
277 
278 	sample->ip = record->from_ip;
279 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
280 	sample->pid = speq->pid;
281 	sample->tid = speq->tid;
282 	sample->period = 1;
283 	sample->cpu = speq->cpu;
284 
285 	event->sample.header.type = PERF_RECORD_SAMPLE;
286 	event->sample.header.misc = sample->cpumode;
287 	event->sample.header.size = sizeof(struct perf_event_header);
288 }
289 
290 static inline int
291 arm_spe_deliver_synth_event(struct arm_spe *spe,
292 			    struct arm_spe_queue *speq __maybe_unused,
293 			    union perf_event *event,
294 			    struct perf_sample *sample)
295 {
296 	int ret;
297 
298 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
299 	if (ret)
300 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
301 
302 	return ret;
303 }
304 
305 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
306 				     u64 spe_events_id, u64 data_src)
307 {
308 	struct arm_spe *spe = speq->spe;
309 	struct arm_spe_record *record = &speq->decoder->record;
310 	union perf_event *event = speq->event_buf;
311 	struct perf_sample sample = { .ip = 0, };
312 
313 	arm_spe_prep_sample(spe, speq, event, &sample);
314 
315 	sample.id = spe_events_id;
316 	sample.stream_id = spe_events_id;
317 	sample.addr = record->virt_addr;
318 	sample.phys_addr = record->phys_addr;
319 	sample.data_src = data_src;
320 
321 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
322 }
323 
324 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
325 					u64 spe_events_id)
326 {
327 	struct arm_spe *spe = speq->spe;
328 	struct arm_spe_record *record = &speq->decoder->record;
329 	union perf_event *event = speq->event_buf;
330 	struct perf_sample sample = { .ip = 0, };
331 
332 	arm_spe_prep_sample(spe, speq, event, &sample);
333 
334 	sample.id = spe_events_id;
335 	sample.stream_id = spe_events_id;
336 	sample.addr = record->to_ip;
337 
338 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
339 }
340 
341 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
342 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
343 			 ARM_SPE_REMOTE_ACCESS)
344 
345 static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
346 {
347 	if (type & SPE_MEM_TYPE)
348 		return true;
349 
350 	return false;
351 }
352 
353 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
354 {
355 	union perf_mem_data_src	data_src = { 0 };
356 
357 	if (record->op == ARM_SPE_LD)
358 		data_src.mem_op = PERF_MEM_OP_LOAD;
359 	else
360 		data_src.mem_op = PERF_MEM_OP_STORE;
361 
362 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
363 		data_src.mem_lvl = PERF_MEM_LVL_L3;
364 
365 		if (record->type & ARM_SPE_LLC_MISS)
366 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
367 		else
368 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
369 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
370 		data_src.mem_lvl = PERF_MEM_LVL_L1;
371 
372 		if (record->type & ARM_SPE_L1D_MISS)
373 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
374 		else
375 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
376 	}
377 
378 	if (record->type & ARM_SPE_REMOTE_ACCESS)
379 		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
380 
381 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
382 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
383 
384 		if (record->type & ARM_SPE_TLB_MISS)
385 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
386 		else
387 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
388 	}
389 
390 	return data_src.val;
391 }
392 
393 static int arm_spe_sample(struct arm_spe_queue *speq)
394 {
395 	const struct arm_spe_record *record = &speq->decoder->record;
396 	struct arm_spe *spe = speq->spe;
397 	u64 data_src;
398 	int err;
399 
400 	data_src = arm_spe__synth_data_source(record);
401 
402 	if (spe->sample_flc) {
403 		if (record->type & ARM_SPE_L1D_MISS) {
404 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
405 							data_src);
406 			if (err)
407 				return err;
408 		}
409 
410 		if (record->type & ARM_SPE_L1D_ACCESS) {
411 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
412 							data_src);
413 			if (err)
414 				return err;
415 		}
416 	}
417 
418 	if (spe->sample_llc) {
419 		if (record->type & ARM_SPE_LLC_MISS) {
420 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
421 							data_src);
422 			if (err)
423 				return err;
424 		}
425 
426 		if (record->type & ARM_SPE_LLC_ACCESS) {
427 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
428 							data_src);
429 			if (err)
430 				return err;
431 		}
432 	}
433 
434 	if (spe->sample_tlb) {
435 		if (record->type & ARM_SPE_TLB_MISS) {
436 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
437 							data_src);
438 			if (err)
439 				return err;
440 		}
441 
442 		if (record->type & ARM_SPE_TLB_ACCESS) {
443 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
444 							data_src);
445 			if (err)
446 				return err;
447 		}
448 	}
449 
450 	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
451 		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
452 		if (err)
453 			return err;
454 	}
455 
456 	if (spe->sample_remote_access &&
457 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
458 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
459 						data_src);
460 		if (err)
461 			return err;
462 	}
463 
464 	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
465 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
466 		if (err)
467 			return err;
468 	}
469 
470 	return 0;
471 }
472 
473 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
474 {
475 	struct arm_spe *spe = speq->spe;
476 	struct arm_spe_record *record;
477 	int ret;
478 
479 	if (!spe->kernel_start)
480 		spe->kernel_start = machine__kernel_start(spe->machine);
481 
482 	while (1) {
483 		/*
484 		 * The usual logic is firstly to decode the packets, and then
485 		 * based the record to synthesize sample; but here the flow is
486 		 * reversed: it calls arm_spe_sample() for synthesizing samples
487 		 * prior to arm_spe_decode().
488 		 *
489 		 * Two reasons for this code logic:
490 		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
491 		 * has decoded trace data and generated a record, but the record
492 		 * is left to generate sample until run to here, so it's correct
493 		 * to synthesize sample for the left record.
494 		 * 2. After decoding trace data, it needs to compare the record
495 		 * timestamp with the coming perf event, if the record timestamp
496 		 * is later than the perf event, it needs bail out and pushs the
497 		 * record into auxtrace heap, thus the record can be deferred to
498 		 * synthesize sample until run to here at the next time; so this
499 		 * can correlate samples between Arm SPE trace data and other
500 		 * perf events with correct time ordering.
501 		 */
502 
503 		/*
504 		 * Update pid/tid info.
505 		 */
506 		record = &speq->decoder->record;
507 		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
508 			ret = arm_spe_set_tid(speq, record->context_id);
509 			if (ret)
510 				return ret;
511 
512 			spe->use_ctx_pkt_for_pid = true;
513 		}
514 
515 		ret = arm_spe_sample(speq);
516 		if (ret)
517 			return ret;
518 
519 		ret = arm_spe_decode(speq->decoder);
520 		if (!ret) {
521 			pr_debug("No data or all data has been processed.\n");
522 			return 1;
523 		}
524 
525 		/*
526 		 * Error is detected when decode SPE trace data, continue to
527 		 * the next trace data and find out more records.
528 		 */
529 		if (ret < 0)
530 			continue;
531 
532 		record = &speq->decoder->record;
533 
534 		/* Update timestamp for the last record */
535 		if (record->timestamp > speq->timestamp)
536 			speq->timestamp = record->timestamp;
537 
538 		/*
539 		 * If the timestamp of the queue is later than timestamp of the
540 		 * coming perf event, bail out so can allow the perf event to
541 		 * be processed ahead.
542 		 */
543 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
544 			*timestamp = speq->timestamp;
545 			return 0;
546 		}
547 	}
548 
549 	return 0;
550 }
551 
552 static int arm_spe__setup_queue(struct arm_spe *spe,
553 			       struct auxtrace_queue *queue,
554 			       unsigned int queue_nr)
555 {
556 	struct arm_spe_queue *speq = queue->priv;
557 	struct arm_spe_record *record;
558 
559 	if (list_empty(&queue->head) || speq)
560 		return 0;
561 
562 	speq = arm_spe__alloc_queue(spe, queue_nr);
563 
564 	if (!speq)
565 		return -ENOMEM;
566 
567 	queue->priv = speq;
568 
569 	if (queue->cpu != -1)
570 		speq->cpu = queue->cpu;
571 
572 	if (!speq->on_heap) {
573 		int ret;
574 
575 		if (spe->timeless_decoding)
576 			return 0;
577 
578 retry:
579 		ret = arm_spe_decode(speq->decoder);
580 
581 		if (!ret)
582 			return 0;
583 
584 		if (ret < 0)
585 			goto retry;
586 
587 		record = &speq->decoder->record;
588 
589 		speq->timestamp = record->timestamp;
590 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
591 		if (ret)
592 			return ret;
593 		speq->on_heap = true;
594 	}
595 
596 	return 0;
597 }
598 
599 static int arm_spe__setup_queues(struct arm_spe *spe)
600 {
601 	unsigned int i;
602 	int ret;
603 
604 	for (i = 0; i < spe->queues.nr_queues; i++) {
605 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
606 		if (ret)
607 			return ret;
608 	}
609 
610 	return 0;
611 }
612 
613 static int arm_spe__update_queues(struct arm_spe *spe)
614 {
615 	if (spe->queues.new_data) {
616 		spe->queues.new_data = false;
617 		return arm_spe__setup_queues(spe);
618 	}
619 
620 	return 0;
621 }
622 
623 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
624 {
625 	struct evsel *evsel;
626 	struct evlist *evlist = spe->session->evlist;
627 	bool timeless_decoding = true;
628 
629 	/*
630 	 * Circle through the list of event and complain if we find one
631 	 * with the time bit set.
632 	 */
633 	evlist__for_each_entry(evlist, evsel) {
634 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
635 			timeless_decoding = false;
636 	}
637 
638 	return timeless_decoding;
639 }
640 
641 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
642 {
643 	unsigned int queue_nr;
644 	u64 ts;
645 	int ret;
646 
647 	while (1) {
648 		struct auxtrace_queue *queue;
649 		struct arm_spe_queue *speq;
650 
651 		if (!spe->heap.heap_cnt)
652 			return 0;
653 
654 		if (spe->heap.heap_array[0].ordinal >= timestamp)
655 			return 0;
656 
657 		queue_nr = spe->heap.heap_array[0].queue_nr;
658 		queue = &spe->queues.queue_array[queue_nr];
659 		speq = queue->priv;
660 
661 		auxtrace_heap__pop(&spe->heap);
662 
663 		if (spe->heap.heap_cnt) {
664 			ts = spe->heap.heap_array[0].ordinal + 1;
665 			if (ts > timestamp)
666 				ts = timestamp;
667 		} else {
668 			ts = timestamp;
669 		}
670 
671 		/*
672 		 * A previous context-switch event has set pid/tid in the machine's context, so
673 		 * here we need to update the pid/tid in the thread and SPE queue.
674 		 */
675 		if (!spe->use_ctx_pkt_for_pid)
676 			arm_spe_set_pid_tid_cpu(spe, queue);
677 
678 		ret = arm_spe_run_decoder(speq, &ts);
679 		if (ret < 0) {
680 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
681 			return ret;
682 		}
683 
684 		if (!ret) {
685 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
686 			if (ret < 0)
687 				return ret;
688 		} else {
689 			speq->on_heap = false;
690 		}
691 	}
692 
693 	return 0;
694 }
695 
696 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
697 					    u64 time_)
698 {
699 	struct auxtrace_queues *queues = &spe->queues;
700 	unsigned int i;
701 	u64 ts = 0;
702 
703 	for (i = 0; i < queues->nr_queues; i++) {
704 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
705 		struct arm_spe_queue *speq = queue->priv;
706 
707 		if (speq && (tid == -1 || speq->tid == tid)) {
708 			speq->time = time_;
709 			arm_spe_set_pid_tid_cpu(spe, queue);
710 			arm_spe_run_decoder(speq, &ts);
711 		}
712 	}
713 	return 0;
714 }
715 
716 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
717 				  struct perf_sample *sample)
718 {
719 	pid_t pid, tid;
720 	int cpu;
721 
722 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
723 		return 0;
724 
725 	pid = event->context_switch.next_prev_pid;
726 	tid = event->context_switch.next_prev_tid;
727 	cpu = sample->cpu;
728 
729 	if (tid == -1)
730 		pr_warning("context_switch event has no tid\n");
731 
732 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
733 }
734 
735 static int arm_spe_process_event(struct perf_session *session,
736 				 union perf_event *event,
737 				 struct perf_sample *sample,
738 				 struct perf_tool *tool)
739 {
740 	int err = 0;
741 	u64 timestamp;
742 	struct arm_spe *spe = container_of(session->auxtrace,
743 			struct arm_spe, auxtrace);
744 
745 	if (dump_trace)
746 		return 0;
747 
748 	if (!tool->ordered_events) {
749 		pr_err("SPE trace requires ordered events\n");
750 		return -EINVAL;
751 	}
752 
753 	if (sample->time && (sample->time != (u64) -1))
754 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
755 	else
756 		timestamp = 0;
757 
758 	if (timestamp || spe->timeless_decoding) {
759 		err = arm_spe__update_queues(spe);
760 		if (err)
761 			return err;
762 	}
763 
764 	if (spe->timeless_decoding) {
765 		if (event->header.type == PERF_RECORD_EXIT) {
766 			err = arm_spe_process_timeless_queues(spe,
767 					event->fork.tid,
768 					sample->time);
769 		}
770 	} else if (timestamp) {
771 		err = arm_spe_process_queues(spe, timestamp);
772 		if (err)
773 			return err;
774 
775 		if (!spe->use_ctx_pkt_for_pid &&
776 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
777 		    event->header.type == PERF_RECORD_SWITCH))
778 			err = arm_spe_context_switch(spe, event, sample);
779 	}
780 
781 	return err;
782 }
783 
784 static int arm_spe_process_auxtrace_event(struct perf_session *session,
785 					  union perf_event *event,
786 					  struct perf_tool *tool __maybe_unused)
787 {
788 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
789 					     auxtrace);
790 
791 	if (!spe->data_queued) {
792 		struct auxtrace_buffer *buffer;
793 		off_t data_offset;
794 		int fd = perf_data__fd(session->data);
795 		int err;
796 
797 		if (perf_data__is_pipe(session->data)) {
798 			data_offset = 0;
799 		} else {
800 			data_offset = lseek(fd, 0, SEEK_CUR);
801 			if (data_offset == -1)
802 				return -errno;
803 		}
804 
805 		err = auxtrace_queues__add_event(&spe->queues, session, event,
806 				data_offset, &buffer);
807 		if (err)
808 			return err;
809 
810 		/* Dump here now we have copied a piped trace out of the pipe */
811 		if (dump_trace) {
812 			if (auxtrace_buffer__get_data(buffer, fd)) {
813 				arm_spe_dump_event(spe, buffer->data,
814 						buffer->size);
815 				auxtrace_buffer__put_data(buffer);
816 			}
817 		}
818 	}
819 
820 	return 0;
821 }
822 
823 static int arm_spe_flush(struct perf_session *session __maybe_unused,
824 			 struct perf_tool *tool __maybe_unused)
825 {
826 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
827 			auxtrace);
828 	int ret;
829 
830 	if (dump_trace)
831 		return 0;
832 
833 	if (!tool->ordered_events)
834 		return -EINVAL;
835 
836 	ret = arm_spe__update_queues(spe);
837 	if (ret < 0)
838 		return ret;
839 
840 	if (spe->timeless_decoding)
841 		return arm_spe_process_timeless_queues(spe, -1,
842 				MAX_TIMESTAMP - 1);
843 
844 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
845 	if (ret)
846 		return ret;
847 
848 	if (!spe->use_ctx_pkt_for_pid)
849 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
850 			    "Matching of TIDs to SPE events could be inaccurate.\n");
851 
852 	return 0;
853 }
854 
855 static void arm_spe_free_queue(void *priv)
856 {
857 	struct arm_spe_queue *speq = priv;
858 
859 	if (!speq)
860 		return;
861 	thread__zput(speq->thread);
862 	arm_spe_decoder_free(speq->decoder);
863 	zfree(&speq->event_buf);
864 	free(speq);
865 }
866 
867 static void arm_spe_free_events(struct perf_session *session)
868 {
869 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
870 					     auxtrace);
871 	struct auxtrace_queues *queues = &spe->queues;
872 	unsigned int i;
873 
874 	for (i = 0; i < queues->nr_queues; i++) {
875 		arm_spe_free_queue(queues->queue_array[i].priv);
876 		queues->queue_array[i].priv = NULL;
877 	}
878 	auxtrace_queues__free(queues);
879 }
880 
881 static void arm_spe_free(struct perf_session *session)
882 {
883 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
884 					     auxtrace);
885 
886 	auxtrace_heap__free(&spe->heap);
887 	arm_spe_free_events(session);
888 	session->auxtrace = NULL;
889 	free(spe);
890 }
891 
892 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
893 				      struct evsel *evsel)
894 {
895 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
896 
897 	return evsel->core.attr.type == spe->pmu_type;
898 }
899 
900 static const char * const arm_spe_info_fmts[] = {
901 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
902 };
903 
904 static void arm_spe_print_info(__u64 *arr)
905 {
906 	if (!dump_trace)
907 		return;
908 
909 	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
910 }
911 
/*
 * Context for synthesizing attr events: arm_spe_event_synth() receives a
 * pointer to dummy_tool and uses container_of() to get back to the session.
 */
struct arm_spe_synth {
	/* Tool handle passed to perf_event__synthesize_attr() */
	struct perf_tool dummy_tool;
	/* Session the synthesized events are delivered to */
	struct perf_session *session;
};
916 
917 static int arm_spe_event_synth(struct perf_tool *tool,
918 			       union perf_event *event,
919 			       struct perf_sample *sample __maybe_unused,
920 			       struct machine *machine __maybe_unused)
921 {
922 	struct arm_spe_synth *arm_spe_synth =
923 		      container_of(tool, struct arm_spe_synth, dummy_tool);
924 
925 	return perf_session__deliver_synth_event(arm_spe_synth->session,
926 						 event, NULL);
927 }
928 
929 static int arm_spe_synth_event(struct perf_session *session,
930 			       struct perf_event_attr *attr, u64 id)
931 {
932 	struct arm_spe_synth arm_spe_synth;
933 
934 	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
935 	arm_spe_synth.session = session;
936 
937 	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
938 					   &id, arm_spe_event_synth);
939 }
940 
941 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
942 				    const char *name)
943 {
944 	struct evsel *evsel;
945 
946 	evlist__for_each_entry(evlist, evsel) {
947 		if (evsel->core.id && evsel->core.id[0] == id) {
948 			if (evsel->name)
949 				zfree(&evsel->name);
950 			evsel->name = strdup(name);
951 			break;
952 		}
953 	}
954 }
955 
956 static int
957 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
958 {
959 	struct evlist *evlist = session->evlist;
960 	struct evsel *evsel;
961 	struct perf_event_attr attr;
962 	bool found = false;
963 	u64 id;
964 	int err;
965 
966 	evlist__for_each_entry(evlist, evsel) {
967 		if (evsel->core.attr.type == spe->pmu_type) {
968 			found = true;
969 			break;
970 		}
971 	}
972 
973 	if (!found) {
974 		pr_debug("No selected events with SPE trace data\n");
975 		return 0;
976 	}
977 
978 	memset(&attr, 0, sizeof(struct perf_event_attr));
979 	attr.size = sizeof(struct perf_event_attr);
980 	attr.type = PERF_TYPE_HARDWARE;
981 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
982 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
983 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
984 	if (spe->timeless_decoding)
985 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
986 	else
987 		attr.sample_type |= PERF_SAMPLE_TIME;
988 
989 	attr.exclude_user = evsel->core.attr.exclude_user;
990 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
991 	attr.exclude_hv = evsel->core.attr.exclude_hv;
992 	attr.exclude_host = evsel->core.attr.exclude_host;
993 	attr.exclude_guest = evsel->core.attr.exclude_guest;
994 	attr.sample_id_all = evsel->core.attr.sample_id_all;
995 	attr.read_format = evsel->core.attr.read_format;
996 
997 	/* create new id val to be a fixed offset from evsel id */
998 	id = evsel->core.id[0] + 1000000000;
999 
1000 	if (!id)
1001 		id = 1;
1002 
1003 	if (spe->synth_opts.flc) {
1004 		spe->sample_flc = true;
1005 
1006 		/* Level 1 data cache miss */
1007 		err = arm_spe_synth_event(session, &attr, id);
1008 		if (err)
1009 			return err;
1010 		spe->l1d_miss_id = id;
1011 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1012 		id += 1;
1013 
1014 		/* Level 1 data cache access */
1015 		err = arm_spe_synth_event(session, &attr, id);
1016 		if (err)
1017 			return err;
1018 		spe->l1d_access_id = id;
1019 		arm_spe_set_event_name(evlist, id, "l1d-access");
1020 		id += 1;
1021 	}
1022 
1023 	if (spe->synth_opts.llc) {
1024 		spe->sample_llc = true;
1025 
1026 		/* Last level cache miss */
1027 		err = arm_spe_synth_event(session, &attr, id);
1028 		if (err)
1029 			return err;
1030 		spe->llc_miss_id = id;
1031 		arm_spe_set_event_name(evlist, id, "llc-miss");
1032 		id += 1;
1033 
1034 		/* Last level cache access */
1035 		err = arm_spe_synth_event(session, &attr, id);
1036 		if (err)
1037 			return err;
1038 		spe->llc_access_id = id;
1039 		arm_spe_set_event_name(evlist, id, "llc-access");
1040 		id += 1;
1041 	}
1042 
1043 	if (spe->synth_opts.tlb) {
1044 		spe->sample_tlb = true;
1045 
1046 		/* TLB miss */
1047 		err = arm_spe_synth_event(session, &attr, id);
1048 		if (err)
1049 			return err;
1050 		spe->tlb_miss_id = id;
1051 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1052 		id += 1;
1053 
1054 		/* TLB access */
1055 		err = arm_spe_synth_event(session, &attr, id);
1056 		if (err)
1057 			return err;
1058 		spe->tlb_access_id = id;
1059 		arm_spe_set_event_name(evlist, id, "tlb-access");
1060 		id += 1;
1061 	}
1062 
1063 	if (spe->synth_opts.branches) {
1064 		spe->sample_branch = true;
1065 
1066 		/* Branch miss */
1067 		err = arm_spe_synth_event(session, &attr, id);
1068 		if (err)
1069 			return err;
1070 		spe->branch_miss_id = id;
1071 		arm_spe_set_event_name(evlist, id, "branch-miss");
1072 		id += 1;
1073 	}
1074 
1075 	if (spe->synth_opts.remote_access) {
1076 		spe->sample_remote_access = true;
1077 
1078 		/* Remote access */
1079 		err = arm_spe_synth_event(session, &attr, id);
1080 		if (err)
1081 			return err;
1082 		spe->remote_access_id = id;
1083 		arm_spe_set_event_name(evlist, id, "remote-access");
1084 		id += 1;
1085 	}
1086 
1087 	if (spe->synth_opts.mem) {
1088 		spe->sample_memory = true;
1089 
1090 		err = arm_spe_synth_event(session, &attr, id);
1091 		if (err)
1092 			return err;
1093 		spe->memory_id = id;
1094 		arm_spe_set_event_name(evlist, id, "memory");
1095 	}
1096 
1097 	return 0;
1098 }
1099 
1100 int arm_spe_process_auxtrace_info(union perf_event *event,
1101 				  struct perf_session *session)
1102 {
1103 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1104 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1105 	struct perf_record_time_conv *tc = &session->time_conv;
1106 	struct arm_spe *spe;
1107 	int err;
1108 
1109 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1110 					min_sz)
1111 		return -EINVAL;
1112 
1113 	spe = zalloc(sizeof(struct arm_spe));
1114 	if (!spe)
1115 		return -ENOMEM;
1116 
1117 	err = auxtrace_queues__init(&spe->queues);
1118 	if (err)
1119 		goto err_free;
1120 
1121 	spe->session = session;
1122 	spe->machine = &session->machines.host; /* No kvm support */
1123 	spe->auxtrace_type = auxtrace_info->type;
1124 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1125 
1126 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1127 
1128 	/*
1129 	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1130 	 * and the parameters for hardware clock are stored in the session
1131 	 * context.  Passes these parameters to the struct perf_tsc_conversion
1132 	 * in "spe->tc", which is used for later conversion between clock
1133 	 * counter and timestamp.
1134 	 *
1135 	 * For backward compatibility, copies the fields starting from
1136 	 * "time_cycles" only if they are contained in the event.
1137 	 */
1138 	spe->tc.time_shift = tc->time_shift;
1139 	spe->tc.time_mult = tc->time_mult;
1140 	spe->tc.time_zero = tc->time_zero;
1141 
1142 	if (event_contains(*tc, time_cycles)) {
1143 		spe->tc.time_cycles = tc->time_cycles;
1144 		spe->tc.time_mask = tc->time_mask;
1145 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1146 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1147 	}
1148 
1149 	spe->auxtrace.process_event = arm_spe_process_event;
1150 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1151 	spe->auxtrace.flush_events = arm_spe_flush;
1152 	spe->auxtrace.free_events = arm_spe_free_events;
1153 	spe->auxtrace.free = arm_spe_free;
1154 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1155 	session->auxtrace = &spe->auxtrace;
1156 
1157 	arm_spe_print_info(&auxtrace_info->priv[0]);
1158 
1159 	if (dump_trace)
1160 		return 0;
1161 
1162 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1163 		spe->synth_opts = *session->itrace_synth_opts;
1164 	else
1165 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1166 
1167 	err = arm_spe_synth_events(spe, session);
1168 	if (err)
1169 		goto err_free_queues;
1170 
1171 	err = auxtrace_queues__process_index(&spe->queues, session);
1172 	if (err)
1173 		goto err_free_queues;
1174 
1175 	if (spe->queues.populated)
1176 		spe->data_queued = true;
1177 
1178 	return 0;
1179 
1180 err_free_queues:
1181 	auxtrace_queues__free(&spe->queues);
1182 	session->auxtrace = NULL;
1183 err_free:
1184 	free(spe);
1185 	return err;
1186 }
1187