xref: /openbmc/linux/tools/perf/util/arm-spe.c (revision a8f4fcdd8ba7d191c29ae87a2315906fe90368d6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
37 #define MAX_TIMESTAMP (~0ULL)
38 
/*
 * Per-session Arm SPE decoding state. It embeds the generic auxtrace
 * callbacks, so the session's auxtrace pointer can be converted back to
 * this structure with container_of().
 */
struct arm_spe {
	struct auxtrace			auxtrace;	/* callbacks registered with the session */
	struct auxtrace_queues		queues;		/* queued AUX trace buffers */
	struct auxtrace_heap		heap;		/* queues keyed by their pending record timestamp */
	struct itrace_synth_opts        synth_opts;	/* session's itrace options, or the defaults */
	u32				auxtrace_type;	/* type field of the AUXTRACE_INFO event */
	struct perf_session		*session;
	struct machine			*machine;	/* host machine of the session */
	u32				pmu_type;	/* SPE PMU type taken from AUXTRACE_INFO priv[] */

	struct perf_tsc_conversion	tc;		/* parameters for time <-> counter conversion */

	u8				timeless_decoding; /* set when no evsel samples PERF_SAMPLE_TIME */
	u8				data_queued;	/* set when the queues were populated from the index */

	u64				sample_type;	/* sample_type used for the synthesized events */
	/* Which kinds of samples to synthesize, selected from synth_opts. */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	/* Ids of the synthesized events, one per kind of sample. */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;	/* read lazily from the machine; ips at or above it are kernel */

	unsigned long			num_events;	/* NOTE(review): not referenced in the code visible here */
	u8				use_ctx_pkt_for_pid; /* set once a record carrying a context id was seen */
};
77 
/*
 * Decoding state attached to one auxtrace queue through queue->priv.
 */
struct arm_spe_queue {
	struct arm_spe			*spe;		/* owning session state */
	unsigned int			queue_nr;	/* index into spe->queues.queue_array */
	struct auxtrace_buffer		*buffer;	/* buffer most recently fetched for the decoder */
	struct auxtrace_buffer		*old_buffer;	/* last non-empty buffer; its data is dropped when the next one arrives */
	union perf_event		*event_buf;	/* PERF_SAMPLE_MAX_SIZE scratch space for synthesized events */
	bool				on_heap;	/* queue currently has an entry in spe->heap */
	bool				done;		/* NOTE(review): not referenced in the code visible here */
	pid_t				pid;		/* -1 until known */
	pid_t				tid;		/* -1 until known */
	int				cpu;		/* -1 until known */
	struct arm_spe_decoder		*decoder;
	u64				time;		/* time passed to timeless queue processing */
	u64				timestamp;	/* highest record timestamp decoded so far */
	struct thread			*thread;	/* thread looked up for tid; released with thread__zput() */
};
94 
95 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
96 			 unsigned char *buf, size_t len)
97 {
98 	struct arm_spe_pkt packet;
99 	size_t pos = 0;
100 	int ret, pkt_len, i;
101 	char desc[ARM_SPE_PKT_DESC_MAX];
102 	const char *color = PERF_COLOR_BLUE;
103 
104 	color_fprintf(stdout, color,
105 		      ". ... ARM SPE data: size %#zx bytes\n",
106 		      len);
107 
108 	while (len) {
109 		ret = arm_spe_get_packet(buf, len, &packet);
110 		if (ret > 0)
111 			pkt_len = ret;
112 		else
113 			pkt_len = 1;
114 		printf(".");
115 		color_fprintf(stdout, color, "  %08x: ", pos);
116 		for (i = 0; i < pkt_len; i++)
117 			color_fprintf(stdout, color, " %02x", buf[i]);
118 		for (; i < 16; i++)
119 			color_fprintf(stdout, color, "   ");
120 		if (ret > 0) {
121 			ret = arm_spe_pkt_desc(&packet, desc,
122 					       ARM_SPE_PKT_DESC_MAX);
123 			if (!ret)
124 				color_fprintf(stdout, color, " %s\n", desc);
125 		} else {
126 			color_fprintf(stdout, color, " Bad packet!\n");
127 		}
128 		pos += pkt_len;
129 		buf += pkt_len;
130 		len -= pkt_len;
131 	}
132 }
133 
/*
 * Dump one AUX trace buffer: print a "." separator line, then the
 * packet-by-packet listing produced by arm_spe_dump().
 */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");

	arm_spe_dump(spe, buf, len);
}
140 
141 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
142 {
143 	struct arm_spe_queue *speq = data;
144 	struct auxtrace_buffer *buffer = speq->buffer;
145 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
146 	struct auxtrace_queue *queue;
147 
148 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
149 
150 	buffer = auxtrace_buffer__next(queue, buffer);
151 	/* If no more data, drop the previous auxtrace_buffer and return */
152 	if (!buffer) {
153 		if (old_buffer)
154 			auxtrace_buffer__drop_data(old_buffer);
155 		b->len = 0;
156 		return 0;
157 	}
158 
159 	speq->buffer = buffer;
160 
161 	/* If the aux_buffer doesn't have data associated, try to load it */
162 	if (!buffer->data) {
163 		/* get the file desc associated with the perf data file */
164 		int fd = perf_data__fd(speq->spe->session->data);
165 
166 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
167 		if (!buffer->data)
168 			return -ENOMEM;
169 	}
170 
171 	b->len = buffer->size;
172 	b->buf = buffer->data;
173 
174 	if (b->len) {
175 		if (old_buffer)
176 			auxtrace_buffer__drop_data(old_buffer);
177 		speq->old_buffer = buffer;
178 	} else {
179 		auxtrace_buffer__drop_data(buffer);
180 		return arm_spe_get_trace(b, data);
181 	}
182 
183 	return 0;
184 }
185 
186 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
187 		unsigned int queue_nr)
188 {
189 	struct arm_spe_params params = { .get_trace = 0, };
190 	struct arm_spe_queue *speq;
191 
192 	speq = zalloc(sizeof(*speq));
193 	if (!speq)
194 		return NULL;
195 
196 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
197 	if (!speq->event_buf)
198 		goto out_free;
199 
200 	speq->spe = spe;
201 	speq->queue_nr = queue_nr;
202 	speq->pid = -1;
203 	speq->tid = -1;
204 	speq->cpu = -1;
205 
206 	/* params set */
207 	params.get_trace = arm_spe_get_trace;
208 	params.data = speq;
209 
210 	/* create new decoder */
211 	speq->decoder = arm_spe_decoder_new(&params);
212 	if (!speq->decoder)
213 		goto out_free;
214 
215 	return speq;
216 
217 out_free:
218 	zfree(&speq->event_buf);
219 	free(speq);
220 
221 	return NULL;
222 }
223 
224 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
225 {
226 	return ip >= spe->kernel_start ?
227 		PERF_RECORD_MISC_KERNEL :
228 		PERF_RECORD_MISC_USER;
229 }
230 
231 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
232 				    struct auxtrace_queue *queue)
233 {
234 	struct arm_spe_queue *speq = queue->priv;
235 	pid_t tid;
236 
237 	tid = machine__get_current_tid(spe->machine, speq->cpu);
238 	if (tid != -1) {
239 		speq->tid = tid;
240 		thread__zput(speq->thread);
241 	} else
242 		speq->tid = queue->tid;
243 
244 	if ((!speq->thread) && (speq->tid != -1)) {
245 		speq->thread = machine__find_thread(spe->machine, -1,
246 						    speq->tid);
247 	}
248 
249 	if (speq->thread) {
250 		speq->pid = speq->thread->pid_;
251 		if (queue->cpu == -1)
252 			speq->cpu = speq->thread->cpu;
253 	}
254 }
255 
256 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
257 {
258 	struct arm_spe *spe = speq->spe;
259 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
260 
261 	if (err)
262 		return err;
263 
264 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
265 
266 	return 0;
267 }
268 
269 static void arm_spe_prep_sample(struct arm_spe *spe,
270 				struct arm_spe_queue *speq,
271 				union perf_event *event,
272 				struct perf_sample *sample)
273 {
274 	struct arm_spe_record *record = &speq->decoder->record;
275 
276 	if (!spe->timeless_decoding)
277 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
278 
279 	sample->ip = record->from_ip;
280 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
281 	sample->pid = speq->pid;
282 	sample->tid = speq->tid;
283 	sample->period = 1;
284 	sample->cpu = speq->cpu;
285 
286 	event->sample.header.type = PERF_RECORD_SAMPLE;
287 	event->sample.header.misc = sample->cpumode;
288 	event->sample.header.size = sizeof(struct perf_event_header);
289 }
290 
291 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
292 {
293 	event->header.size = perf_event__sample_event_size(sample, type, 0);
294 	return perf_event__synthesize_sample(event, type, 0, sample);
295 }
296 
297 static inline int
298 arm_spe_deliver_synth_event(struct arm_spe *spe,
299 			    struct arm_spe_queue *speq __maybe_unused,
300 			    union perf_event *event,
301 			    struct perf_sample *sample)
302 {
303 	int ret;
304 
305 	if (spe->synth_opts.inject) {
306 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
307 		if (ret)
308 			return ret;
309 	}
310 
311 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
312 	if (ret)
313 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
314 
315 	return ret;
316 }
317 
318 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
319 				     u64 spe_events_id, u64 data_src)
320 {
321 	struct arm_spe *spe = speq->spe;
322 	struct arm_spe_record *record = &speq->decoder->record;
323 	union perf_event *event = speq->event_buf;
324 	struct perf_sample sample = { .ip = 0, };
325 
326 	arm_spe_prep_sample(spe, speq, event, &sample);
327 
328 	sample.id = spe_events_id;
329 	sample.stream_id = spe_events_id;
330 	sample.addr = record->virt_addr;
331 	sample.phys_addr = record->phys_addr;
332 	sample.data_src = data_src;
333 
334 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
335 }
336 
337 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
338 					u64 spe_events_id)
339 {
340 	struct arm_spe *spe = speq->spe;
341 	struct arm_spe_record *record = &speq->decoder->record;
342 	union perf_event *event = speq->event_buf;
343 	struct perf_sample sample = { .ip = 0, };
344 
345 	arm_spe_prep_sample(spe, speq, event, &sample);
346 
347 	sample.id = spe_events_id;
348 	sample.stream_id = spe_events_id;
349 	sample.addr = record->to_ip;
350 
351 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
352 }
353 
354 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
355 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
356 			 ARM_SPE_REMOTE_ACCESS)
357 
358 static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
359 {
360 	if (type & SPE_MEM_TYPE)
361 		return true;
362 
363 	return false;
364 }
365 
366 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
367 {
368 	union perf_mem_data_src	data_src = { 0 };
369 
370 	if (record->op == ARM_SPE_LD)
371 		data_src.mem_op = PERF_MEM_OP_LOAD;
372 	else
373 		data_src.mem_op = PERF_MEM_OP_STORE;
374 
375 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
376 		data_src.mem_lvl = PERF_MEM_LVL_L3;
377 
378 		if (record->type & ARM_SPE_LLC_MISS)
379 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
380 		else
381 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
382 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
383 		data_src.mem_lvl = PERF_MEM_LVL_L1;
384 
385 		if (record->type & ARM_SPE_L1D_MISS)
386 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
387 		else
388 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
389 	}
390 
391 	if (record->type & ARM_SPE_REMOTE_ACCESS)
392 		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
393 
394 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
395 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
396 
397 		if (record->type & ARM_SPE_TLB_MISS)
398 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
399 		else
400 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
401 	}
402 
403 	return data_src.val;
404 }
405 
406 static int arm_spe_sample(struct arm_spe_queue *speq)
407 {
408 	const struct arm_spe_record *record = &speq->decoder->record;
409 	struct arm_spe *spe = speq->spe;
410 	u64 data_src;
411 	int err;
412 
413 	data_src = arm_spe__synth_data_source(record);
414 
415 	if (spe->sample_flc) {
416 		if (record->type & ARM_SPE_L1D_MISS) {
417 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
418 							data_src);
419 			if (err)
420 				return err;
421 		}
422 
423 		if (record->type & ARM_SPE_L1D_ACCESS) {
424 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
425 							data_src);
426 			if (err)
427 				return err;
428 		}
429 	}
430 
431 	if (spe->sample_llc) {
432 		if (record->type & ARM_SPE_LLC_MISS) {
433 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
434 							data_src);
435 			if (err)
436 				return err;
437 		}
438 
439 		if (record->type & ARM_SPE_LLC_ACCESS) {
440 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
441 							data_src);
442 			if (err)
443 				return err;
444 		}
445 	}
446 
447 	if (spe->sample_tlb) {
448 		if (record->type & ARM_SPE_TLB_MISS) {
449 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
450 							data_src);
451 			if (err)
452 				return err;
453 		}
454 
455 		if (record->type & ARM_SPE_TLB_ACCESS) {
456 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
457 							data_src);
458 			if (err)
459 				return err;
460 		}
461 	}
462 
463 	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
464 		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
465 		if (err)
466 			return err;
467 	}
468 
469 	if (spe->sample_remote_access &&
470 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
471 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
472 						data_src);
473 		if (err)
474 			return err;
475 	}
476 
477 	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
478 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
479 		if (err)
480 			return err;
481 	}
482 
483 	return 0;
484 }
485 
486 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
487 {
488 	struct arm_spe *spe = speq->spe;
489 	struct arm_spe_record *record;
490 	int ret;
491 
492 	if (!spe->kernel_start)
493 		spe->kernel_start = machine__kernel_start(spe->machine);
494 
495 	while (1) {
496 		/*
497 		 * The usual logic is firstly to decode the packets, and then
498 		 * based the record to synthesize sample; but here the flow is
499 		 * reversed: it calls arm_spe_sample() for synthesizing samples
500 		 * prior to arm_spe_decode().
501 		 *
502 		 * Two reasons for this code logic:
503 		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
504 		 * has decoded trace data and generated a record, but the record
505 		 * is left to generate sample until run to here, so it's correct
506 		 * to synthesize sample for the left record.
507 		 * 2. After decoding trace data, it needs to compare the record
508 		 * timestamp with the coming perf event, if the record timestamp
509 		 * is later than the perf event, it needs bail out and pushs the
510 		 * record into auxtrace heap, thus the record can be deferred to
511 		 * synthesize sample until run to here at the next time; so this
512 		 * can correlate samples between Arm SPE trace data and other
513 		 * perf events with correct time ordering.
514 		 */
515 
516 		/*
517 		 * Update pid/tid info.
518 		 */
519 		record = &speq->decoder->record;
520 		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
521 			ret = arm_spe_set_tid(speq, record->context_id);
522 			if (ret)
523 				return ret;
524 
525 			spe->use_ctx_pkt_for_pid = true;
526 		}
527 
528 		ret = arm_spe_sample(speq);
529 		if (ret)
530 			return ret;
531 
532 		ret = arm_spe_decode(speq->decoder);
533 		if (!ret) {
534 			pr_debug("No data or all data has been processed.\n");
535 			return 1;
536 		}
537 
538 		/*
539 		 * Error is detected when decode SPE trace data, continue to
540 		 * the next trace data and find out more records.
541 		 */
542 		if (ret < 0)
543 			continue;
544 
545 		record = &speq->decoder->record;
546 
547 		/* Update timestamp for the last record */
548 		if (record->timestamp > speq->timestamp)
549 			speq->timestamp = record->timestamp;
550 
551 		/*
552 		 * If the timestamp of the queue is later than timestamp of the
553 		 * coming perf event, bail out so can allow the perf event to
554 		 * be processed ahead.
555 		 */
556 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
557 			*timestamp = speq->timestamp;
558 			return 0;
559 		}
560 	}
561 
562 	return 0;
563 }
564 
565 static int arm_spe__setup_queue(struct arm_spe *spe,
566 			       struct auxtrace_queue *queue,
567 			       unsigned int queue_nr)
568 {
569 	struct arm_spe_queue *speq = queue->priv;
570 	struct arm_spe_record *record;
571 
572 	if (list_empty(&queue->head) || speq)
573 		return 0;
574 
575 	speq = arm_spe__alloc_queue(spe, queue_nr);
576 
577 	if (!speq)
578 		return -ENOMEM;
579 
580 	queue->priv = speq;
581 
582 	if (queue->cpu != -1)
583 		speq->cpu = queue->cpu;
584 
585 	if (!speq->on_heap) {
586 		int ret;
587 
588 		if (spe->timeless_decoding)
589 			return 0;
590 
591 retry:
592 		ret = arm_spe_decode(speq->decoder);
593 
594 		if (!ret)
595 			return 0;
596 
597 		if (ret < 0)
598 			goto retry;
599 
600 		record = &speq->decoder->record;
601 
602 		speq->timestamp = record->timestamp;
603 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
604 		if (ret)
605 			return ret;
606 		speq->on_heap = true;
607 	}
608 
609 	return 0;
610 }
611 
612 static int arm_spe__setup_queues(struct arm_spe *spe)
613 {
614 	unsigned int i;
615 	int ret;
616 
617 	for (i = 0; i < spe->queues.nr_queues; i++) {
618 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
619 		if (ret)
620 			return ret;
621 	}
622 
623 	return 0;
624 }
625 
626 static int arm_spe__update_queues(struct arm_spe *spe)
627 {
628 	if (spe->queues.new_data) {
629 		spe->queues.new_data = false;
630 		return arm_spe__setup_queues(spe);
631 	}
632 
633 	return 0;
634 }
635 
636 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
637 {
638 	struct evsel *evsel;
639 	struct evlist *evlist = spe->session->evlist;
640 	bool timeless_decoding = true;
641 
642 	/*
643 	 * Circle through the list of event and complain if we find one
644 	 * with the time bit set.
645 	 */
646 	evlist__for_each_entry(evlist, evsel) {
647 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
648 			timeless_decoding = false;
649 	}
650 
651 	return timeless_decoding;
652 }
653 
654 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
655 {
656 	unsigned int queue_nr;
657 	u64 ts;
658 	int ret;
659 
660 	while (1) {
661 		struct auxtrace_queue *queue;
662 		struct arm_spe_queue *speq;
663 
664 		if (!spe->heap.heap_cnt)
665 			return 0;
666 
667 		if (spe->heap.heap_array[0].ordinal >= timestamp)
668 			return 0;
669 
670 		queue_nr = spe->heap.heap_array[0].queue_nr;
671 		queue = &spe->queues.queue_array[queue_nr];
672 		speq = queue->priv;
673 
674 		auxtrace_heap__pop(&spe->heap);
675 
676 		if (spe->heap.heap_cnt) {
677 			ts = spe->heap.heap_array[0].ordinal + 1;
678 			if (ts > timestamp)
679 				ts = timestamp;
680 		} else {
681 			ts = timestamp;
682 		}
683 
684 		/*
685 		 * A previous context-switch event has set pid/tid in the machine's context, so
686 		 * here we need to update the pid/tid in the thread and SPE queue.
687 		 */
688 		if (!spe->use_ctx_pkt_for_pid)
689 			arm_spe_set_pid_tid_cpu(spe, queue);
690 
691 		ret = arm_spe_run_decoder(speq, &ts);
692 		if (ret < 0) {
693 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
694 			return ret;
695 		}
696 
697 		if (!ret) {
698 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
699 			if (ret < 0)
700 				return ret;
701 		} else {
702 			speq->on_heap = false;
703 		}
704 	}
705 
706 	return 0;
707 }
708 
709 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
710 					    u64 time_)
711 {
712 	struct auxtrace_queues *queues = &spe->queues;
713 	unsigned int i;
714 	u64 ts = 0;
715 
716 	for (i = 0; i < queues->nr_queues; i++) {
717 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
718 		struct arm_spe_queue *speq = queue->priv;
719 
720 		if (speq && (tid == -1 || speq->tid == tid)) {
721 			speq->time = time_;
722 			arm_spe_set_pid_tid_cpu(spe, queue);
723 			arm_spe_run_decoder(speq, &ts);
724 		}
725 	}
726 	return 0;
727 }
728 
729 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
730 				  struct perf_sample *sample)
731 {
732 	pid_t pid, tid;
733 	int cpu;
734 
735 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
736 		return 0;
737 
738 	pid = event->context_switch.next_prev_pid;
739 	tid = event->context_switch.next_prev_tid;
740 	cpu = sample->cpu;
741 
742 	if (tid == -1)
743 		pr_warning("context_switch event has no tid\n");
744 
745 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
746 }
747 
748 static int arm_spe_process_event(struct perf_session *session,
749 				 union perf_event *event,
750 				 struct perf_sample *sample,
751 				 struct perf_tool *tool)
752 {
753 	int err = 0;
754 	u64 timestamp;
755 	struct arm_spe *spe = container_of(session->auxtrace,
756 			struct arm_spe, auxtrace);
757 
758 	if (dump_trace)
759 		return 0;
760 
761 	if (!tool->ordered_events) {
762 		pr_err("SPE trace requires ordered events\n");
763 		return -EINVAL;
764 	}
765 
766 	if (sample->time && (sample->time != (u64) -1))
767 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
768 	else
769 		timestamp = 0;
770 
771 	if (timestamp || spe->timeless_decoding) {
772 		err = arm_spe__update_queues(spe);
773 		if (err)
774 			return err;
775 	}
776 
777 	if (spe->timeless_decoding) {
778 		if (event->header.type == PERF_RECORD_EXIT) {
779 			err = arm_spe_process_timeless_queues(spe,
780 					event->fork.tid,
781 					sample->time);
782 		}
783 	} else if (timestamp) {
784 		err = arm_spe_process_queues(spe, timestamp);
785 		if (err)
786 			return err;
787 
788 		if (!spe->use_ctx_pkt_for_pid &&
789 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
790 		    event->header.type == PERF_RECORD_SWITCH))
791 			err = arm_spe_context_switch(spe, event, sample);
792 	}
793 
794 	return err;
795 }
796 
797 static int arm_spe_process_auxtrace_event(struct perf_session *session,
798 					  union perf_event *event,
799 					  struct perf_tool *tool __maybe_unused)
800 {
801 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
802 					     auxtrace);
803 
804 	if (!spe->data_queued) {
805 		struct auxtrace_buffer *buffer;
806 		off_t data_offset;
807 		int fd = perf_data__fd(session->data);
808 		int err;
809 
810 		if (perf_data__is_pipe(session->data)) {
811 			data_offset = 0;
812 		} else {
813 			data_offset = lseek(fd, 0, SEEK_CUR);
814 			if (data_offset == -1)
815 				return -errno;
816 		}
817 
818 		err = auxtrace_queues__add_event(&spe->queues, session, event,
819 				data_offset, &buffer);
820 		if (err)
821 			return err;
822 
823 		/* Dump here now we have copied a piped trace out of the pipe */
824 		if (dump_trace) {
825 			if (auxtrace_buffer__get_data(buffer, fd)) {
826 				arm_spe_dump_event(spe, buffer->data,
827 						buffer->size);
828 				auxtrace_buffer__put_data(buffer);
829 			}
830 		}
831 	}
832 
833 	return 0;
834 }
835 
836 static int arm_spe_flush(struct perf_session *session __maybe_unused,
837 			 struct perf_tool *tool __maybe_unused)
838 {
839 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
840 			auxtrace);
841 	int ret;
842 
843 	if (dump_trace)
844 		return 0;
845 
846 	if (!tool->ordered_events)
847 		return -EINVAL;
848 
849 	ret = arm_spe__update_queues(spe);
850 	if (ret < 0)
851 		return ret;
852 
853 	if (spe->timeless_decoding)
854 		return arm_spe_process_timeless_queues(spe, -1,
855 				MAX_TIMESTAMP - 1);
856 
857 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
858 	if (ret)
859 		return ret;
860 
861 	if (!spe->use_ctx_pkt_for_pid)
862 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
863 			    "Matching of TIDs to SPE events could be inaccurate.\n");
864 
865 	return 0;
866 }
867 
868 static void arm_spe_free_queue(void *priv)
869 {
870 	struct arm_spe_queue *speq = priv;
871 
872 	if (!speq)
873 		return;
874 	thread__zput(speq->thread);
875 	arm_spe_decoder_free(speq->decoder);
876 	zfree(&speq->event_buf);
877 	free(speq);
878 }
879 
880 static void arm_spe_free_events(struct perf_session *session)
881 {
882 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
883 					     auxtrace);
884 	struct auxtrace_queues *queues = &spe->queues;
885 	unsigned int i;
886 
887 	for (i = 0; i < queues->nr_queues; i++) {
888 		arm_spe_free_queue(queues->queue_array[i].priv);
889 		queues->queue_array[i].priv = NULL;
890 	}
891 	auxtrace_queues__free(queues);
892 }
893 
894 static void arm_spe_free(struct perf_session *session)
895 {
896 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
897 					     auxtrace);
898 
899 	auxtrace_heap__free(&spe->heap);
900 	arm_spe_free_events(session);
901 	session->auxtrace = NULL;
902 	free(spe);
903 }
904 
905 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
906 				      struct evsel *evsel)
907 {
908 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
909 
910 	return evsel->core.attr.type == spe->pmu_type;
911 }
912 
913 static const char * const arm_spe_info_fmts[] = {
914 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
915 };
916 
917 static void arm_spe_print_info(__u64 *arr)
918 {
919 	if (!dump_trace)
920 		return;
921 
922 	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
923 }
924 
/*
 * Context for synthesizing attribute events: the dummy tool is what gets
 * passed to the synthesis routine, and container_of() on it recovers the
 * session in the arm_spe_event_synth() callback.
 */
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
929 
930 static int arm_spe_event_synth(struct perf_tool *tool,
931 			       union perf_event *event,
932 			       struct perf_sample *sample __maybe_unused,
933 			       struct machine *machine __maybe_unused)
934 {
935 	struct arm_spe_synth *arm_spe_synth =
936 		      container_of(tool, struct arm_spe_synth, dummy_tool);
937 
938 	return perf_session__deliver_synth_event(arm_spe_synth->session,
939 						 event, NULL);
940 }
941 
942 static int arm_spe_synth_event(struct perf_session *session,
943 			       struct perf_event_attr *attr, u64 id)
944 {
945 	struct arm_spe_synth arm_spe_synth;
946 
947 	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
948 	arm_spe_synth.session = session;
949 
950 	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
951 					   &id, arm_spe_event_synth);
952 }
953 
954 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
955 				    const char *name)
956 {
957 	struct evsel *evsel;
958 
959 	evlist__for_each_entry(evlist, evsel) {
960 		if (evsel->core.id && evsel->core.id[0] == id) {
961 			if (evsel->name)
962 				zfree(&evsel->name);
963 			evsel->name = strdup(name);
964 			break;
965 		}
966 	}
967 }
968 
969 static int
970 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
971 {
972 	struct evlist *evlist = session->evlist;
973 	struct evsel *evsel;
974 	struct perf_event_attr attr;
975 	bool found = false;
976 	u64 id;
977 	int err;
978 
979 	evlist__for_each_entry(evlist, evsel) {
980 		if (evsel->core.attr.type == spe->pmu_type) {
981 			found = true;
982 			break;
983 		}
984 	}
985 
986 	if (!found) {
987 		pr_debug("No selected events with SPE trace data\n");
988 		return 0;
989 	}
990 
991 	memset(&attr, 0, sizeof(struct perf_event_attr));
992 	attr.size = sizeof(struct perf_event_attr);
993 	attr.type = PERF_TYPE_HARDWARE;
994 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
995 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
996 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
997 	if (spe->timeless_decoding)
998 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
999 	else
1000 		attr.sample_type |= PERF_SAMPLE_TIME;
1001 
1002 	spe->sample_type = attr.sample_type;
1003 
1004 	attr.exclude_user = evsel->core.attr.exclude_user;
1005 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1006 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1007 	attr.exclude_host = evsel->core.attr.exclude_host;
1008 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1009 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1010 	attr.read_format = evsel->core.attr.read_format;
1011 
1012 	/* create new id val to be a fixed offset from evsel id */
1013 	id = evsel->core.id[0] + 1000000000;
1014 
1015 	if (!id)
1016 		id = 1;
1017 
1018 	if (spe->synth_opts.flc) {
1019 		spe->sample_flc = true;
1020 
1021 		/* Level 1 data cache miss */
1022 		err = arm_spe_synth_event(session, &attr, id);
1023 		if (err)
1024 			return err;
1025 		spe->l1d_miss_id = id;
1026 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1027 		id += 1;
1028 
1029 		/* Level 1 data cache access */
1030 		err = arm_spe_synth_event(session, &attr, id);
1031 		if (err)
1032 			return err;
1033 		spe->l1d_access_id = id;
1034 		arm_spe_set_event_name(evlist, id, "l1d-access");
1035 		id += 1;
1036 	}
1037 
1038 	if (spe->synth_opts.llc) {
1039 		spe->sample_llc = true;
1040 
1041 		/* Last level cache miss */
1042 		err = arm_spe_synth_event(session, &attr, id);
1043 		if (err)
1044 			return err;
1045 		spe->llc_miss_id = id;
1046 		arm_spe_set_event_name(evlist, id, "llc-miss");
1047 		id += 1;
1048 
1049 		/* Last level cache access */
1050 		err = arm_spe_synth_event(session, &attr, id);
1051 		if (err)
1052 			return err;
1053 		spe->llc_access_id = id;
1054 		arm_spe_set_event_name(evlist, id, "llc-access");
1055 		id += 1;
1056 	}
1057 
1058 	if (spe->synth_opts.tlb) {
1059 		spe->sample_tlb = true;
1060 
1061 		/* TLB miss */
1062 		err = arm_spe_synth_event(session, &attr, id);
1063 		if (err)
1064 			return err;
1065 		spe->tlb_miss_id = id;
1066 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1067 		id += 1;
1068 
1069 		/* TLB access */
1070 		err = arm_spe_synth_event(session, &attr, id);
1071 		if (err)
1072 			return err;
1073 		spe->tlb_access_id = id;
1074 		arm_spe_set_event_name(evlist, id, "tlb-access");
1075 		id += 1;
1076 	}
1077 
1078 	if (spe->synth_opts.branches) {
1079 		spe->sample_branch = true;
1080 
1081 		/* Branch miss */
1082 		err = arm_spe_synth_event(session, &attr, id);
1083 		if (err)
1084 			return err;
1085 		spe->branch_miss_id = id;
1086 		arm_spe_set_event_name(evlist, id, "branch-miss");
1087 		id += 1;
1088 	}
1089 
1090 	if (spe->synth_opts.remote_access) {
1091 		spe->sample_remote_access = true;
1092 
1093 		/* Remote access */
1094 		err = arm_spe_synth_event(session, &attr, id);
1095 		if (err)
1096 			return err;
1097 		spe->remote_access_id = id;
1098 		arm_spe_set_event_name(evlist, id, "remote-access");
1099 		id += 1;
1100 	}
1101 
1102 	if (spe->synth_opts.mem) {
1103 		spe->sample_memory = true;
1104 
1105 		err = arm_spe_synth_event(session, &attr, id);
1106 		if (err)
1107 			return err;
1108 		spe->memory_id = id;
1109 		arm_spe_set_event_name(evlist, id, "memory");
1110 	}
1111 
1112 	return 0;
1113 }
1114 
1115 int arm_spe_process_auxtrace_info(union perf_event *event,
1116 				  struct perf_session *session)
1117 {
1118 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1119 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1120 	struct perf_record_time_conv *tc = &session->time_conv;
1121 	struct arm_spe *spe;
1122 	int err;
1123 
1124 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1125 					min_sz)
1126 		return -EINVAL;
1127 
1128 	spe = zalloc(sizeof(struct arm_spe));
1129 	if (!spe)
1130 		return -ENOMEM;
1131 
1132 	err = auxtrace_queues__init(&spe->queues);
1133 	if (err)
1134 		goto err_free;
1135 
1136 	spe->session = session;
1137 	spe->machine = &session->machines.host; /* No kvm support */
1138 	spe->auxtrace_type = auxtrace_info->type;
1139 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1140 
1141 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1142 
1143 	/*
1144 	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1145 	 * and the parameters for hardware clock are stored in the session
1146 	 * context.  Passes these parameters to the struct perf_tsc_conversion
1147 	 * in "spe->tc", which is used for later conversion between clock
1148 	 * counter and timestamp.
1149 	 *
1150 	 * For backward compatibility, copies the fields starting from
1151 	 * "time_cycles" only if they are contained in the event.
1152 	 */
1153 	spe->tc.time_shift = tc->time_shift;
1154 	spe->tc.time_mult = tc->time_mult;
1155 	spe->tc.time_zero = tc->time_zero;
1156 
1157 	if (event_contains(*tc, time_cycles)) {
1158 		spe->tc.time_cycles = tc->time_cycles;
1159 		spe->tc.time_mask = tc->time_mask;
1160 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1161 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1162 	}
1163 
1164 	spe->auxtrace.process_event = arm_spe_process_event;
1165 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1166 	spe->auxtrace.flush_events = arm_spe_flush;
1167 	spe->auxtrace.free_events = arm_spe_free_events;
1168 	spe->auxtrace.free = arm_spe_free;
1169 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1170 	session->auxtrace = &spe->auxtrace;
1171 
1172 	arm_spe_print_info(&auxtrace_info->priv[0]);
1173 
1174 	if (dump_trace)
1175 		return 0;
1176 
1177 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1178 		spe->synth_opts = *session->itrace_synth_opts;
1179 	else
1180 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1181 
1182 	err = arm_spe_synth_events(spe, session);
1183 	if (err)
1184 		goto err_free_queues;
1185 
1186 	err = auxtrace_queues__process_index(&spe->queues, session);
1187 	if (err)
1188 		goto err_free_queues;
1189 
1190 	if (spe->queues.populated)
1191 		spe->data_queued = true;
1192 
1193 	return 0;
1194 
1195 err_free_queues:
1196 	auxtrace_queues__free(&spe->queues);
1197 	session->auxtrace = NULL;
1198 err_free:
1199 	free(spe);
1200 	return err;
1201 }
1202