xref: /openbmc/linux/tools/perf/util/arm-spe.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1ffd3d18cSKim Phillips // SPDX-License-Identifier: GPL-2.0
2ffd3d18cSKim Phillips /*
3ffd3d18cSKim Phillips  * Arm Statistical Profiling Extensions (SPE) support
4ffd3d18cSKim Phillips  * Copyright (c) 2017-2018, Arm Ltd.
5ffd3d18cSKim Phillips  */
6ffd3d18cSKim Phillips 
7a54ca194STan Xiaojun #include <byteswap.h>
8ffd3d18cSKim Phillips #include <endian.h>
9ffd3d18cSKim Phillips #include <errno.h>
10ffd3d18cSKim Phillips #include <inttypes.h>
11ffd3d18cSKim Phillips #include <linux/bitops.h>
12a54ca194STan Xiaojun #include <linux/kernel.h>
13ffd3d18cSKim Phillips #include <linux/log2.h>
14a54ca194STan Xiaojun #include <linux/types.h>
157f7c536fSArnaldo Carvalho de Melo #include <linux/zalloc.h>
16a54ca194STan Xiaojun #include <stdlib.h>
17a54ca194STan Xiaojun #include <unistd.h>
18ffd3d18cSKim Phillips 
19a54ca194STan Xiaojun #include "auxtrace.h"
20ffd3d18cSKim Phillips #include "color.h"
21a54ca194STan Xiaojun #include "debug.h"
22a54ca194STan Xiaojun #include "evlist.h"
23ffd3d18cSKim Phillips #include "evsel.h"
24ffd3d18cSKim Phillips #include "machine.h"
25ffd3d18cSKim Phillips #include "session.h"
26a54ca194STan Xiaojun #include "symbol.h"
27a54ca194STan Xiaojun #include "thread.h"
28a54ca194STan Xiaojun #include "thread-stack.h"
29c210c306SLeo Yan #include "tsc.h"
30a54ca194STan Xiaojun #include "tool.h"
31a54ca194STan Xiaojun #include "util/synthetic-events.h"
32a54ca194STan Xiaojun 
33ffd3d18cSKim Phillips #include "arm-spe.h"
34a54ca194STan Xiaojun #include "arm-spe-decoder/arm-spe-decoder.h"
354db25f66STan Xiaojun #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36ffd3d18cSKim Phillips 
374e6430cbSAli Saidi #include "../../arch/arm64/include/asm/cputype.h"
38a54ca194STan Xiaojun #define MAX_TIMESTAMP (~0ULL)
39a54ca194STan Xiaojun 
40ffd3d18cSKim Phillips struct arm_spe {
41ffd3d18cSKim Phillips 	struct auxtrace			auxtrace;
42ffd3d18cSKim Phillips 	struct auxtrace_queues		queues;
43ffd3d18cSKim Phillips 	struct auxtrace_heap		heap;
44a54ca194STan Xiaojun 	struct itrace_synth_opts        synth_opts;
45ffd3d18cSKim Phillips 	u32				auxtrace_type;
46ffd3d18cSKim Phillips 	struct perf_session		*session;
47ffd3d18cSKim Phillips 	struct machine			*machine;
48ffd3d18cSKim Phillips 	u32				pmu_type;
494e6430cbSAli Saidi 	u64				midr;
50a54ca194STan Xiaojun 
51c210c306SLeo Yan 	struct perf_tsc_conversion	tc;
52c210c306SLeo Yan 
53a54ca194STan Xiaojun 	u8				timeless_decoding;
54a54ca194STan Xiaojun 	u8				data_queued;
55a54ca194STan Xiaojun 
569e1a8d9fSGerman Gomez 	u64				sample_type;
57a54ca194STan Xiaojun 	u8				sample_flc;
58a54ca194STan Xiaojun 	u8				sample_llc;
59a54ca194STan Xiaojun 	u8				sample_tlb;
60a54ca194STan Xiaojun 	u8				sample_branch;
61a54ca194STan Xiaojun 	u8				sample_remote_access;
62e55ed342SLeo Yan 	u8				sample_memory;
63ff8752d7SGerman Gomez 	u8				sample_instructions;
64ff8752d7SGerman Gomez 	u64				instructions_sample_period;
65a54ca194STan Xiaojun 
66a54ca194STan Xiaojun 	u64				l1d_miss_id;
67a54ca194STan Xiaojun 	u64				l1d_access_id;
68a54ca194STan Xiaojun 	u64				llc_miss_id;
69a54ca194STan Xiaojun 	u64				llc_access_id;
70a54ca194STan Xiaojun 	u64				tlb_miss_id;
71a54ca194STan Xiaojun 	u64				tlb_access_id;
72a54ca194STan Xiaojun 	u64				branch_miss_id;
73a54ca194STan Xiaojun 	u64				remote_access_id;
74e55ed342SLeo Yan 	u64				memory_id;
75ff8752d7SGerman Gomez 	u64				instructions_id;
76a54ca194STan Xiaojun 
77a54ca194STan Xiaojun 	u64				kernel_start;
78a54ca194STan Xiaojun 
79a54ca194STan Xiaojun 	unsigned long			num_events;
8027d113cfSGerman Gomez 	u8				use_ctx_pkt_for_pid;
81ffd3d18cSKim Phillips };
82ffd3d18cSKim Phillips 
83ffd3d18cSKim Phillips struct arm_spe_queue {
84ffd3d18cSKim Phillips 	struct arm_spe			*spe;
85ffd3d18cSKim Phillips 	unsigned int			queue_nr;
86ffd3d18cSKim Phillips 	struct auxtrace_buffer		*buffer;
87a54ca194STan Xiaojun 	struct auxtrace_buffer		*old_buffer;
88a54ca194STan Xiaojun 	union perf_event		*event_buf;
89ffd3d18cSKim Phillips 	bool				on_heap;
90ffd3d18cSKim Phillips 	bool				done;
91ffd3d18cSKim Phillips 	pid_t				pid;
92ffd3d18cSKim Phillips 	pid_t				tid;
93ffd3d18cSKim Phillips 	int				cpu;
94a54ca194STan Xiaojun 	struct arm_spe_decoder		*decoder;
95a54ca194STan Xiaojun 	u64				time;
96a54ca194STan Xiaojun 	u64				timestamp;
97a54ca194STan Xiaojun 	struct thread			*thread;
98ff8752d7SGerman Gomez 	u64				period_instructions;
99ffd3d18cSKim Phillips };
100ffd3d18cSKim Phillips 
/*
 * Hex-dump a raw SPE trace buffer to stdout.
 *
 * Prints a size banner, then one line per packet: the byte offset, the raw
 * packet bytes (padded to 16 columns) and either the packet description or
 * " Bad packet!" when the bytes cannot be parsed.
 *
 * @spe: unused.
 * @buf: start of the trace data.
 * @len: number of bytes available at @buf.
 */
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %#zx bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		/* On a parse failure, consume one byte and retry from the next. */
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		/* pos is a size_t, so the conversion needs the 'z' modifier. */
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		/* Pad the hex column so descriptions line up. */
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}
139ffd3d18cSKim Phillips 
/*
 * Dump the SPE data of one event: a "." separator line followed by the
 * packet listing produced by arm_spe_dump().
 */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
146ffd3d18cSKim Phillips 
arm_spe_get_trace(struct arm_spe_buffer * b,void * data)147a54ca194STan Xiaojun static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
148ffd3d18cSKim Phillips {
149a54ca194STan Xiaojun 	struct arm_spe_queue *speq = data;
150a54ca194STan Xiaojun 	struct auxtrace_buffer *buffer = speq->buffer;
151a54ca194STan Xiaojun 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
152a54ca194STan Xiaojun 	struct auxtrace_queue *queue;
153a54ca194STan Xiaojun 
154a54ca194STan Xiaojun 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
155a54ca194STan Xiaojun 
156a54ca194STan Xiaojun 	buffer = auxtrace_buffer__next(queue, buffer);
157a54ca194STan Xiaojun 	/* If no more data, drop the previous auxtrace_buffer and return */
158a54ca194STan Xiaojun 	if (!buffer) {
159a54ca194STan Xiaojun 		if (old_buffer)
160a54ca194STan Xiaojun 			auxtrace_buffer__drop_data(old_buffer);
161a54ca194STan Xiaojun 		b->len = 0;
162ffd3d18cSKim Phillips 		return 0;
163ffd3d18cSKim Phillips 	}
164ffd3d18cSKim Phillips 
165a54ca194STan Xiaojun 	speq->buffer = buffer;
166a54ca194STan Xiaojun 
167a54ca194STan Xiaojun 	/* If the aux_buffer doesn't have data associated, try to load it */
168a54ca194STan Xiaojun 	if (!buffer->data) {
169a54ca194STan Xiaojun 		/* get the file desc associated with the perf data file */
170a54ca194STan Xiaojun 		int fd = perf_data__fd(speq->spe->session->data);
171a54ca194STan Xiaojun 
172a54ca194STan Xiaojun 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
173a54ca194STan Xiaojun 		if (!buffer->data)
174a54ca194STan Xiaojun 			return -ENOMEM;
175a54ca194STan Xiaojun 	}
176a54ca194STan Xiaojun 
177a54ca194STan Xiaojun 	b->len = buffer->size;
178a54ca194STan Xiaojun 	b->buf = buffer->data;
179a54ca194STan Xiaojun 
180a54ca194STan Xiaojun 	if (b->len) {
181a54ca194STan Xiaojun 		if (old_buffer)
182a54ca194STan Xiaojun 			auxtrace_buffer__drop_data(old_buffer);
183a54ca194STan Xiaojun 		speq->old_buffer = buffer;
184a54ca194STan Xiaojun 	} else {
185a54ca194STan Xiaojun 		auxtrace_buffer__drop_data(buffer);
186a54ca194STan Xiaojun 		return arm_spe_get_trace(b, data);
187a54ca194STan Xiaojun 	}
188a54ca194STan Xiaojun 
189a54ca194STan Xiaojun 	return 0;
190a54ca194STan Xiaojun }
191a54ca194STan Xiaojun 
arm_spe__alloc_queue(struct arm_spe * spe,unsigned int queue_nr)192a54ca194STan Xiaojun static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
193a54ca194STan Xiaojun 		unsigned int queue_nr)
194a54ca194STan Xiaojun {
195a54ca194STan Xiaojun 	struct arm_spe_params params = { .get_trace = 0, };
196a54ca194STan Xiaojun 	struct arm_spe_queue *speq;
197a54ca194STan Xiaojun 
198a54ca194STan Xiaojun 	speq = zalloc(sizeof(*speq));
199a54ca194STan Xiaojun 	if (!speq)
200a54ca194STan Xiaojun 		return NULL;
201a54ca194STan Xiaojun 
202a54ca194STan Xiaojun 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
203a54ca194STan Xiaojun 	if (!speq->event_buf)
204a54ca194STan Xiaojun 		goto out_free;
205a54ca194STan Xiaojun 
206a54ca194STan Xiaojun 	speq->spe = spe;
207a54ca194STan Xiaojun 	speq->queue_nr = queue_nr;
208a54ca194STan Xiaojun 	speq->pid = -1;
209a54ca194STan Xiaojun 	speq->tid = -1;
210a54ca194STan Xiaojun 	speq->cpu = -1;
211ff8752d7SGerman Gomez 	speq->period_instructions = 0;
212a54ca194STan Xiaojun 
213a54ca194STan Xiaojun 	/* params set */
214a54ca194STan Xiaojun 	params.get_trace = arm_spe_get_trace;
215a54ca194STan Xiaojun 	params.data = speq;
216a54ca194STan Xiaojun 
217a54ca194STan Xiaojun 	/* create new decoder */
218a54ca194STan Xiaojun 	speq->decoder = arm_spe_decoder_new(&params);
219a54ca194STan Xiaojun 	if (!speq->decoder)
220a54ca194STan Xiaojun 		goto out_free;
221a54ca194STan Xiaojun 
222a54ca194STan Xiaojun 	return speq;
223a54ca194STan Xiaojun 
224a54ca194STan Xiaojun out_free:
225a54ca194STan Xiaojun 	zfree(&speq->event_buf);
226a54ca194STan Xiaojun 	free(speq);
227a54ca194STan Xiaojun 
228a54ca194STan Xiaojun 	return NULL;
229a54ca194STan Xiaojun }
230a54ca194STan Xiaojun 
arm_spe_cpumode(struct arm_spe * spe,u64 ip)231a54ca194STan Xiaojun static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
232a54ca194STan Xiaojun {
233a54ca194STan Xiaojun 	return ip >= spe->kernel_start ?
234a54ca194STan Xiaojun 		PERF_RECORD_MISC_KERNEL :
235a54ca194STan Xiaojun 		PERF_RECORD_MISC_USER;
236a54ca194STan Xiaojun }
237a54ca194STan Xiaojun 
arm_spe_set_pid_tid_cpu(struct arm_spe * spe,struct auxtrace_queue * queue)23827d113cfSGerman Gomez static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
23927d113cfSGerman Gomez 				    struct auxtrace_queue *queue)
24027d113cfSGerman Gomez {
24127d113cfSGerman Gomez 	struct arm_spe_queue *speq = queue->priv;
24227d113cfSGerman Gomez 	pid_t tid;
24327d113cfSGerman Gomez 
24427d113cfSGerman Gomez 	tid = machine__get_current_tid(spe->machine, speq->cpu);
24527d113cfSGerman Gomez 	if (tid != -1) {
24627d113cfSGerman Gomez 		speq->tid = tid;
24727d113cfSGerman Gomez 		thread__zput(speq->thread);
24827d113cfSGerman Gomez 	} else
24927d113cfSGerman Gomez 		speq->tid = queue->tid;
25027d113cfSGerman Gomez 
25127d113cfSGerman Gomez 	if ((!speq->thread) && (speq->tid != -1)) {
25227d113cfSGerman Gomez 		speq->thread = machine__find_thread(spe->machine, -1,
25327d113cfSGerman Gomez 						    speq->tid);
25427d113cfSGerman Gomez 	}
25527d113cfSGerman Gomez 
25627d113cfSGerman Gomez 	if (speq->thread) {
25727d113cfSGerman Gomez 		speq->pid = thread__pid(speq->thread);
25827d113cfSGerman Gomez 		if (queue->cpu == -1)
25927d113cfSGerman Gomez 			speq->cpu = thread__cpu(speq->thread);
26027d113cfSGerman Gomez 	}
26127d113cfSGerman Gomez }
26227d113cfSGerman Gomez 
arm_spe_set_tid(struct arm_spe_queue * speq,pid_t tid)26327d113cfSGerman Gomez static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
26427d113cfSGerman Gomez {
26527d113cfSGerman Gomez 	struct arm_spe *spe = speq->spe;
26627d113cfSGerman Gomez 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
26727d113cfSGerman Gomez 
26827d113cfSGerman Gomez 	if (err)
26927d113cfSGerman Gomez 		return err;
27027d113cfSGerman Gomez 
27127d113cfSGerman Gomez 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
27227d113cfSGerman Gomez 
27327d113cfSGerman Gomez 	return 0;
27427d113cfSGerman Gomez }
27527d113cfSGerman Gomez 
arm_spe__synth_simd_flags(const struct arm_spe_record * record)276*03a6c16eSGerman Gomez static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
277*03a6c16eSGerman Gomez {
278*03a6c16eSGerman Gomez 	struct simd_flags simd_flags = {};
279*03a6c16eSGerman Gomez 
280*03a6c16eSGerman Gomez 	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
281*03a6c16eSGerman Gomez 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
282*03a6c16eSGerman Gomez 
283*03a6c16eSGerman Gomez 	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
284*03a6c16eSGerman Gomez 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
285*03a6c16eSGerman Gomez 
286*03a6c16eSGerman Gomez 	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
287*03a6c16eSGerman Gomez 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
288*03a6c16eSGerman Gomez 
289*03a6c16eSGerman Gomez 	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
290*03a6c16eSGerman Gomez 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
291*03a6c16eSGerman Gomez 
292*03a6c16eSGerman Gomez 	return simd_flags;
293*03a6c16eSGerman Gomez }
294*03a6c16eSGerman Gomez 
arm_spe_prep_sample(struct arm_spe * spe,struct arm_spe_queue * speq,union perf_event * event,struct perf_sample * sample)295a54ca194STan Xiaojun static void arm_spe_prep_sample(struct arm_spe *spe,
296a54ca194STan Xiaojun 				struct arm_spe_queue *speq,
297a54ca194STan Xiaojun 				union perf_event *event,
298a54ca194STan Xiaojun 				struct perf_sample *sample)
299a54ca194STan Xiaojun {
300a54ca194STan Xiaojun 	struct arm_spe_record *record = &speq->decoder->record;
301a54ca194STan Xiaojun 
302a54ca194STan Xiaojun 	if (!spe->timeless_decoding)
30385498f75SLeo Yan 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
304a54ca194STan Xiaojun 
305a54ca194STan Xiaojun 	sample->ip = record->from_ip;
306a54ca194STan Xiaojun 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
307a54ca194STan Xiaojun 	sample->pid = speq->pid;
308a54ca194STan Xiaojun 	sample->tid = speq->tid;
309a54ca194STan Xiaojun 	sample->period = 1;
310a54ca194STan Xiaojun 	sample->cpu = speq->cpu;
311*03a6c16eSGerman Gomez 	sample->simd_flags = arm_spe__synth_simd_flags(record);
312a54ca194STan Xiaojun 
313a54ca194STan Xiaojun 	event->sample.header.type = PERF_RECORD_SAMPLE;
314a54ca194STan Xiaojun 	event->sample.header.misc = sample->cpumode;
315a54ca194STan Xiaojun 	event->sample.header.size = sizeof(struct perf_event_header);
316a54ca194STan Xiaojun }
317a54ca194STan Xiaojun 
/*
 * Prepare @event for injection: set its header size to the size computed
 * for @sample with sample type @type, then synthesize the sample into it.
 *
 * Returns the result of perf_event__synthesize_sample().
 */
static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	const u64 read_format = 0;

	event->header.size = perf_event__sample_event_size(sample, type, read_format);

	return perf_event__synthesize_sample(event, type, read_format, sample);
}
3239e1a8d9fSGerman Gomez 
324a54ca194STan Xiaojun static inline int
arm_spe_deliver_synth_event(struct arm_spe * spe,struct arm_spe_queue * speq __maybe_unused,union perf_event * event,struct perf_sample * sample)325a54ca194STan Xiaojun arm_spe_deliver_synth_event(struct arm_spe *spe,
326a54ca194STan Xiaojun 			    struct arm_spe_queue *speq __maybe_unused,
327a54ca194STan Xiaojun 			    union perf_event *event,
328a54ca194STan Xiaojun 			    struct perf_sample *sample)
329a54ca194STan Xiaojun {
330a54ca194STan Xiaojun 	int ret;
331a54ca194STan Xiaojun 
3329e1a8d9fSGerman Gomez 	if (spe->synth_opts.inject) {
3339e1a8d9fSGerman Gomez 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
3349e1a8d9fSGerman Gomez 		if (ret)
3359e1a8d9fSGerman Gomez 			return ret;
3369e1a8d9fSGerman Gomez 	}
3379e1a8d9fSGerman Gomez 
338a54ca194STan Xiaojun 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
339a54ca194STan Xiaojun 	if (ret)
340a54ca194STan Xiaojun 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
341a54ca194STan Xiaojun 
342a54ca194STan Xiaojun 	return ret;
343a54ca194STan Xiaojun }
344a54ca194STan Xiaojun 
/*
 * Synthesize and deliver a memory sample for the decoder's current record.
 *
 * @spe_events_id: id stored as both sample id and stream id.
 * @data_src:      encoded perf_mem_data_src value for the access.
 *
 * Returns the result of arm_spe_deliver_synth_event().
 */
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	/* memory-specific fields; the common ones are filled in below */
	struct perf_sample sample = {
		.id		= spe_events_id,
		.stream_id	= spe_events_id,
		.addr		= record->virt_addr,
		.phys_addr	= record->phys_addr,
		.data_src	= data_src,
		.weight		= record->latency,
	};

	arm_spe_prep_sample(spe, speq, event, &sample);

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
36454f7815eSLeo Yan 
/*
 * Synthesize and deliver a branch sample for the decoder's current record;
 * the sample address is the branch target (record->to_ip).
 *
 * @spe_events_id: id stored as both sample id and stream id.
 *
 * Returns the result of arm_spe_deliver_synth_event().
 */
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	/* branch-specific fields; the common ones are filled in below */
	struct perf_sample sample = {
		.id		= spe_events_id,
		.stream_id	= spe_events_id,
		.addr		= record->to_ip,
		.weight		= record->latency,
	};

	arm_spe_prep_sample(spe, speq, event, &sample);

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
382a54ca194STan Xiaojun 
/*
 * Count one record towards the instruction sampling period and, once the
 * period has been reached, synthesize and deliver an instruction sample.
 *
 * @spe_events_id: id stored as both sample id and stream id.
 * @data_src:      encoded perf_mem_data_src value for the record.
 *
 * Returns 0 when no sample is due yet, otherwise the result of
 * arm_spe_deliver_synth_event().
 */
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
					     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	/* Only every instructions_sample_period-th record yields a sample. */
	if (++speq->period_instructions < spe->instructions_sample_period)
		return 0;
	speq->period_instructions = 0;

	arm_spe_prep_sample(spe, speq, event, &sample);

	/* override the default period of 1 set by arm_spe_prep_sample() */
	sample.period = spe->instructions_sample_period;

	sample.stream_id = spe_events_id;
	sample.id = spe_events_id;
	sample.phys_addr = record->phys_addr;
	sample.addr = record->virt_addr;
	sample.weight = record->latency;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
411ff8752d7SGerman Gomez 
4124e6430cbSAli Saidi static const struct midr_range neoverse_spe[] = {
4134e6430cbSAli Saidi 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
4144e6430cbSAli Saidi 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
4154e6430cbSAli Saidi 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
4164e6430cbSAli Saidi 	{},
4174e6430cbSAli Saidi };
4184e6430cbSAli Saidi 
arm_spe__synth_data_source_neoverse(const struct arm_spe_record * record,union perf_mem_data_src * data_src)4194e6430cbSAli Saidi static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
4204e6430cbSAli Saidi 						union perf_mem_data_src *data_src)
4214e6430cbSAli Saidi {
4224e6430cbSAli Saidi 	/*
4234e6430cbSAli Saidi 	 * Even though four levels of cache hierarchy are possible, no known
4244e6430cbSAli Saidi 	 * production Neoverse systems currently include more than three levels
4254e6430cbSAli Saidi 	 * so for the time being we assume three exist. If a production system
4264e6430cbSAli Saidi 	 * is built with four the this function would have to be changed to
4274e6430cbSAli Saidi 	 * detect the number of levels for reporting.
4284e6430cbSAli Saidi 	 */
4294e6430cbSAli Saidi 
4304e6430cbSAli Saidi 	/*
4314e6430cbSAli Saidi 	 * We have no data on the hit level or data source for stores in the
4324e6430cbSAli Saidi 	 * Neoverse SPE records.
4334e6430cbSAli Saidi 	 */
4340066015aSGerman Gomez 	if (record->op & ARM_SPE_OP_ST) {
4354e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_NA;
4364e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
4374e6430cbSAli Saidi 		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
4384e6430cbSAli Saidi 		return;
4394e6430cbSAli Saidi 	}
4404e6430cbSAli Saidi 
4414e6430cbSAli Saidi 	switch (record->source) {
4424e6430cbSAli Saidi 	case ARM_SPE_NV_L1D:
4434e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
4444e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
4454e6430cbSAli Saidi 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
4464e6430cbSAli Saidi 		break;
4474e6430cbSAli Saidi 	case ARM_SPE_NV_L2:
4484e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
4494e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
4504e6430cbSAli Saidi 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
4514e6430cbSAli Saidi 		break;
4524e6430cbSAli Saidi 	case ARM_SPE_NV_PEER_CORE:
4534e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
4544e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
4554e6430cbSAli Saidi 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
4564e6430cbSAli Saidi 		break;
4574e6430cbSAli Saidi 	/*
4584e6430cbSAli Saidi 	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
4594e6430cbSAli Saidi 	 * transfer, so set SNOOPX_PEER
4604e6430cbSAli Saidi 	 */
4614e6430cbSAli Saidi 	case ARM_SPE_NV_LOCAL_CLUSTER:
4624e6430cbSAli Saidi 	case ARM_SPE_NV_PEER_CLUSTER:
4634e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
4644e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
4654e6430cbSAli Saidi 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
4664e6430cbSAli Saidi 		break;
4674e6430cbSAli Saidi 	/*
4684e6430cbSAli Saidi 	 * System cache is assumed to be L3
4694e6430cbSAli Saidi 	 */
4704e6430cbSAli Saidi 	case ARM_SPE_NV_SYS_CACHE:
4714e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
4724e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
4734e6430cbSAli Saidi 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
4744e6430cbSAli Saidi 		break;
4754e6430cbSAli Saidi 	/*
4764e6430cbSAli Saidi 	 * We don't know what level it hit in, except it came from the other
4774e6430cbSAli Saidi 	 * socket
4784e6430cbSAli Saidi 	 */
4794e6430cbSAli Saidi 	case ARM_SPE_NV_REMOTE:
4804e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
4814e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
4824e6430cbSAli Saidi 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
4834e6430cbSAli Saidi 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
4844e6430cbSAli Saidi 		break;
4854e6430cbSAli Saidi 	case ARM_SPE_NV_DRAM:
4864e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
4874e6430cbSAli Saidi 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
4884e6430cbSAli Saidi 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
4894e6430cbSAli Saidi 		break;
4904e6430cbSAli Saidi 	default:
4914e6430cbSAli Saidi 		break;
4924e6430cbSAli Saidi 	}
4934e6430cbSAli Saidi }
4944e6430cbSAli Saidi 
arm_spe__synth_data_source_generic(const struct arm_spe_record * record,union perf_mem_data_src * data_src)4954e6430cbSAli Saidi static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
4964e6430cbSAli Saidi 					       union perf_mem_data_src *data_src)
4974e6430cbSAli Saidi {
4984e6430cbSAli Saidi 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
4994e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L3;
5004e6430cbSAli Saidi 
5014e6430cbSAli Saidi 		if (record->type & ARM_SPE_LLC_MISS)
5024e6430cbSAli Saidi 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
5034e6430cbSAli Saidi 		else
5044e6430cbSAli Saidi 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
5054e6430cbSAli Saidi 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
5064e6430cbSAli Saidi 		data_src->mem_lvl = PERF_MEM_LVL_L1;
5074e6430cbSAli Saidi 
5084e6430cbSAli Saidi 		if (record->type & ARM_SPE_L1D_MISS)
5094e6430cbSAli Saidi 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
5104e6430cbSAli Saidi 		else
5114e6430cbSAli Saidi 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
5124e6430cbSAli Saidi 	}
5134e6430cbSAli Saidi 
5144e6430cbSAli Saidi 	if (record->type & ARM_SPE_REMOTE_ACCESS)
5154e6430cbSAli Saidi 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
5164e6430cbSAli Saidi }
5174e6430cbSAli Saidi 
/*
 * Encode a record as a perf_mem_data_src value.
 *
 * @record: decoded SPE record.
 * @midr:   CPU MIDR, used to pick the Neoverse-specific or the generic
 *          memory level decoding.
 *
 * Returns 0 when the record is neither a load nor a store, otherwise the
 * encoded data source (operation, memory level and TLB information).
 */
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);

	/* Not a memory operation at all. */
	if (!(record->op & (ARM_SPE_OP_LD | ARM_SPE_OP_ST)))
		return 0;

	/* A record flagged as a load is reported as a load. */
	data_src.mem_op = (record->op & ARM_SPE_OP_LD) ?
			  PERF_MEM_OP_LOAD : PERF_MEM_OP_STORE;

	if (!is_neoverse)
		arm_spe__synth_data_source_generic(record, &data_src);
	else
		arm_spe__synth_data_source_neoverse(record, &data_src);

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;
		data_src.mem_dtlb |= (record->type & ARM_SPE_TLB_MISS) ?
				     PERF_MEM_TLB_MISS : PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}
546a89dbc9bSLeo Yan 
arm_spe_sample(struct arm_spe_queue * speq)547a54ca194STan Xiaojun static int arm_spe_sample(struct arm_spe_queue *speq)
548a54ca194STan Xiaojun {
549a54ca194STan Xiaojun 	const struct arm_spe_record *record = &speq->decoder->record;
550a54ca194STan Xiaojun 	struct arm_spe *spe = speq->spe;
551a89dbc9bSLeo Yan 	u64 data_src;
552a54ca194STan Xiaojun 	int err;
553a54ca194STan Xiaojun 
5544e6430cbSAli Saidi 	data_src = arm_spe__synth_data_source(record, spe->midr);
555a89dbc9bSLeo Yan 
556a54ca194STan Xiaojun 	if (spe->sample_flc) {
557a54ca194STan Xiaojun 		if (record->type & ARM_SPE_L1D_MISS) {
558a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
559a89dbc9bSLeo Yan 							data_src);
560a54ca194STan Xiaojun 			if (err)
561a54ca194STan Xiaojun 				return err;
562a54ca194STan Xiaojun 		}
563a54ca194STan Xiaojun 
564a54ca194STan Xiaojun 		if (record->type & ARM_SPE_L1D_ACCESS) {
565a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
566a89dbc9bSLeo Yan 							data_src);
567a54ca194STan Xiaojun 			if (err)
568a54ca194STan Xiaojun 				return err;
569a54ca194STan Xiaojun 		}
570a54ca194STan Xiaojun 	}
571a54ca194STan Xiaojun 
572a54ca194STan Xiaojun 	if (spe->sample_llc) {
573a54ca194STan Xiaojun 		if (record->type & ARM_SPE_LLC_MISS) {
574a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
575a89dbc9bSLeo Yan 							data_src);
576a54ca194STan Xiaojun 			if (err)
577a54ca194STan Xiaojun 				return err;
578a54ca194STan Xiaojun 		}
579a54ca194STan Xiaojun 
580a54ca194STan Xiaojun 		if (record->type & ARM_SPE_LLC_ACCESS) {
581a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
582a89dbc9bSLeo Yan 							data_src);
583a54ca194STan Xiaojun 			if (err)
584a54ca194STan Xiaojun 				return err;
585a54ca194STan Xiaojun 		}
586a54ca194STan Xiaojun 	}
587a54ca194STan Xiaojun 
588a54ca194STan Xiaojun 	if (spe->sample_tlb) {
589a54ca194STan Xiaojun 		if (record->type & ARM_SPE_TLB_MISS) {
590a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
591a89dbc9bSLeo Yan 							data_src);
592a54ca194STan Xiaojun 			if (err)
593a54ca194STan Xiaojun 				return err;
594a54ca194STan Xiaojun 		}
595a54ca194STan Xiaojun 
596a54ca194STan Xiaojun 		if (record->type & ARM_SPE_TLB_ACCESS) {
597a89dbc9bSLeo Yan 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
598a89dbc9bSLeo Yan 							data_src);
599a54ca194STan Xiaojun 			if (err)
600a54ca194STan Xiaojun 				return err;
601a54ca194STan Xiaojun 		}
602a54ca194STan Xiaojun 	}
603a54ca194STan Xiaojun 
604a54ca194STan Xiaojun 	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
60554f7815eSLeo Yan 		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
606a54ca194STan Xiaojun 		if (err)
607a54ca194STan Xiaojun 			return err;
608a54ca194STan Xiaojun 	}
609a54ca194STan Xiaojun 
610a54ca194STan Xiaojun 	if (spe->sample_remote_access &&
611a54ca194STan Xiaojun 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
612a89dbc9bSLeo Yan 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
613a89dbc9bSLeo Yan 						data_src);
614a54ca194STan Xiaojun 		if (err)
615a54ca194STan Xiaojun 			return err;
616a54ca194STan Xiaojun 	}
617a54ca194STan Xiaojun 
61851ba539fSLeo Yan 	/*
61951ba539fSLeo Yan 	 * When data_src is zero it means the record is not a memory operation,
62051ba539fSLeo Yan 	 * skip to synthesize memory sample for this case.
62151ba539fSLeo Yan 	 */
62251ba539fSLeo Yan 	if (spe->sample_memory && data_src) {
623a89dbc9bSLeo Yan 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
624e55ed342SLeo Yan 		if (err)
625e55ed342SLeo Yan 			return err;
626e55ed342SLeo Yan 	}
627e55ed342SLeo Yan 
628ff8752d7SGerman Gomez 	if (spe->sample_instructions) {
629ff8752d7SGerman Gomez 		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
630ff8752d7SGerman Gomez 		if (err)
631ff8752d7SGerman Gomez 			return err;
632ff8752d7SGerman Gomez 	}
633ff8752d7SGerman Gomez 
634a54ca194STan Xiaojun 	return 0;
635a54ca194STan Xiaojun }
636a54ca194STan Xiaojun 
/*
 * Synthesize samples for the records of one SPE queue.
 *
 * Returns 1 once arm_spe_decode() reports that no data is left, 0 when
 * decoding is paused because the queue timestamp has reached *timestamp
 * (in which case *timestamp is overwritten with the queue timestamp), or
 * the non-zero value handed back by arm_spe_set_tid() / arm_spe_sample().
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	for (;;) {
		/*
		 * Sample first, decode second. This looks reversed compared
		 * to the usual decode-then-synthesize flow, but:
		 *
		 * 1. arm_spe__setup_queue() has already decoded trace data
		 *    and produced a record without synthesizing a sample
		 *    for it, so there is a pending record to emit here.
		 * 2. When a freshly decoded record is not older than the
		 *    incoming perf event, the loop bails out and the record
		 *    stays pending; it is emitted on the next call. This
		 *    keeps Arm SPE samples correctly time-ordered against
		 *    other perf events.
		 */
		record = &speq->decoder->record;

		/* A valid context id in the record updates the pid/tid info. */
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (ret == 0) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * A decode error is not fatal: carry on with the following
		 * trace data to find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* The queue timestamp only ever moves forward. */
		if (speq->timestamp < record->timestamp)
			speq->timestamp = record->timestamp;

		/* Keep going while the queue is still behind the perf event. */
		if (spe->timeless_decoding || speq->timestamp < *timestamp)
			continue;

		/*
		 * The queue has caught up with the incoming perf event: stop
		 * so that the perf event can be processed first.
		 */
		*timestamp = speq->timestamp;
		return 0;
	}

	return 0;
}
715a54ca194STan Xiaojun 
arm_spe__setup_queue(struct arm_spe * spe,struct auxtrace_queue * queue,unsigned int queue_nr)716a54ca194STan Xiaojun static int arm_spe__setup_queue(struct arm_spe *spe,
717a54ca194STan Xiaojun 			       struct auxtrace_queue *queue,
718a54ca194STan Xiaojun 			       unsigned int queue_nr)
719a54ca194STan Xiaojun {
720a54ca194STan Xiaojun 	struct arm_spe_queue *speq = queue->priv;
721a54ca194STan Xiaojun 	struct arm_spe_record *record;
722a54ca194STan Xiaojun 
723a54ca194STan Xiaojun 	if (list_empty(&queue->head) || speq)
724a54ca194STan Xiaojun 		return 0;
725a54ca194STan Xiaojun 
726a54ca194STan Xiaojun 	speq = arm_spe__alloc_queue(spe, queue_nr);
727a54ca194STan Xiaojun 
728a54ca194STan Xiaojun 	if (!speq)
729a54ca194STan Xiaojun 		return -ENOMEM;
730a54ca194STan Xiaojun 
731a54ca194STan Xiaojun 	queue->priv = speq;
732a54ca194STan Xiaojun 
733a54ca194STan Xiaojun 	if (queue->cpu != -1)
734a54ca194STan Xiaojun 		speq->cpu = queue->cpu;
735a54ca194STan Xiaojun 
736a54ca194STan Xiaojun 	if (!speq->on_heap) {
737a54ca194STan Xiaojun 		int ret;
738a54ca194STan Xiaojun 
739a54ca194STan Xiaojun 		if (spe->timeless_decoding)
740a54ca194STan Xiaojun 			return 0;
741a54ca194STan Xiaojun 
742a54ca194STan Xiaojun retry:
743a54ca194STan Xiaojun 		ret = arm_spe_decode(speq->decoder);
744a54ca194STan Xiaojun 
745a54ca194STan Xiaojun 		if (!ret)
746a54ca194STan Xiaojun 			return 0;
747a54ca194STan Xiaojun 
748a54ca194STan Xiaojun 		if (ret < 0)
749a54ca194STan Xiaojun 			goto retry;
750a54ca194STan Xiaojun 
751a54ca194STan Xiaojun 		record = &speq->decoder->record;
752a54ca194STan Xiaojun 
753a54ca194STan Xiaojun 		speq->timestamp = record->timestamp;
754a54ca194STan Xiaojun 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
755a54ca194STan Xiaojun 		if (ret)
756a54ca194STan Xiaojun 			return ret;
757a54ca194STan Xiaojun 		speq->on_heap = true;
758a54ca194STan Xiaojun 	}
759a54ca194STan Xiaojun 
760a54ca194STan Xiaojun 	return 0;
761a54ca194STan Xiaojun }
762a54ca194STan Xiaojun 
arm_spe__setup_queues(struct arm_spe * spe)763a54ca194STan Xiaojun static int arm_spe__setup_queues(struct arm_spe *spe)
764a54ca194STan Xiaojun {
765a54ca194STan Xiaojun 	unsigned int i;
766a54ca194STan Xiaojun 	int ret;
767a54ca194STan Xiaojun 
768a54ca194STan Xiaojun 	for (i = 0; i < spe->queues.nr_queues; i++) {
769a54ca194STan Xiaojun 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
770a54ca194STan Xiaojun 		if (ret)
771a54ca194STan Xiaojun 			return ret;
772a54ca194STan Xiaojun 	}
773a54ca194STan Xiaojun 
774a54ca194STan Xiaojun 	return 0;
775a54ca194STan Xiaojun }
776a54ca194STan Xiaojun 
arm_spe__update_queues(struct arm_spe * spe)777a54ca194STan Xiaojun static int arm_spe__update_queues(struct arm_spe *spe)
778a54ca194STan Xiaojun {
779a54ca194STan Xiaojun 	if (spe->queues.new_data) {
780a54ca194STan Xiaojun 		spe->queues.new_data = false;
781a54ca194STan Xiaojun 		return arm_spe__setup_queues(spe);
782a54ca194STan Xiaojun 	}
783a54ca194STan Xiaojun 
784a54ca194STan Xiaojun 	return 0;
785a54ca194STan Xiaojun }
786a54ca194STan Xiaojun 
arm_spe__is_timeless_decoding(struct arm_spe * spe)787a54ca194STan Xiaojun static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
788a54ca194STan Xiaojun {
789a54ca194STan Xiaojun 	struct evsel *evsel;
790a54ca194STan Xiaojun 	struct evlist *evlist = spe->session->evlist;
791a54ca194STan Xiaojun 	bool timeless_decoding = true;
792a54ca194STan Xiaojun 
793a54ca194STan Xiaojun 	/*
794a54ca194STan Xiaojun 	 * Circle through the list of event and complain if we find one
795a54ca194STan Xiaojun 	 * with the time bit set.
796a54ca194STan Xiaojun 	 */
797a54ca194STan Xiaojun 	evlist__for_each_entry(evlist, evsel) {
798a54ca194STan Xiaojun 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
799a54ca194STan Xiaojun 			timeless_decoding = false;
800a54ca194STan Xiaojun 	}
801a54ca194STan Xiaojun 
802a54ca194STan Xiaojun 	return timeless_decoding;
803a54ca194STan Xiaojun }
804a54ca194STan Xiaojun 
/*
 * Decode the queues on the auxtrace heap, in heap order, until the heap is
 * empty or its top entry is no longer older than @timestamp.
 *
 * Returns 0 on success or a negative error from the decoder or from
 * re-adding a queue to the heap.
 */
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (spe->heap.heap_cnt) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		/*
		 * Decode this queue up to @timestamp, or only just past the
		 * new heap top when that comes earlier.
		 */
		ts = timestamp;
		if (spe->heap.heap_cnt) {
			u64 next = spe->heap.heap_array[0].ordinal + 1;

			if (next < timestamp)
				ts = next;
		}

		/*
		 * A previous context-switch event has set pid/tid in the
		 * machine's context, so the pid/tid in the thread and SPE
		 * queue need updating here.
		 */
		if (!spe->use_ctx_pkt_for_pid)
			arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			/* Put the queue back on the heap before bailing out. */
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (ret > 0) {
			/* Queue fully consumed: it stays off the heap. */
			speq->on_heap = false;
			continue;
		}

		ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
		if (ret < 0)
			return ret;
	}

	return 0;
}
859a54ca194STan Xiaojun 
/*
 * Timeless decoding: run the decoder on every set-up queue that belongs to
 * @tid, or on all of them when @tid is -1, stamping each with @time_.
 *
 * Always returns 0; the return value of arm_spe_run_decoder() is not
 * checked.
 */
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int nr;
	u64 ts = 0;

	for (nr = 0; nr < queues->nr_queues; nr++) {
		struct auxtrace_queue *queue = &queues->queue_array[nr];
		struct arm_spe_queue *speq = queue->priv;

		if (!speq)
			continue;

		if (tid != -1 && speq->tid != tid)
			continue;

		speq->time = time_;
		arm_spe_set_pid_tid_cpu(spe, queue);
		arm_spe_run_decoder(speq, &ts);
	}

	return 0;
}
879a54ca194STan Xiaojun 
arm_spe_context_switch(struct arm_spe * spe,union perf_event * event,struct perf_sample * sample)8809dc9855fSNamhyung Kim static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
8819dc9855fSNamhyung Kim 				  struct perf_sample *sample)
8829dc9855fSNamhyung Kim {
8839dc9855fSNamhyung Kim 	pid_t pid, tid;
8849dc9855fSNamhyung Kim 	int cpu;
8859dc9855fSNamhyung Kim 
8869dc9855fSNamhyung Kim 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
8879dc9855fSNamhyung Kim 		return 0;
8889dc9855fSNamhyung Kim 
8899dc9855fSNamhyung Kim 	pid = event->context_switch.next_prev_pid;
8909dc9855fSNamhyung Kim 	tid = event->context_switch.next_prev_tid;
8919dc9855fSNamhyung Kim 	cpu = sample->cpu;
8929dc9855fSNamhyung Kim 
8939dc9855fSNamhyung Kim 	if (tid == -1)
8949dc9855fSNamhyung Kim 		pr_warning("context_switch event has no tid\n");
8959dc9855fSNamhyung Kim 
8969dc9855fSNamhyung Kim 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
8979dc9855fSNamhyung Kim }
8989dc9855fSNamhyung Kim 
arm_spe_process_event(struct perf_session * session,union perf_event * event,struct perf_sample * sample,struct perf_tool * tool)899a54ca194STan Xiaojun static int arm_spe_process_event(struct perf_session *session,
900a54ca194STan Xiaojun 				 union perf_event *event,
901a54ca194STan Xiaojun 				 struct perf_sample *sample,
902a54ca194STan Xiaojun 				 struct perf_tool *tool)
903a54ca194STan Xiaojun {
904a54ca194STan Xiaojun 	int err = 0;
905a54ca194STan Xiaojun 	u64 timestamp;
906a54ca194STan Xiaojun 	struct arm_spe *spe = container_of(session->auxtrace,
907a54ca194STan Xiaojun 			struct arm_spe, auxtrace);
908a54ca194STan Xiaojun 
909a54ca194STan Xiaojun 	if (dump_trace)
910a54ca194STan Xiaojun 		return 0;
911a54ca194STan Xiaojun 
912a54ca194STan Xiaojun 	if (!tool->ordered_events) {
913a54ca194STan Xiaojun 		pr_err("SPE trace requires ordered events\n");
914a54ca194STan Xiaojun 		return -EINVAL;
915a54ca194STan Xiaojun 	}
916a54ca194STan Xiaojun 
917a54ca194STan Xiaojun 	if (sample->time && (sample->time != (u64) -1))
91863051901SLeo Yan 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
919a54ca194STan Xiaojun 	else
920a54ca194STan Xiaojun 		timestamp = 0;
921a54ca194STan Xiaojun 
922a54ca194STan Xiaojun 	if (timestamp || spe->timeless_decoding) {
923a54ca194STan Xiaojun 		err = arm_spe__update_queues(spe);
924a54ca194STan Xiaojun 		if (err)
925a54ca194STan Xiaojun 			return err;
926a54ca194STan Xiaojun 	}
927a54ca194STan Xiaojun 
928a54ca194STan Xiaojun 	if (spe->timeless_decoding) {
929a54ca194STan Xiaojun 		if (event->header.type == PERF_RECORD_EXIT) {
930a54ca194STan Xiaojun 			err = arm_spe_process_timeless_queues(spe,
931a54ca194STan Xiaojun 					event->fork.tid,
932a54ca194STan Xiaojun 					sample->time);
933a54ca194STan Xiaojun 		}
934a54ca194STan Xiaojun 	} else if (timestamp) {
935a54ca194STan Xiaojun 		err = arm_spe_process_queues(spe, timestamp);
9369dc9855fSNamhyung Kim 		if (err)
9379dc9855fSNamhyung Kim 			return err;
9389dc9855fSNamhyung Kim 
93927d113cfSGerman Gomez 		if (!spe->use_ctx_pkt_for_pid &&
94027d113cfSGerman Gomez 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
94127d113cfSGerman Gomez 		    event->header.type == PERF_RECORD_SWITCH))
9429dc9855fSNamhyung Kim 			err = arm_spe_context_switch(spe, event, sample);
943a54ca194STan Xiaojun 	}
944a54ca194STan Xiaojun 
945a54ca194STan Xiaojun 	return err;
946a54ca194STan Xiaojun }
947a54ca194STan Xiaojun 
/*
 * Queue the trace data that accompanies an auxtrace event, unless the data
 * has already been queued, and dump it when dump_trace is set.
 *
 * Returns 0 on success, -errno if the file offset cannot be read, or the
 * error from auxtrace_queues__add_event().
 */
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_buffer *buffer;
	off_t data_offset = 0;
	int fd, err;

	if (spe->data_queued)
		return 0;

	fd = perf_data__fd(session->data);

	/* A pipe has no file position; offset 0 is used for it. */
	if (!perf_data__is_pipe(session->data)) {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&spe->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	if (!dump_trace)
		return 0;

	/* Dump here, now that a piped trace has been copied out of the pipe. */
	if (auxtrace_buffer__get_data(buffer, fd)) {
		arm_spe_dump_event(spe, buffer->data, buffer->size);
		auxtrace_buffer__put_data(buffer);
	}

	return 0;
}
986ffd3d18cSKim Phillips 
/*
 * Flush callback: decode whatever trace data is still queued.
 *
 * Returns 0 on success, -EINVAL if events are not ordered, or the error
 * from the queue helpers. In timed mode a warning is shown when no
 * CONTEXT packet was used for pid tracking.
 */
static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	int err;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	err = arm_spe__update_queues(spe);
	if (err < 0)
		return err;

	/* Timeless mode: drain every queue regardless of tid. */
	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
						       MAX_TIMESTAMP - 1);

	err = arm_spe_process_queues(spe, MAX_TIMESTAMP);
	if (err)
		return err;

	if (!spe->use_ctx_pkt_for_pid)
		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
			    "Matching of TIDs to SPE events could be inaccurate.\n");

	return 0;
}
1018ffd3d18cSKim Phillips 
arm_spe_free_queue(void * priv)1019ffd3d18cSKim Phillips static void arm_spe_free_queue(void *priv)
1020ffd3d18cSKim Phillips {
1021ffd3d18cSKim Phillips 	struct arm_spe_queue *speq = priv;
1022ffd3d18cSKim Phillips 
1023ffd3d18cSKim Phillips 	if (!speq)
1024ffd3d18cSKim Phillips 		return;
1025a54ca194STan Xiaojun 	thread__zput(speq->thread);
1026a54ca194STan Xiaojun 	arm_spe_decoder_free(speq->decoder);
1027a54ca194STan Xiaojun 	zfree(&speq->event_buf);
1028ffd3d18cSKim Phillips 	free(speq);
1029ffd3d18cSKim Phillips }
1030ffd3d18cSKim Phillips 
arm_spe_free_events(struct perf_session * session)1031ffd3d18cSKim Phillips static void arm_spe_free_events(struct perf_session *session)
1032ffd3d18cSKim Phillips {
1033ffd3d18cSKim Phillips 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1034ffd3d18cSKim Phillips 					     auxtrace);
1035ffd3d18cSKim Phillips 	struct auxtrace_queues *queues = &spe->queues;
1036ffd3d18cSKim Phillips 	unsigned int i;
1037ffd3d18cSKim Phillips 
1038ffd3d18cSKim Phillips 	for (i = 0; i < queues->nr_queues; i++) {
1039ffd3d18cSKim Phillips 		arm_spe_free_queue(queues->queue_array[i].priv);
1040ffd3d18cSKim Phillips 		queues->queue_array[i].priv = NULL;
1041ffd3d18cSKim Phillips 	}
1042ffd3d18cSKim Phillips 	auxtrace_queues__free(queues);
1043ffd3d18cSKim Phillips }
1044ffd3d18cSKim Phillips 
arm_spe_free(struct perf_session * session)1045ffd3d18cSKim Phillips static void arm_spe_free(struct perf_session *session)
1046ffd3d18cSKim Phillips {
1047ffd3d18cSKim Phillips 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1048ffd3d18cSKim Phillips 					     auxtrace);
1049ffd3d18cSKim Phillips 
1050ffd3d18cSKim Phillips 	auxtrace_heap__free(&spe->heap);
1051ffd3d18cSKim Phillips 	arm_spe_free_events(session);
1052ffd3d18cSKim Phillips 	session->auxtrace = NULL;
1053ffd3d18cSKim Phillips 	free(spe);
1054ffd3d18cSKim Phillips }
1055ffd3d18cSKim Phillips 
arm_spe_evsel_is_auxtrace(struct perf_session * session,struct evsel * evsel)1056508c71e3SAdrian Hunter static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1057508c71e3SAdrian Hunter 				      struct evsel *evsel)
1058508c71e3SAdrian Hunter {
1059508c71e3SAdrian Hunter 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1060508c71e3SAdrian Hunter 
1061508c71e3SAdrian Hunter 	return evsel->core.attr.type == spe->pmu_type;
1062508c71e3SAdrian Hunter }
1063508c71e3SAdrian Hunter 
1064ffd3d18cSKim Phillips static const char * const arm_spe_info_fmts[] = {
1065ffd3d18cSKim Phillips 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
1066ffd3d18cSKim Phillips };
1067ffd3d18cSKim Phillips 
/* Print the PMU type from the auxtrace info array; only when dumping. */
static void arm_spe_print_info(__u64 *arr)
{
	if (dump_trace)
		fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE],
			arr[ARM_SPE_PMU_TYPE]);
}
1075ffd3d18cSKim Phillips 
1076a54ca194STan Xiaojun struct arm_spe_synth {
1077a54ca194STan Xiaojun 	struct perf_tool dummy_tool;
1078a54ca194STan Xiaojun 	struct perf_session *session;
1079a54ca194STan Xiaojun };
1080a54ca194STan Xiaojun 
/*
 * Synthesis callback: deliver @event to the session that owns the
 * arm_spe_synth wrapper around @tool.
 */
static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *synth;

	synth = container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(synth->session, event, NULL);
}
1092a54ca194STan Xiaojun 
/*
 * Synthesize a single attr event for @attr with sample id @id and deliver
 * it to @session through arm_spe_event_synth().
 */
static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth synth;

	memset(&synth, 0, sizeof(synth));
	synth.session = session;

	return perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
					   arm_spe_event_synth);
}
1104a54ca194STan Xiaojun 
/*
 * Rename the first evsel in @evlist whose first sample id equals @id to a
 * copy of @name. Nothing happens when no evsel matches. If strdup() fails,
 * the evsel is left with a NULL name.
 */
static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.id || evsel->core.id[0] != id)
			continue;

		/* zfree() copes with a NULL pointer, so no guard is needed. */
		zfree(&evsel->name);
		evsel->name = strdup(name);
		break;
	}
}
1119a54ca194STan Xiaojun 
1120a54ca194STan Xiaojun static int
arm_spe_synth_events(struct arm_spe * spe,struct perf_session * session)1121a54ca194STan Xiaojun arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1122a54ca194STan Xiaojun {
1123a54ca194STan Xiaojun 	struct evlist *evlist = session->evlist;
1124a54ca194STan Xiaojun 	struct evsel *evsel;
1125a54ca194STan Xiaojun 	struct perf_event_attr attr;
1126a54ca194STan Xiaojun 	bool found = false;
1127a54ca194STan Xiaojun 	u64 id;
1128a54ca194STan Xiaojun 	int err;
1129a54ca194STan Xiaojun 
1130a54ca194STan Xiaojun 	evlist__for_each_entry(evlist, evsel) {
1131a54ca194STan Xiaojun 		if (evsel->core.attr.type == spe->pmu_type) {
1132a54ca194STan Xiaojun 			found = true;
1133a54ca194STan Xiaojun 			break;
1134a54ca194STan Xiaojun 		}
1135a54ca194STan Xiaojun 	}
1136a54ca194STan Xiaojun 
1137a54ca194STan Xiaojun 	if (!found) {
1138a54ca194STan Xiaojun 		pr_debug("No selected events with SPE trace data\n");
1139a54ca194STan Xiaojun 		return 0;
1140a54ca194STan Xiaojun 	}
1141a54ca194STan Xiaojun 
1142a54ca194STan Xiaojun 	memset(&attr, 0, sizeof(struct perf_event_attr));
1143a54ca194STan Xiaojun 	attr.size = sizeof(struct perf_event_attr);
1144a54ca194STan Xiaojun 	attr.type = PERF_TYPE_HARDWARE;
11457599b70aSTimothy Hayes 	attr.sample_type = evsel->core.attr.sample_type &
11467599b70aSTimothy Hayes 				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1147a54ca194STan Xiaojun 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1148b0fde9c6SNamhyung Kim 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
11494e13f670STimothy Hayes 			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1150a54ca194STan Xiaojun 	if (spe->timeless_decoding)
1151a54ca194STan Xiaojun 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1152a54ca194STan Xiaojun 	else
1153a54ca194STan Xiaojun 		attr.sample_type |= PERF_SAMPLE_TIME;
1154a54ca194STan Xiaojun 
11559e1a8d9fSGerman Gomez 	spe->sample_type = attr.sample_type;
11569e1a8d9fSGerman Gomez 
1157a54ca194STan Xiaojun 	attr.exclude_user = evsel->core.attr.exclude_user;
1158a54ca194STan Xiaojun 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1159a54ca194STan Xiaojun 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1160a54ca194STan Xiaojun 	attr.exclude_host = evsel->core.attr.exclude_host;
1161a54ca194STan Xiaojun 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1162a54ca194STan Xiaojun 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1163a54ca194STan Xiaojun 	attr.read_format = evsel->core.attr.read_format;
1164a54ca194STan Xiaojun 
1165a54ca194STan Xiaojun 	/* create new id val to be a fixed offset from evsel id */
1166a54ca194STan Xiaojun 	id = evsel->core.id[0] + 1000000000;
1167a54ca194STan Xiaojun 
1168a54ca194STan Xiaojun 	if (!id)
1169a54ca194STan Xiaojun 		id = 1;
1170a54ca194STan Xiaojun 
1171a54ca194STan Xiaojun 	if (spe->synth_opts.flc) {
1172a54ca194STan Xiaojun 		spe->sample_flc = true;
1173a54ca194STan Xiaojun 
1174a54ca194STan Xiaojun 		/* Level 1 data cache miss */
1175a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1176a54ca194STan Xiaojun 		if (err)
1177a54ca194STan Xiaojun 			return err;
1178a54ca194STan Xiaojun 		spe->l1d_miss_id = id;
1179a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1180a54ca194STan Xiaojun 		id += 1;
1181a54ca194STan Xiaojun 
1182a54ca194STan Xiaojun 		/* Level 1 data cache access */
1183a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1184a54ca194STan Xiaojun 		if (err)
1185a54ca194STan Xiaojun 			return err;
1186a54ca194STan Xiaojun 		spe->l1d_access_id = id;
1187a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "l1d-access");
1188a54ca194STan Xiaojun 		id += 1;
1189a54ca194STan Xiaojun 	}
1190a54ca194STan Xiaojun 
1191a54ca194STan Xiaojun 	if (spe->synth_opts.llc) {
1192a54ca194STan Xiaojun 		spe->sample_llc = true;
1193a54ca194STan Xiaojun 
1194a54ca194STan Xiaojun 		/* Last level cache miss */
1195a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1196a54ca194STan Xiaojun 		if (err)
1197a54ca194STan Xiaojun 			return err;
1198a54ca194STan Xiaojun 		spe->llc_miss_id = id;
1199a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "llc-miss");
1200a54ca194STan Xiaojun 		id += 1;
1201a54ca194STan Xiaojun 
1202a54ca194STan Xiaojun 		/* Last level cache access */
1203a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1204a54ca194STan Xiaojun 		if (err)
1205a54ca194STan Xiaojun 			return err;
1206a54ca194STan Xiaojun 		spe->llc_access_id = id;
1207a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "llc-access");
1208a54ca194STan Xiaojun 		id += 1;
1209a54ca194STan Xiaojun 	}
1210a54ca194STan Xiaojun 
1211a54ca194STan Xiaojun 	if (spe->synth_opts.tlb) {
1212a54ca194STan Xiaojun 		spe->sample_tlb = true;
1213a54ca194STan Xiaojun 
1214a54ca194STan Xiaojun 		/* TLB miss */
1215a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1216a54ca194STan Xiaojun 		if (err)
1217a54ca194STan Xiaojun 			return err;
1218a54ca194STan Xiaojun 		spe->tlb_miss_id = id;
1219a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1220a54ca194STan Xiaojun 		id += 1;
1221a54ca194STan Xiaojun 
1222a54ca194STan Xiaojun 		/* TLB access */
1223a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1224a54ca194STan Xiaojun 		if (err)
1225a54ca194STan Xiaojun 			return err;
1226a54ca194STan Xiaojun 		spe->tlb_access_id = id;
1227a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "tlb-access");
1228a54ca194STan Xiaojun 		id += 1;
1229a54ca194STan Xiaojun 	}
1230a54ca194STan Xiaojun 
1231a54ca194STan Xiaojun 	if (spe->synth_opts.branches) {
1232a54ca194STan Xiaojun 		spe->sample_branch = true;
1233a54ca194STan Xiaojun 
1234a54ca194STan Xiaojun 		/* Branch miss */
1235a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1236a54ca194STan Xiaojun 		if (err)
1237a54ca194STan Xiaojun 			return err;
1238a54ca194STan Xiaojun 		spe->branch_miss_id = id;
1239a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "branch-miss");
1240a54ca194STan Xiaojun 		id += 1;
1241a54ca194STan Xiaojun 	}
1242a54ca194STan Xiaojun 
1243a54ca194STan Xiaojun 	if (spe->synth_opts.remote_access) {
1244a54ca194STan Xiaojun 		spe->sample_remote_access = true;
1245a54ca194STan Xiaojun 
1246a54ca194STan Xiaojun 		/* Remote access */
1247a54ca194STan Xiaojun 		err = arm_spe_synth_event(session, &attr, id);
1248a54ca194STan Xiaojun 		if (err)
1249a54ca194STan Xiaojun 			return err;
1250a54ca194STan Xiaojun 		spe->remote_access_id = id;
1251a54ca194STan Xiaojun 		arm_spe_set_event_name(evlist, id, "remote-access");
1252a54ca194STan Xiaojun 		id += 1;
1253a54ca194STan Xiaojun 	}
1254a54ca194STan Xiaojun 
1255e55ed342SLeo Yan 	if (spe->synth_opts.mem) {
1256e55ed342SLeo Yan 		spe->sample_memory = true;
1257e55ed342SLeo Yan 
1258e55ed342SLeo Yan 		err = arm_spe_synth_event(session, &attr, id);
1259e55ed342SLeo Yan 		if (err)
1260e55ed342SLeo Yan 			return err;
1261e55ed342SLeo Yan 		spe->memory_id = id;
1262e55ed342SLeo Yan 		arm_spe_set_event_name(evlist, id, "memory");
1263ff8752d7SGerman Gomez 		id += 1;
1264e55ed342SLeo Yan 	}
1265e55ed342SLeo Yan 
1266ff8752d7SGerman Gomez 	if (spe->synth_opts.instructions) {
1267ff8752d7SGerman Gomez 		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1268ff8752d7SGerman Gomez 			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1269ff8752d7SGerman Gomez 			goto synth_instructions_out;
1270ff8752d7SGerman Gomez 		}
1271ff8752d7SGerman Gomez 		if (spe->synth_opts.period > 1)
1272ff8752d7SGerman Gomez 			pr_warning("Arm SPE has a hardware-based sample period.\n"
1273ff8752d7SGerman Gomez 				   "Additional instruction events will be discarded by --itrace\n");
1274ff8752d7SGerman Gomez 
1275ff8752d7SGerman Gomez 		spe->sample_instructions = true;
1276ff8752d7SGerman Gomez 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1277ff8752d7SGerman Gomez 		attr.sample_period = spe->synth_opts.period;
1278ff8752d7SGerman Gomez 		spe->instructions_sample_period = attr.sample_period;
1279ff8752d7SGerman Gomez 		err = arm_spe_synth_event(session, &attr, id);
1280ff8752d7SGerman Gomez 		if (err)
1281ff8752d7SGerman Gomez 			return err;
1282ff8752d7SGerman Gomez 		spe->instructions_id = id;
1283ff8752d7SGerman Gomez 		arm_spe_set_event_name(evlist, id, "instructions");
1284ff8752d7SGerman Gomez 	}
1285ff8752d7SGerman Gomez synth_instructions_out:
1286ff8752d7SGerman Gomez 
1287a54ca194STan Xiaojun 	return 0;
1288a54ca194STan Xiaojun }
1289a54ca194STan Xiaojun 
arm_spe_process_auxtrace_info(union perf_event * event,struct perf_session * session)1290ffd3d18cSKim Phillips int arm_spe_process_auxtrace_info(union perf_event *event,
1291ffd3d18cSKim Phillips 				  struct perf_session *session)
1292ffd3d18cSKim Phillips {
129372932371SJiri Olsa 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1294a54ca194STan Xiaojun 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1295c210c306SLeo Yan 	struct perf_record_time_conv *tc = &session->time_conv;
12964e6430cbSAli Saidi 	const char *cpuid = perf_env__cpuid(session->evlist->env);
12974e6430cbSAli Saidi 	u64 midr = strtol(cpuid, NULL, 16);
1298ffd3d18cSKim Phillips 	struct arm_spe *spe;
1299ffd3d18cSKim Phillips 	int err;
1300ffd3d18cSKim Phillips 
130172932371SJiri Olsa 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1302ffd3d18cSKim Phillips 					min_sz)
1303ffd3d18cSKim Phillips 		return -EINVAL;
1304ffd3d18cSKim Phillips 
1305ffd3d18cSKim Phillips 	spe = zalloc(sizeof(struct arm_spe));
1306ffd3d18cSKim Phillips 	if (!spe)
1307ffd3d18cSKim Phillips 		return -ENOMEM;
1308ffd3d18cSKim Phillips 
1309ffd3d18cSKim Phillips 	err = auxtrace_queues__init(&spe->queues);
1310ffd3d18cSKim Phillips 	if (err)
1311ffd3d18cSKim Phillips 		goto err_free;
1312ffd3d18cSKim Phillips 
1313ffd3d18cSKim Phillips 	spe->session = session;
1314ffd3d18cSKim Phillips 	spe->machine = &session->machines.host; /* No kvm support */
1315ffd3d18cSKim Phillips 	spe->auxtrace_type = auxtrace_info->type;
1316ffd3d18cSKim Phillips 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
13174e6430cbSAli Saidi 	spe->midr = midr;
1318ffd3d18cSKim Phillips 
1319a54ca194STan Xiaojun 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1320c210c306SLeo Yan 
1321c210c306SLeo Yan 	/*
1322c210c306SLeo Yan 	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1323c210c306SLeo Yan 	 * and the parameters for hardware clock are stored in the session
1324c210c306SLeo Yan 	 * context.  Passes these parameters to the struct perf_tsc_conversion
1325c210c306SLeo Yan 	 * in "spe->tc", which is used for later conversion between clock
1326c210c306SLeo Yan 	 * counter and timestamp.
1327c210c306SLeo Yan 	 *
1328c210c306SLeo Yan 	 * For backward compatibility, copies the fields starting from
1329c210c306SLeo Yan 	 * "time_cycles" only if they are contained in the event.
1330c210c306SLeo Yan 	 */
1331c210c306SLeo Yan 	spe->tc.time_shift = tc->time_shift;
1332c210c306SLeo Yan 	spe->tc.time_mult = tc->time_mult;
1333c210c306SLeo Yan 	spe->tc.time_zero = tc->time_zero;
1334c210c306SLeo Yan 
1335c210c306SLeo Yan 	if (event_contains(*tc, time_cycles)) {
1336c210c306SLeo Yan 		spe->tc.time_cycles = tc->time_cycles;
1337c210c306SLeo Yan 		spe->tc.time_mask = tc->time_mask;
1338c210c306SLeo Yan 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1339c210c306SLeo Yan 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1340c210c306SLeo Yan 	}
1341c210c306SLeo Yan 
1342ffd3d18cSKim Phillips 	spe->auxtrace.process_event = arm_spe_process_event;
1343ffd3d18cSKim Phillips 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1344ffd3d18cSKim Phillips 	spe->auxtrace.flush_events = arm_spe_flush;
1345ffd3d18cSKim Phillips 	spe->auxtrace.free_events = arm_spe_free_events;
1346ffd3d18cSKim Phillips 	spe->auxtrace.free = arm_spe_free;
1347508c71e3SAdrian Hunter 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1348ffd3d18cSKim Phillips 	session->auxtrace = &spe->auxtrace;
1349ffd3d18cSKim Phillips 
1350ffd3d18cSKim Phillips 	arm_spe_print_info(&auxtrace_info->priv[0]);
1351ffd3d18cSKim Phillips 
1352a54ca194STan Xiaojun 	if (dump_trace)
1353ffd3d18cSKim Phillips 		return 0;
1354ffd3d18cSKim Phillips 
1355a54ca194STan Xiaojun 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1356a54ca194STan Xiaojun 		spe->synth_opts = *session->itrace_synth_opts;
1357a54ca194STan Xiaojun 	else
1358a54ca194STan Xiaojun 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1359a54ca194STan Xiaojun 
1360a54ca194STan Xiaojun 	err = arm_spe_synth_events(spe, session);
1361a54ca194STan Xiaojun 	if (err)
1362a54ca194STan Xiaojun 		goto err_free_queues;
1363a54ca194STan Xiaojun 
1364a54ca194STan Xiaojun 	err = auxtrace_queues__process_index(&spe->queues, session);
1365a54ca194STan Xiaojun 	if (err)
1366a54ca194STan Xiaojun 		goto err_free_queues;
1367a54ca194STan Xiaojun 
1368a54ca194STan Xiaojun 	if (spe->queues.populated)
1369a54ca194STan Xiaojun 		spe->data_queued = true;
1370a54ca194STan Xiaojun 
1371a54ca194STan Xiaojun 	return 0;
1372a54ca194STan Xiaojun 
1373a54ca194STan Xiaojun err_free_queues:
1374a54ca194STan Xiaojun 	auxtrace_queues__free(&spe->queues);
1375a54ca194STan Xiaojun 	session->auxtrace = NULL;
1376ffd3d18cSKim Phillips err_free:
1377ffd3d18cSKim Phillips 	free(spe);
1378ffd3d18cSKim Phillips 	return err;
1379ffd3d18cSKim Phillips }
1380