/* xref: /openbmc/linux/tools/perf/util/arm-spe.c (revision b3d9fc14) */
// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts        synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	u8				timeless_decoding;
	u8				data_queued;

	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};

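/*
 * Hex-dump raw SPE trace bytes for "perf report -D".  Each packet is
 * decoded just enough to print its length and a textual description;
 * undecodable bytes are flagged as bad packets and skipped one byte
 * at a time so the dump can resynchronize.
 */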
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

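/*
 * Decoder callback: feed the next auxtrace buffer of this queue to the
 * packet decoder.  The previous buffer is only dropped once its
 * successor holds data, so the decoder never reads freed memory.
 * Returns 0 with b->len == 0 once the queue is exhausted.
 */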
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

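/*
 * Allocate one decode queue: a scratch event buffer for synthesized
 * samples plus a per-queue instance of the SPE packet decoder, wired
 * up to arm_spe_get_trace() above.
 */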
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

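/*
 * Fill in the fields shared by all synthesized samples: time (unless
 * decoding timelessly), ip and cpumode, pid/tid, cpu, and a period of
 * one record per sample.
 */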
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = speq->timestamp;

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

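/*
 * Synthesize one memory sample from the current record: virtual and
 * physical address plus the encoded perf_mem_data_src value.
 */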
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

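/* Record types from which generic memory samples are synthesized */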
#define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
			 ARM_SPE_REMOTE_ACCESS)

static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
{
	if (type & SPE_MEM_TYPE)
		return true;

	return false;
}

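/*
 * Encode an SPE record as a perf_mem_data_src value: the operation
 * type (load/store), the cache level reported by the record (LLC if
 * present, otherwise L1) with hit/miss, remote-socket accesses, and
 * the D-TLB walk result.
 */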
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

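/*
 * Synthesize the samples requested via the itrace options for the
 * current record.  A single record can produce several samples, e.g.
 * an L1 miss that was also a TLB access.
 */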
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

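/*
 * Decode records until the trace data is exhausted, a sample fails to
 * be delivered, or (for timed decoding) a record is seen at or beyond
 * *timestamp, at which point *timestamp is updated so the caller can
 * re-queue this decoder on the auxtrace heap.
 */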
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error occurs while decoding the trace data, skip
		 * past it and keep decoding to find further records.
		 */
		if (ret < 0)
			continue;

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

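/*
 * Set up one auxtrace queue: allocate the decode queue lazily and, for
 * timed decoding, decode up to the first record so the queue can be
 * placed on the timestamp-ordered heap.
 */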
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the events and check whether any of them has the
	 * time bit set; if none does, the trace must be decoded timelessly.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

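/*
 * Pop queues off the heap in timestamp order and decode each one up to
 * the given timestamp, re-adding the queue with its new ordinal until
 * every queue has caught up.
 */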
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

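/*
 * Timeless path: no usable timestamps, so decode each matching queue
 * to completion in one pass (tid == -1 matches every queue).
 */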
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}
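
/*
 * Per-event hook for ordered events.  Queues are (re)built once
 * timestamps or timeless decoding allow it; the buffered trace data is
 * then decoded when a PERF_RECORD_EXIT event is seen.
 */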
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_queues(spe, timestamp);
			if (err)
				return err;
		}
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session,
			 struct perf_tool *tool)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

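/*
 * Create one synthetic event id per requested sample type, cloning the
 * sampling configuration from the SPE evsel, and give each id a
 * human-readable name ("l1d-miss", "tlb-access", ...).
 */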
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

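/*
 * Entry point, called when a PERF_RECORD_AUXTRACE_INFO event written
 * by "perf record" is processed: allocate the struct arm_spe, hook up
 * the auxtrace callbacks, synthesize the requested events and queue
 * any indexed trace data.
 */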
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}
1068