xref: /openbmc/linux/tools/perf/util/intel-pt.c (revision a6ca5ac746d104019e76c29e69c2a1fc6dd2b29f)
1 /*
2  * intel_pt.c: Intel Processor Trace support
3  * Copyright (c) 2013-2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15 
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <stdbool.h>
19 #include <errno.h>
20 #include <linux/kernel.h>
21 #include <linux/types.h>
22 
23 #include "../perf.h"
24 #include "session.h"
25 #include "machine.h"
26 #include "memswap.h"
27 #include "sort.h"
28 #include "tool.h"
29 #include "event.h"
30 #include "evlist.h"
31 #include "evsel.h"
32 #include "map.h"
33 #include "color.h"
34 #include "util.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "symbol.h"
38 #include "callchain.h"
39 #include "dso.h"
40 #include "debug.h"
41 #include "auxtrace.h"
42 #include "tsc.h"
43 #include "intel-pt.h"
44 #include "config.h"
45 
46 #include "intel-pt-decoder/intel-pt-log.h"
47 #include "intel-pt-decoder/intel-pt-decoder.h"
48 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
49 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
50 
51 #define MAX_TIMESTAMP (~0ULL)
52 
53 struct intel_pt {
54 	struct auxtrace auxtrace;
55 	struct auxtrace_queues queues;
56 	struct auxtrace_heap heap;
57 	u32 auxtrace_type;
58 	struct perf_session *session;
59 	struct machine *machine;
60 	struct perf_evsel *switch_evsel;
61 	struct thread *unknown_thread;
62 	bool timeless_decoding;
63 	bool sampling_mode;
64 	bool snapshot_mode;
65 	bool per_cpu_mmaps;
66 	bool have_tsc;
67 	bool data_queued;
68 	bool est_tsc;
69 	bool sync_switch;
70 	bool mispred_all;
71 	int have_sched_switch;
72 	u32 pmu_type;
73 	u64 kernel_start;
74 	u64 switch_ip;
75 	u64 ptss_ip;
76 
77 	struct perf_tsc_conversion tc;
78 	bool cap_user_time_zero;
79 
80 	struct itrace_synth_opts synth_opts;
81 
82 	bool sample_instructions;
83 	u64 instructions_sample_type;
84 	u64 instructions_sample_period;
85 	u64 instructions_id;
86 
87 	bool sample_branches;
88 	u32 branches_filter;
89 	u64 branches_sample_type;
90 	u64 branches_id;
91 
92 	bool sample_transactions;
93 	u64 transactions_sample_type;
94 	u64 transactions_id;
95 
96 	bool synth_needs_swap;
97 
98 	u64 tsc_bit;
99 	u64 mtc_bit;
100 	u64 mtc_freq_bits;
101 	u32 tsc_ctc_ratio_n;
102 	u32 tsc_ctc_ratio_d;
103 	u64 cyc_bit;
104 	u64 noretcomp_bit;
105 	unsigned max_non_turbo_ratio;
106 
107 	unsigned long num_events;
108 
109 	char *filter;
110 	struct addr_filters filts;
111 };
112 
113 enum switch_state {
114 	INTEL_PT_SS_NOT_TRACING,
115 	INTEL_PT_SS_UNKNOWN,
116 	INTEL_PT_SS_TRACING,
117 	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
118 	INTEL_PT_SS_EXPECTING_SWITCH_IP,
119 };
120 
121 struct intel_pt_queue {
122 	struct intel_pt *pt;
123 	unsigned int queue_nr;
124 	struct auxtrace_buffer *buffer;
125 	void *decoder;
126 	const struct intel_pt_state *state;
127 	struct ip_callchain *chain;
128 	struct branch_stack *last_branch;
129 	struct branch_stack *last_branch_rb;
130 	size_t last_branch_pos;
131 	union perf_event *event_buf;
132 	bool on_heap;
133 	bool stop;
134 	bool step_through_buffers;
135 	bool use_buffer_pid_tid;
136 	pid_t pid, tid;
137 	int cpu;
138 	int switch_state;
139 	pid_t next_tid;
140 	struct thread *thread;
141 	bool exclude_kernel;
142 	bool have_sample;
143 	u64 time;
144 	u64 timestamp;
145 	u32 flags;
146 	u16 insn_len;
147 	u64 last_insn_cnt;
148 	char insn[INTEL_PT_INSN_BUF_SZ];
149 };
150 
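/*
 * Hex dump raw Intel PT data one packet at a time, appending each packet's
 * decoded description, or "Bad packet!" if it cannot be decoded.
 */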
151 static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
152 			  unsigned char *buf, size_t len)
153 {
154 	struct intel_pt_pkt packet;
155 	size_t pos = 0;
156 	int ret, pkt_len, i;
157 	char desc[INTEL_PT_PKT_DESC_MAX];
158 	const char *color = PERF_COLOR_BLUE;
159 
160 	color_fprintf(stdout, color,
161 		      ". ... Intel Processor Trace data: size %zu bytes\n",
162 		      len);
163 
164 	while (len) {
165 		ret = intel_pt_get_packet(buf, len, &packet);
166 		if (ret > 0)
167 			pkt_len = ret;
168 		else
169 			pkt_len = 1;
170 		printf(".");
171 		color_fprintf(stdout, color, "  %08zx: ", pos);
172 		for (i = 0; i < pkt_len; i++)
173 			color_fprintf(stdout, color, " %02x", buf[i]);
174 		for (; i < 16; i++)
175 			color_fprintf(stdout, color, "   ");
176 		if (ret > 0) {
177 			ret = intel_pt_pkt_desc(&packet, desc,
178 						INTEL_PT_PKT_DESC_MAX);
179 			if (ret > 0)
180 				color_fprintf(stdout, color, " %s\n", desc);
181 		} else {
182 			color_fprintf(stdout, color, " Bad packet!\n");
183 		}
184 		pos += pkt_len;
185 		buf += pkt_len;
186 		len -= pkt_len;
187 	}
188 }
189 
190 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
191 				size_t len)
192 {
193 	printf(".\n");
194 	intel_pt_dump(pt, buf, len);
195 }
196 
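/*
 * Adjust buffer 'b' so that it starts where it stops overlapping buffer 'a',
 * preventing the same trace data being decoded twice (e.g. when snapshots
 * overlap).
 */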
197 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
198 				   struct auxtrace_buffer *b)
199 {
200 	void *start;
201 
202 	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
203 				      pt->have_tsc);
204 	if (!start)
205 		return -EINVAL;
206 	b->use_size = b->data + b->size - start;
207 	b->use_data = start;
208 	return 0;
209 }
210 
211 static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
212 					struct auxtrace_queue *queue,
213 					struct auxtrace_buffer *buffer)
214 {
215 	if (queue->cpu == -1 && buffer->cpu != -1)
216 		ptq->cpu = buffer->cpu;
217 
218 	ptq->pid = buffer->pid;
219 	ptq->tid = buffer->tid;
220 
221 	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
222 		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
223 
224 	thread__zput(ptq->thread);
225 
226 	if (ptq->tid != -1) {
227 		if (ptq->pid != -1)
228 			ptq->thread = machine__findnew_thread(ptq->pt->machine,
229 							      ptq->pid,
230 							      ptq->tid);
231 		else
232 			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
233 							   ptq->tid);
234 	}
235 }
236 
237 /* This function assumes data is processed sequentially only */
238 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
239 {
240 	struct intel_pt_queue *ptq = data;
241 	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
242 	struct auxtrace_queue *queue;
243 
244 	if (ptq->stop) {
245 		b->len = 0;
246 		return 0;
247 	}
248 
249 	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
250 next:
251 	buffer = auxtrace_buffer__next(queue, buffer);
252 	if (!buffer) {
253 		if (old_buffer)
254 			auxtrace_buffer__drop_data(old_buffer);
255 		b->len = 0;
256 		return 0;
257 	}
258 
259 	ptq->buffer = buffer;
260 
261 	if (!buffer->data) {
262 		int fd = perf_data_file__fd(ptq->pt->session->file);
263 
264 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
265 		if (!buffer->data)
266 			return -ENOMEM;
267 	}
268 
269 	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
270 	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
271 		return -ENOMEM;
272 
273 	if (buffer->use_data) {
274 		b->len = buffer->use_size;
275 		b->buf = buffer->use_data;
276 	} else {
277 		b->len = buffer->size;
278 		b->buf = buffer->data;
279 	}
280 	b->ref_timestamp = buffer->reference;
281 
282 	/*
283 	 * If in snapshot mode and the buffer has no usable data, get next
284 	 * buffer and again check overlap against old_buffer.
285 	 */
286 	if (ptq->pt->snapshot_mode && !b->len)
287 		goto next;
288 
289 	if (old_buffer)
290 		auxtrace_buffer__drop_data(old_buffer);
291 
292 	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
293 						      !buffer->consecutive)) {
294 		b->consecutive = false;
295 		b->trace_nr = buffer->buffer_nr + 1;
296 	} else {
297 		b->consecutive = true;
298 	}
299 
300 	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
301 					ptq->tid != buffer->tid))
302 		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
303 
304 	if (ptq->step_through_buffers)
305 		ptq->stop = true;
306 
307 	if (!b->len)
308 		return intel_pt_get_trace(b, data);
309 
310 	return 0;
311 }
312 
313 struct intel_pt_cache_entry {
314 	struct auxtrace_cache_entry	entry;
315 	u64				insn_cnt;
316 	u64				byte_cnt;
317 	enum intel_pt_insn_op		op;
318 	enum intel_pt_insn_branch	branch;
319 	int				length;
320 	int32_t				rel;
321 	char				insn[INTEL_PT_INSN_BUF_SZ];
322 };
323 
324 static int intel_pt_config_div(const char *var, const char *value, void *data)
325 {
326 	int *d = data;
327 	long val;
328 
329 	if (!strcmp(var, "intel-pt.cache-divisor")) {
330 		val = strtol(value, NULL, 0);
331 		if (val > 0 && val <= INT_MAX)
332 			*d = val;
333 	}
334 
335 	return 0;
336 }
337 
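/*
 * Divisor used to scale the per-dso instruction cache size relative to the
 * DSO size; configurable via "intel-pt.cache-divisor" (default 64).
 */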
338 static int intel_pt_cache_divisor(void)
339 {
340 	static int d;
341 
342 	if (d)
343 		return d;
344 
345 	perf_config(intel_pt_config_div, &d);
346 
347 	if (!d)
348 		d = 64;
349 
350 	return d;
351 }
352 
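/*
 * Number of hash bits for the per-dso instruction cache, derived from the
 * DSO size divided by the cache divisor and clamped at both ends.
 */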
353 static unsigned int intel_pt_cache_size(struct dso *dso,
354 					struct machine *machine)
355 {
356 	off_t size;
357 
358 	size = dso__data_size(dso, machine);
359 	size /= intel_pt_cache_divisor();
360 	if (size < 1000)
361 		return 10;
362 	if (size > (1 << 21))
363 		return 21;
364 	return 32 - __builtin_clz(size);
365 }
366 
367 static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
368 					     struct machine *machine)
369 {
370 	struct auxtrace_cache *c;
371 	unsigned int bits;
372 
373 	if (dso->auxtrace_cache)
374 		return dso->auxtrace_cache;
375 
376 	bits = intel_pt_cache_size(dso, machine);
377 
378 	/* Ignore cache creation failure - decoding works without a cache, just more slowly */
379 	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
380 
381 	dso->auxtrace_cache = c;
382 
383 	return c;
384 }
385 
386 static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
387 			      u64 offset, u64 insn_cnt, u64 byte_cnt,
388 			      struct intel_pt_insn *intel_pt_insn)
389 {
390 	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
391 	struct intel_pt_cache_entry *e;
392 	int err;
393 
394 	if (!c)
395 		return -ENOMEM;
396 
397 	e = auxtrace_cache__alloc_entry(c);
398 	if (!e)
399 		return -ENOMEM;
400 
401 	e->insn_cnt = insn_cnt;
402 	e->byte_cnt = byte_cnt;
403 	e->op = intel_pt_insn->op;
404 	e->branch = intel_pt_insn->branch;
405 	e->length = intel_pt_insn->length;
406 	e->rel = intel_pt_insn->rel;
407 	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
408 
409 	err = auxtrace_cache__add(c, offset, &e->entry);
410 	if (err)
411 		auxtrace_cache__free_entry(c, e);
412 
413 	return err;
414 }
415 
416 static struct intel_pt_cache_entry *
417 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
418 {
419 	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
420 
421 	if (!c)
422 		return NULL;
423 
424 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
425 }
426 
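/*
 * Decode instructions starting at *ip, reading code from the DSO backing the
 * map that contains it, until a branch is found, 'to_ip' is reached or
 * 'max_insn_cnt' instructions have been walked.  Walks contained in a single
 * map are cached per-dso so that repeated walks over the same code are cheap.
 */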
427 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
428 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
429 				   uint64_t to_ip, uint64_t max_insn_cnt,
430 				   void *data)
431 {
432 	struct intel_pt_queue *ptq = data;
433 	struct machine *machine = ptq->pt->machine;
434 	struct thread *thread;
435 	struct addr_location al;
436 	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
437 	ssize_t len;
438 	int x86_64;
439 	u8 cpumode;
440 	u64 offset, start_offset, start_ip;
441 	u64 insn_cnt = 0;
442 	bool one_map = true;
443 
444 	intel_pt_insn->length = 0;
445 
446 	if (to_ip && *ip == to_ip)
447 		goto out_no_cache;
448 
449 	if (*ip >= ptq->pt->kernel_start)
450 		cpumode = PERF_RECORD_MISC_KERNEL;
451 	else
452 		cpumode = PERF_RECORD_MISC_USER;
453 
454 	thread = ptq->thread;
455 	if (!thread) {
456 		if (cpumode != PERF_RECORD_MISC_KERNEL)
457 			return -EINVAL;
458 		thread = ptq->pt->unknown_thread;
459 	}
460 
461 	while (1) {
462 		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
463 		if (!al.map || !al.map->dso)
464 			return -EINVAL;
465 
466 		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
467 		    dso__data_status_seen(al.map->dso,
468 					  DSO_DATA_STATUS_SEEN_ITRACE))
469 			return -ENOENT;
470 
471 		offset = al.map->map_ip(al.map, *ip);
472 
473 		if (!to_ip && one_map) {
474 			struct intel_pt_cache_entry *e;
475 
476 			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
477 			if (e &&
478 			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
479 				*insn_cnt_ptr = e->insn_cnt;
480 				*ip += e->byte_cnt;
481 				intel_pt_insn->op = e->op;
482 				intel_pt_insn->branch = e->branch;
483 				intel_pt_insn->length = e->length;
484 				intel_pt_insn->rel = e->rel;
485 				memcpy(intel_pt_insn->buf, e->insn,
486 				       INTEL_PT_INSN_BUF_SZ);
487 				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
488 				return 0;
489 			}
490 		}
491 
492 		start_offset = offset;
493 		start_ip = *ip;
494 
495 		/* Load maps to ensure dso->is_64_bit has been updated */
496 		map__load(al.map);
497 
498 		x86_64 = al.map->dso->is_64_bit;
499 
500 		while (1) {
501 			len = dso__data_read_offset(al.map->dso, machine,
502 						    offset, buf,
503 						    INTEL_PT_INSN_BUF_SZ);
504 			if (len <= 0)
505 				return -EINVAL;
506 
507 			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
508 				return -EINVAL;
509 
510 			intel_pt_log_insn(intel_pt_insn, *ip);
511 
512 			insn_cnt += 1;
513 
514 			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
515 				goto out;
516 
517 			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
518 				goto out_no_cache;
519 
520 			*ip += intel_pt_insn->length;
521 
522 			if (to_ip && *ip == to_ip)
523 				goto out_no_cache;
524 
525 			if (*ip >= al.map->end)
526 				break;
527 
528 			offset += intel_pt_insn->length;
529 		}
530 		one_map = false;
531 	}
532 out:
533 	*insn_cnt_ptr = insn_cnt;
534 
535 	if (!one_map)
536 		goto out_no_cache;
537 
538 	/*
539 	 * Didn't look up the cache in the 'to_ip' case, so do it now to
540 	 * prevent duplicate entries.
541 	 */
542 	if (to_ip) {
543 		struct intel_pt_cache_entry *e;
544 
545 		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
546 		if (e)
547 			return 0;
548 	}
549 
550 	/* Ignore cache errors */
551 	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
552 			   *ip - start_ip, intel_pt_insn);
553 
554 	return 0;
555 
556 out_no_cache:
557 	*insn_cnt_ptr = insn_cnt;
558 	return 0;
559 }
560 
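/*
 * Given the IP of a TIP.PGD packet, report whether tracing stopped because of
 * an address filter: true if the IP hit a trace-stop region, or if filters
 * are defined and the IP is outside all of them.
 */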
561 static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
562 				  uint64_t offset, const char *filename)
563 {
564 	struct addr_filter *filt;
565 	bool have_filter   = false;
566 	bool hit_tracestop = false;
567 	bool hit_filter    = false;
568 
569 	list_for_each_entry(filt, &pt->filts.head, list) {
570 		if (filt->start)
571 			have_filter = true;
572 
573 		if ((filename && !filt->filename) ||
574 		    (!filename && filt->filename) ||
575 		    (filename && strcmp(filename, filt->filename)))
576 			continue;
577 
578 		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
579 			continue;
580 
581 		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
582 			     ip, offset, filename ? filename : "[kernel]",
583 			     filt->start ? "filter" : "stop",
584 			     filt->addr, filt->size);
585 
586 		if (filt->start)
587 			hit_filter = true;
588 		else
589 			hit_tracestop = true;
590 	}
591 
592 	if (!hit_tracestop && !hit_filter)
593 		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
594 			     ip, offset, filename ? filename : "[kernel]");
595 
596 	return hit_tracestop || (have_filter && !hit_filter);
597 }
598 
599 static int __intel_pt_pgd_ip(uint64_t ip, void *data)
600 {
601 	struct intel_pt_queue *ptq = data;
602 	struct thread *thread;
603 	struct addr_location al;
604 	u8 cpumode;
605 	u64 offset;
606 
607 	if (ip >= ptq->pt->kernel_start)
608 		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
609 
610 	cpumode = PERF_RECORD_MISC_USER;
611 
612 	thread = ptq->thread;
613 	if (!thread)
614 		return -EINVAL;
615 
616 	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
617 	if (!al.map || !al.map->dso)
618 		return -EINVAL;
619 
620 	offset = al.map->map_ip(al.map, ip);
621 
622 	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
623 				     al.map->dso->long_name);
624 }
625 
626 static bool intel_pt_pgd_ip(uint64_t ip, void *data)
627 {
628 	return __intel_pt_pgd_ip(ip, data) > 0;
629 }
630 
631 static bool intel_pt_get_config(struct intel_pt *pt,
632 				struct perf_event_attr *attr, u64 *config)
633 {
634 	if (attr->type == pt->pmu_type) {
635 		if (config)
636 			*config = attr->config;
637 		return true;
638 	}
639 
640 	return false;
641 }
642 
643 static bool intel_pt_exclude_kernel(struct intel_pt *pt)
644 {
645 	struct perf_evsel *evsel;
646 
647 	evlist__for_each_entry(pt->session->evlist, evsel) {
648 		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
649 		    !evsel->attr.exclude_kernel)
650 			return false;
651 	}
652 	return true;
653 }
654 
655 static bool intel_pt_return_compression(struct intel_pt *pt)
656 {
657 	struct perf_evsel *evsel;
658 	u64 config;
659 
660 	if (!pt->noretcomp_bit)
661 		return true;
662 
663 	evlist__for_each_entry(pt->session->evlist, evsel) {
664 		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
665 		    (config & pt->noretcomp_bit))
666 			return false;
667 	}
668 	return true;
669 }
670 
671 static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
672 {
673 	struct perf_evsel *evsel;
674 	unsigned int shift;
675 	u64 config;
676 
677 	if (!pt->mtc_freq_bits)
678 		return 0;
679 
680 	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
681 		config >>= 1;
682 
683 	evlist__for_each_entry(pt->session->evlist, evsel) {
684 		if (intel_pt_get_config(pt, &evsel->attr, &config))
685 			return (config & pt->mtc_freq_bits) >> shift;
686 	}
687 	return 0;
688 }
689 
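/*
 * Decoding is "timeless" (done without timestamps) unless TSC was recorded,
 * the TSC conversion parameters are available and the traced events sample
 * time.
 */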
690 static bool intel_pt_timeless_decoding(struct intel_pt *pt)
691 {
692 	struct perf_evsel *evsel;
693 	bool timeless_decoding = true;
694 	u64 config;
695 
696 	if (!pt->tsc_bit || !pt->cap_user_time_zero)
697 		return true;
698 
699 	evlist__for_each_entry(pt->session->evlist, evsel) {
700 		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
701 			return true;
702 		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
703 			if (config & pt->tsc_bit)
704 				timeless_decoding = false;
705 			else
706 				return true;
707 		}
708 	}
709 	return timeless_decoding;
710 }
711 
712 static bool intel_pt_tracing_kernel(struct intel_pt *pt)
713 {
714 	struct perf_evsel *evsel;
715 
716 	evlist__for_each_entry(pt->session->evlist, evsel) {
717 		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
718 		    !evsel->attr.exclude_kernel)
719 			return true;
720 	}
721 	return false;
722 }
723 
724 static bool intel_pt_have_tsc(struct intel_pt *pt)
725 {
726 	struct perf_evsel *evsel;
727 	bool have_tsc = false;
728 	u64 config;
729 
730 	if (!pt->tsc_bit)
731 		return false;
732 
733 	evlist__for_each_entry(pt->session->evlist, evsel) {
734 		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
735 			if (config & pt->tsc_bit)
736 				have_tsc = true;
737 			else
738 				return false;
739 		}
740 	}
741 	return have_tsc;
742 }
743 
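/* Convert nanoseconds to TSC ticks using the session's time conversion parameters */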
744 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
745 {
746 	u64 quot, rem;
747 
748 	quot = ns / pt->tc.time_mult;
749 	rem  = ns % pt->tc.time_mult;
750 	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
751 		pt->tc.time_mult;
752 }
753 
754 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
755 						   unsigned int queue_nr)
756 {
757 	struct intel_pt_params params = { .get_trace = 0, };
758 	struct intel_pt_queue *ptq;
759 
760 	ptq = zalloc(sizeof(struct intel_pt_queue));
761 	if (!ptq)
762 		return NULL;
763 
764 	if (pt->synth_opts.callchain) {
765 		size_t sz = sizeof(struct ip_callchain);
766 
767 		sz += pt->synth_opts.callchain_sz * sizeof(u64);
768 		ptq->chain = zalloc(sz);
769 		if (!ptq->chain)
770 			goto out_free;
771 	}
772 
773 	if (pt->synth_opts.last_branch) {
774 		size_t sz = sizeof(struct branch_stack);
775 
776 		sz += pt->synth_opts.last_branch_sz *
777 		      sizeof(struct branch_entry);
778 		ptq->last_branch = zalloc(sz);
779 		if (!ptq->last_branch)
780 			goto out_free;
781 		ptq->last_branch_rb = zalloc(sz);
782 		if (!ptq->last_branch_rb)
783 			goto out_free;
784 	}
785 
786 	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
787 	if (!ptq->event_buf)
788 		goto out_free;
789 
790 	ptq->pt = pt;
791 	ptq->queue_nr = queue_nr;
792 	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
793 	ptq->pid = -1;
794 	ptq->tid = -1;
795 	ptq->cpu = -1;
796 	ptq->next_tid = -1;
797 
798 	params.get_trace = intel_pt_get_trace;
799 	params.walk_insn = intel_pt_walk_next_insn;
800 	params.data = ptq;
801 	params.return_compression = intel_pt_return_compression(pt);
802 	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
803 	params.mtc_period = intel_pt_mtc_period(pt);
804 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
805 	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
806 
807 	if (pt->filts.cnt > 0)
808 		params.pgd_ip = intel_pt_pgd_ip;
809 
810 	if (pt->synth_opts.instructions) {
811 		if (pt->synth_opts.period) {
812 			switch (pt->synth_opts.period_type) {
813 			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
814 				params.period_type =
815 						INTEL_PT_PERIOD_INSTRUCTIONS;
816 				params.period = pt->synth_opts.period;
817 				break;
818 			case PERF_ITRACE_PERIOD_TICKS:
819 				params.period_type = INTEL_PT_PERIOD_TICKS;
820 				params.period = pt->synth_opts.period;
821 				break;
822 			case PERF_ITRACE_PERIOD_NANOSECS:
823 				params.period_type = INTEL_PT_PERIOD_TICKS;
824 				params.period = intel_pt_ns_to_ticks(pt,
825 							pt->synth_opts.period);
826 				break;
827 			default:
828 				break;
829 			}
830 		}
831 
832 		if (!params.period) {
833 			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
834 			params.period = 1;
835 		}
836 	}
837 
838 	ptq->decoder = intel_pt_decoder_new(&params);
839 	if (!ptq->decoder)
840 		goto out_free;
841 
842 	return ptq;
843 
844 out_free:
845 	zfree(&ptq->event_buf);
846 	zfree(&ptq->last_branch);
847 	zfree(&ptq->last_branch_rb);
848 	zfree(&ptq->chain);
849 	free(ptq);
850 	return NULL;
851 }
852 
853 static void intel_pt_free_queue(void *priv)
854 {
855 	struct intel_pt_queue *ptq = priv;
856 
857 	if (!ptq)
858 		return;
859 	thread__zput(ptq->thread);
860 	intel_pt_decoder_free(ptq->decoder);
861 	zfree(&ptq->event_buf);
862 	zfree(&ptq->last_branch);
863 	zfree(&ptq->last_branch_rb);
864 	zfree(&ptq->chain);
865 	free(ptq);
866 }
867 
868 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
869 				     struct auxtrace_queue *queue)
870 {
871 	struct intel_pt_queue *ptq = queue->priv;
872 
873 	if (queue->tid == -1 || pt->have_sched_switch) {
874 		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
875 		thread__zput(ptq->thread);
876 	}
877 
878 	if (!ptq->thread && ptq->tid != -1)
879 		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
880 
881 	if (ptq->thread) {
882 		ptq->pid = ptq->thread->pid_;
883 		if (queue->cpu == -1)
884 			ptq->cpu = ptq->thread->cpu;
885 	}
886 }
887 
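/*
 * Translate the decoder state into perf branch flags (call/return, async,
 * interrupt, tx abort, in-tx, trace begin/end) and capture the branch
 * instruction length and bytes.
 */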
888 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
889 {
890 	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
891 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
892 	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
893 		if (ptq->state->to_ip)
894 			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
895 				     PERF_IP_FLAG_ASYNC |
896 				     PERF_IP_FLAG_INTERRUPT;
897 		else
898 			ptq->flags = PERF_IP_FLAG_BRANCH |
899 				     PERF_IP_FLAG_TRACE_END;
900 		ptq->insn_len = 0;
901 	} else {
902 		if (ptq->state->from_ip)
903 			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
904 		else
905 			ptq->flags = PERF_IP_FLAG_BRANCH |
906 				     PERF_IP_FLAG_TRACE_BEGIN;
907 		if (ptq->state->flags & INTEL_PT_IN_TX)
908 			ptq->flags |= PERF_IP_FLAG_IN_TX;
909 		ptq->insn_len = ptq->state->insn_len;
910 		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
911 	}
912 }
913 
914 static int intel_pt_setup_queue(struct intel_pt *pt,
915 				struct auxtrace_queue *queue,
916 				unsigned int queue_nr)
917 {
918 	struct intel_pt_queue *ptq = queue->priv;
919 
920 	if (list_empty(&queue->head))
921 		return 0;
922 
923 	if (!ptq) {
924 		ptq = intel_pt_alloc_queue(pt, queue_nr);
925 		if (!ptq)
926 			return -ENOMEM;
927 		queue->priv = ptq;
928 
929 		if (queue->cpu != -1)
930 			ptq->cpu = queue->cpu;
931 		ptq->tid = queue->tid;
932 
933 		if (pt->sampling_mode) {
934 			if (pt->timeless_decoding)
935 				ptq->step_through_buffers = true;
936 			if (pt->timeless_decoding || !pt->have_sched_switch)
937 				ptq->use_buffer_pid_tid = true;
938 		}
939 	}
940 
941 	if (!ptq->on_heap &&
942 	    (!pt->sync_switch ||
943 	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
944 		const struct intel_pt_state *state;
945 		int ret;
946 
947 		if (pt->timeless_decoding)
948 			return 0;
949 
950 		intel_pt_log("queue %u getting timestamp\n", queue_nr);
951 		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
952 			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
953 		while (1) {
954 			state = intel_pt_decode(ptq->decoder);
955 			if (state->err) {
956 				if (state->err == INTEL_PT_ERR_NODATA) {
957 					intel_pt_log("queue %u has no timestamp\n",
958 						     queue_nr);
959 					return 0;
960 				}
961 				continue;
962 			}
963 			if (state->timestamp)
964 				break;
965 		}
966 
967 		ptq->timestamp = state->timestamp;
968 		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
969 			     queue_nr, ptq->timestamp);
970 		ptq->state = state;
971 		ptq->have_sample = true;
972 		intel_pt_sample_flags(ptq);
973 		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
974 		if (ret)
975 			return ret;
976 		ptq->on_heap = true;
977 	}
978 
979 	return 0;
980 }
981 
982 static int intel_pt_setup_queues(struct intel_pt *pt)
983 {
984 	unsigned int i;
985 	int ret;
986 
987 	for (i = 0; i < pt->queues.nr_queues; i++) {
988 		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
989 		if (ret)
990 			return ret;
991 	}
992 	return 0;
993 }
994 
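/*
 * Flatten the last-branch ring buffer into the linear branch stack used for
 * samples, most recent entry first.
 */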
995 static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
996 {
997 	struct branch_stack *bs_src = ptq->last_branch_rb;
998 	struct branch_stack *bs_dst = ptq->last_branch;
999 	size_t nr = 0;
1000 
1001 	bs_dst->nr = bs_src->nr;
1002 
1003 	if (!bs_src->nr)
1004 		return;
1005 
1006 	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
1007 	memcpy(&bs_dst->entries[0],
1008 	       &bs_src->entries[ptq->last_branch_pos],
1009 	       sizeof(struct branch_entry) * nr);
1010 
1011 	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1012 		memcpy(&bs_dst->entries[nr],
1013 		       &bs_src->entries[0],
1014 		       sizeof(struct branch_entry) * ptq->last_branch_pos);
1015 	}
1016 }
1017 
1018 static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1019 {
1020 	ptq->last_branch_pos = 0;
1021 	ptq->last_branch_rb->nr = 0;
1022 }
1023 
1024 static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1025 {
1026 	const struct intel_pt_state *state = ptq->state;
1027 	struct branch_stack *bs = ptq->last_branch_rb;
1028 	struct branch_entry *be;
1029 
1030 	if (!ptq->last_branch_pos)
1031 		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1032 
1033 	ptq->last_branch_pos -= 1;
1034 
1035 	be              = &bs->entries[ptq->last_branch_pos];
1036 	be->from        = state->from_ip;
1037 	be->to          = state->to_ip;
1038 	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1039 	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1040 	/* No support for mispredict */
1041 	be->flags.mispred = ptq->pt->mispred_all;
1042 
1043 	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1044 		bs->nr += 1;
1045 }
1046 
1047 static int intel_pt_inject_event(union perf_event *event,
1048 				 struct perf_sample *sample, u64 type,
1049 				 bool swapped)
1050 {
1051 	event->header.size = perf_event__sample_event_size(sample, type, 0);
1052 	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
1053 }
1054 
1055 static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1056 {
1057 	int ret;
1058 	struct intel_pt *pt = ptq->pt;
1059 	union perf_event *event = ptq->event_buf;
1060 	struct perf_sample sample = { .ip = 0, };
1061 	struct dummy_branch_stack {
1062 		u64			nr;
1063 		struct branch_entry	entries;
1064 	} dummy_bs;
1065 
1066 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1067 		return 0;
1068 
1069 	if (pt->synth_opts.initial_skip &&
1070 	    pt->num_events++ < pt->synth_opts.initial_skip)
1071 		return 0;
1072 
1073 	event->sample.header.type = PERF_RECORD_SAMPLE;
1074 	event->sample.header.misc = PERF_RECORD_MISC_USER;
1075 	event->sample.header.size = sizeof(struct perf_event_header);
1076 
1077 	if (!pt->timeless_decoding)
1078 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1079 
1080 	sample.cpumode = PERF_RECORD_MISC_USER;
1081 	sample.ip = ptq->state->from_ip;
1082 	sample.pid = ptq->pid;
1083 	sample.tid = ptq->tid;
1084 	sample.addr = ptq->state->to_ip;
1085 	sample.id = ptq->pt->branches_id;
1086 	sample.stream_id = ptq->pt->branches_id;
1087 	sample.period = 1;
1088 	sample.cpu = ptq->cpu;
1089 	sample.flags = ptq->flags;
1090 	sample.insn_len = ptq->insn_len;
1091 	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1092 
1093 	/*
1094 	 * perf report cannot handle events without a branch stack when using
1095 	 * SORT_MODE__BRANCH, so make a dummy one.
1096 	 */
1097 	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1098 		dummy_bs = (struct dummy_branch_stack){
1099 			.nr = 1,
1100 			.entries = {
1101 				.from = sample.ip,
1102 				.to = sample.addr,
1103 			},
1104 		};
1105 		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1106 	}
1107 
1108 	if (pt->synth_opts.inject) {
1109 		ret = intel_pt_inject_event(event, &sample,
1110 					    pt->branches_sample_type,
1111 					    pt->synth_needs_swap);
1112 		if (ret)
1113 			return ret;
1114 	}
1115 
1116 	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1117 	if (ret)
1118 		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
1119 		       ret);
1120 
1121 	return ret;
1122 }
1123 
1124 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1125 {
1126 	int ret;
1127 	struct intel_pt *pt = ptq->pt;
1128 	union perf_event *event = ptq->event_buf;
1129 	struct perf_sample sample = { .ip = 0, };
1130 
1131 	if (pt->synth_opts.initial_skip &&
1132 	    pt->num_events++ < pt->synth_opts.initial_skip)
1133 		return 0;
1134 
1135 	event->sample.header.type = PERF_RECORD_SAMPLE;
1136 	event->sample.header.misc = PERF_RECORD_MISC_USER;
1137 	event->sample.header.size = sizeof(struct perf_event_header);
1138 
1139 	if (!pt->timeless_decoding)
1140 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1141 
1142 	sample.cpumode = PERF_RECORD_MISC_USER;
1143 	sample.ip = ptq->state->from_ip;
1144 	sample.pid = ptq->pid;
1145 	sample.tid = ptq->tid;
1146 	sample.addr = ptq->state->to_ip;
1147 	sample.id = ptq->pt->instructions_id;
1148 	sample.stream_id = ptq->pt->instructions_id;
1149 	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1150 	sample.cpu = ptq->cpu;
1151 	sample.flags = ptq->flags;
1152 	sample.insn_len = ptq->insn_len;
1153 	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1154 
1155 	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1156 
1157 	if (pt->synth_opts.callchain) {
1158 		thread_stack__sample(ptq->thread, ptq->chain,
1159 				     pt->synth_opts.callchain_sz, sample.ip);
1160 		sample.callchain = ptq->chain;
1161 	}
1162 
1163 	if (pt->synth_opts.last_branch) {
1164 		intel_pt_copy_last_branch_rb(ptq);
1165 		sample.branch_stack = ptq->last_branch;
1166 	}
1167 
1168 	if (pt->synth_opts.inject) {
1169 		ret = intel_pt_inject_event(event, &sample,
1170 					    pt->instructions_sample_type,
1171 					    pt->synth_needs_swap);
1172 		if (ret)
1173 			return ret;
1174 	}
1175 
1176 	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1177 	if (ret)
1178 		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
1179 		       ret);
1180 
1181 	if (pt->synth_opts.last_branch)
1182 		intel_pt_reset_last_branch_rb(ptq);
1183 
1184 	return ret;
1185 }
1186 
1187 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1188 {
1189 	int ret;
1190 	struct intel_pt *pt = ptq->pt;
1191 	union perf_event *event = ptq->event_buf;
1192 	struct perf_sample sample = { .ip = 0, };
1193 
1194 	if (pt->synth_opts.initial_skip &&
1195 	    pt->num_events++ < pt->synth_opts.initial_skip)
1196 		return 0;
1197 
1198 	event->sample.header.type = PERF_RECORD_SAMPLE;
1199 	event->sample.header.misc = PERF_RECORD_MISC_USER;
1200 	event->sample.header.size = sizeof(struct perf_event_header);
1201 
1202 	if (!pt->timeless_decoding)
1203 		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1204 
1205 	sample.cpumode = PERF_RECORD_MISC_USER;
1206 	sample.ip = ptq->state->from_ip;
1207 	sample.pid = ptq->pid;
1208 	sample.tid = ptq->tid;
1209 	sample.addr = ptq->state->to_ip;
1210 	sample.id = ptq->pt->transactions_id;
1211 	sample.stream_id = ptq->pt->transactions_id;
1212 	sample.period = 1;
1213 	sample.cpu = ptq->cpu;
1214 	sample.flags = ptq->flags;
1215 	sample.insn_len = ptq->insn_len;
1216 	memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1217 
1218 	if (pt->synth_opts.callchain) {
1219 		thread_stack__sample(ptq->thread, ptq->chain,
1220 				     pt->synth_opts.callchain_sz, sample.ip);
1221 		sample.callchain = ptq->chain;
1222 	}
1223 
1224 	if (pt->synth_opts.last_branch) {
1225 		intel_pt_copy_last_branch_rb(ptq);
1226 		sample.branch_stack = ptq->last_branch;
1227 	}
1228 
1229 	if (pt->synth_opts.inject) {
1230 		ret = intel_pt_inject_event(event, &sample,
1231 					    pt->transactions_sample_type,
1232 					    pt->synth_needs_swap);
1233 		if (ret)
1234 			return ret;
1235 	}
1236 
1237 	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1238 	if (ret)
1239 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
1240 		       ret);
1241 
1242 	if (pt->synth_opts.last_branch)
1243 		intel_pt_reset_last_branch_rb(ptq);
1244 
1245 	return ret;
1246 }
1247 
1248 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1249 				pid_t pid, pid_t tid, u64 ip)
1250 {
1251 	union perf_event event;
1252 	char msg[MAX_AUXTRACE_ERROR_MSG];
1253 	int err;
1254 
1255 	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1256 
1257 	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1258 			     code, cpu, pid, tid, ip, msg);
1259 
1260 	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1261 	if (err)
1262 		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1263 		       err);
1264 
1265 	return err;
1266 }
1267 
1268 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1269 {
1270 	struct auxtrace_queue *queue;
1271 	pid_t tid = ptq->next_tid;
1272 	int err;
1273 
1274 	if (tid == -1)
1275 		return 0;
1276 
1277 	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1278 
1279 	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1280 
1281 	queue = &pt->queues.queue_array[ptq->queue_nr];
1282 	intel_pt_set_pid_tid_cpu(pt, queue);
1283 
1284 	ptq->next_tid = -1;
1285 
1286 	return err;
1287 }
1288 
1289 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1290 {
1291 	struct intel_pt *pt = ptq->pt;
1292 
1293 	return ip == pt->switch_ip &&
1294 	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1295 	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1296 			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1297 }
1298 
1299 static int intel_pt_sample(struct intel_pt_queue *ptq)
1300 {
1301 	const struct intel_pt_state *state = ptq->state;
1302 	struct intel_pt *pt = ptq->pt;
1303 	int err;
1304 
1305 	if (!ptq->have_sample)
1306 		return 0;
1307 
1308 	ptq->have_sample = false;
1309 
1310 	if (pt->sample_instructions &&
1311 	    (state->type & INTEL_PT_INSTRUCTION) &&
1312 	    (!pt->synth_opts.initial_skip ||
1313 	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
1314 		err = intel_pt_synth_instruction_sample(ptq);
1315 		if (err)
1316 			return err;
1317 	}
1318 
1319 	if (pt->sample_transactions &&
1320 	    (state->type & INTEL_PT_TRANSACTION) &&
1321 	    (!pt->synth_opts.initial_skip ||
1322 	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
1323 		err = intel_pt_synth_transaction_sample(ptq);
1324 		if (err)
1325 			return err;
1326 	}
1327 
1328 	if (!(state->type & INTEL_PT_BRANCH))
1329 		return 0;
1330 
1331 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1332 		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1333 				    state->to_ip, ptq->insn_len,
1334 				    state->trace_nr);
1335 	else
1336 		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1337 
1338 	if (pt->sample_branches) {
1339 		err = intel_pt_synth_branch_sample(ptq);
1340 		if (err)
1341 			return err;
1342 	}
1343 
1344 	if (pt->synth_opts.last_branch)
1345 		intel_pt_update_last_branch_rb(ptq);
1346 
1347 	if (!pt->sync_switch)
1348 		return 0;
1349 
1350 	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1351 		switch (ptq->switch_state) {
1352 		case INTEL_PT_SS_UNKNOWN:
1353 		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1354 			err = intel_pt_next_tid(pt, ptq);
1355 			if (err)
1356 				return err;
1357 			ptq->switch_state = INTEL_PT_SS_TRACING;
1358 			break;
1359 		default:
1360 			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1361 			return 1;
1362 		}
1363 	} else if (!state->to_ip) {
1364 		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1365 	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1366 		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1367 	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1368 		   state->to_ip == pt->ptss_ip &&
1369 		   (ptq->flags & PERF_IP_FLAG_CALL)) {
1370 		ptq->switch_state = INTEL_PT_SS_TRACING;
1371 	}
1372 
1373 	return 0;
1374 }
1375 
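/*
 * Find the kernel address of __switch_to(), used to recognize context
 * switches in the trace, and of the symbol (returned via ptss_ip) at which
 * the recorded sched switch events are emitted.
 */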
1376 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1377 {
1378 	struct machine *machine = pt->machine;
1379 	struct map *map;
1380 	struct symbol *sym, *start;
1381 	u64 ip, switch_ip = 0;
1382 	const char *ptss;
1383 
1384 	if (ptss_ip)
1385 		*ptss_ip = 0;
1386 
1387 	map = machine__kernel_map(machine);
1388 	if (!map)
1389 		return 0;
1390 
1391 	if (map__load(map))
1392 		return 0;
1393 
1394 	start = dso__first_symbol(map->dso, MAP__FUNCTION);
1395 
1396 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
1397 		if (sym->binding == STB_GLOBAL &&
1398 		    !strcmp(sym->name, "__switch_to")) {
1399 			ip = map->unmap_ip(map, sym->start);
1400 			if (ip >= map->start && ip < map->end) {
1401 				switch_ip = ip;
1402 				break;
1403 			}
1404 		}
1405 	}
1406 
1407 	if (!switch_ip || !ptss_ip)
1408 		return 0;
1409 
1410 	if (pt->have_sched_switch == 1)
1411 		ptss = "perf_trace_sched_switch";
1412 	else
1413 		ptss = "__perf_event_task_sched_out";
1414 
1415 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
1416 		if (!strcmp(sym->name, ptss)) {
1417 			ip = map->unmap_ip(map, sym->start);
1418 			if (ip >= map->start && ip < map->end) {
1419 				*ptss_ip = ip;
1420 				break;
1421 			}
1422 		}
1423 	}
1424 
1425 	return switch_ip;
1426 }
1427 
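/*
 * Decode one queue, synthesizing samples and errors as it goes, until the
 * decoded timestamp reaches *timestamp.  Returns 0 when *timestamp is
 * reached, a positive value when the queue should come off the heap (no more
 * data, or waiting for a context-switch event), or a negative error code.
 */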
1428 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1429 {
1430 	const struct intel_pt_state *state = ptq->state;
1431 	struct intel_pt *pt = ptq->pt;
1432 	int err;
1433 
1434 	if (!pt->kernel_start) {
1435 		pt->kernel_start = machine__kernel_start(pt->machine);
1436 		if (pt->per_cpu_mmaps &&
1437 		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1438 		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1439 		    !pt->sampling_mode) {
1440 			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1441 			if (pt->switch_ip) {
1442 				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1443 					     pt->switch_ip, pt->ptss_ip);
1444 				pt->sync_switch = true;
1445 			}
1446 		}
1447 	}
1448 
1449 	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1450 		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1451 	while (1) {
1452 		err = intel_pt_sample(ptq);
1453 		if (err)
1454 			return err;
1455 
1456 		state = intel_pt_decode(ptq->decoder);
1457 		if (state->err) {
1458 			if (state->err == INTEL_PT_ERR_NODATA)
1459 				return 1;
1460 			if (pt->sync_switch &&
1461 			    state->from_ip >= pt->kernel_start) {
1462 				pt->sync_switch = false;
1463 				intel_pt_next_tid(pt, ptq);
1464 			}
1465 			if (pt->synth_opts.errors) {
1466 				err = intel_pt_synth_error(pt, state->err,
1467 							   ptq->cpu, ptq->pid,
1468 							   ptq->tid,
1469 							   state->from_ip);
1470 				if (err)
1471 					return err;
1472 			}
1473 			continue;
1474 		}
1475 
1476 		ptq->state = state;
1477 		ptq->have_sample = true;
1478 		intel_pt_sample_flags(ptq);
1479 
1480 		/* Use estimated TSC upon return to user space */
1481 		if (pt->est_tsc &&
1482 		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1483 		    state->to_ip && state->to_ip < pt->kernel_start) {
1484 			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1485 				     state->timestamp, state->est_timestamp);
1486 			ptq->timestamp = state->est_timestamp;
1487 		/* Use estimated TSC in unknown switch state */
1488 		} else if (pt->sync_switch &&
1489 			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1490 			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
1491 			   ptq->next_tid == -1) {
1492 			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1493 				     state->timestamp, state->est_timestamp);
1494 			ptq->timestamp = state->est_timestamp;
1495 		} else if (state->timestamp > ptq->timestamp) {
1496 			ptq->timestamp = state->timestamp;
1497 		}
1498 
1499 		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1500 			*timestamp = ptq->timestamp;
1501 			return 0;
1502 		}
1503 	}
1504 	return 0;
1505 }
1506 
1507 static inline int intel_pt_update_queues(struct intel_pt *pt)
1508 {
1509 	if (pt->queues.new_data) {
1510 		pt->queues.new_data = false;
1511 		return intel_pt_setup_queues(pt);
1512 	}
1513 	return 0;
1514 }
1515 
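/*
 * Decode queues in timestamp order: repeatedly take the queue with the oldest
 * pending timestamp from the auxtrace heap and run its decoder up to the next
 * queue's timestamp, until every queue has caught up to 'timestamp'.
 */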
1516 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1517 {
1518 	unsigned int queue_nr;
1519 	u64 ts;
1520 	int ret;
1521 
1522 	while (1) {
1523 		struct auxtrace_queue *queue;
1524 		struct intel_pt_queue *ptq;
1525 
1526 		if (!pt->heap.heap_cnt)
1527 			return 0;
1528 
1529 		if (pt->heap.heap_array[0].ordinal >= timestamp)
1530 			return 0;
1531 
1532 		queue_nr = pt->heap.heap_array[0].queue_nr;
1533 		queue = &pt->queues.queue_array[queue_nr];
1534 		ptq = queue->priv;
1535 
1536 		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1537 			     queue_nr, pt->heap.heap_array[0].ordinal,
1538 			     timestamp);
1539 
1540 		auxtrace_heap__pop(&pt->heap);
1541 
1542 		if (pt->heap.heap_cnt) {
1543 			ts = pt->heap.heap_array[0].ordinal + 1;
1544 			if (ts > timestamp)
1545 				ts = timestamp;
1546 		} else {
1547 			ts = timestamp;
1548 		}
1549 
1550 		intel_pt_set_pid_tid_cpu(pt, queue);
1551 
1552 		ret = intel_pt_run_decoder(ptq, &ts);
1553 
1554 		if (ret < 0) {
1555 			auxtrace_heap__add(&pt->heap, queue_nr, ts);
1556 			return ret;
1557 		}
1558 
1559 		if (!ret) {
1560 			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1561 			if (ret < 0)
1562 				return ret;
1563 		} else {
1564 			ptq->on_heap = false;
1565 		}
1566 	}
1567 
1568 	return 0;
1569 }
1570 
1571 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1572 					    u64 time_)
1573 {
1574 	struct auxtrace_queues *queues = &pt->queues;
1575 	unsigned int i;
1576 	u64 ts = 0;
1577 
1578 	for (i = 0; i < queues->nr_queues; i++) {
1579 		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1580 		struct intel_pt_queue *ptq = queue->priv;
1581 
1582 		if (ptq && (tid == -1 || ptq->tid == tid)) {
1583 			ptq->time = time_;
1584 			intel_pt_set_pid_tid_cpu(pt, queue);
1585 			intel_pt_run_decoder(ptq, &ts);
1586 		}
1587 	}
1588 	return 0;
1589 }
1590 
1591 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1592 {
1593 	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1594 				    sample->pid, sample->tid, 0);
1595 }
1596 
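/*
 * Map a CPU number to its decode queue.  The queue number usually equals the
 * CPU number, so try that index first, then search the remaining queues.
 */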
1597 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1598 {
1599 	unsigned i, j;
1600 
1601 	if (cpu < 0 || !pt->queues.nr_queues)
1602 		return NULL;
1603 
1604 	if ((unsigned)cpu >= pt->queues.nr_queues)
1605 		i = pt->queues.nr_queues - 1;
1606 	else
1607 		i = cpu;
1608 
1609 	if (pt->queues.queue_array[i].cpu == cpu)
1610 		return pt->queues.queue_array[i].priv;
1611 
1612 	for (j = 0; i > 0; j++) {
1613 		if (pt->queues.queue_array[--i].cpu == cpu)
1614 			return pt->queues.queue_array[i].priv;
1615 	}
1616 
1617 	for (; j < pt->queues.nr_queues; j++) {
1618 		if (pt->queues.queue_array[j].cpu == cpu)
1619 			return pt->queues.queue_array[j].priv;
1620 	}
1621 
1622 	return NULL;
1623 }
1624 
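/*
 * Account for a context switch when decoding is synchronized with sched
 * switch events.  Returns 0 if the tid change must be deferred until the
 * switch is seen in the trace, 1 if the caller should update the current tid
 * immediately, or a negative error code.
 */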
1625 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1626 				u64 timestamp)
1627 {
1628 	struct intel_pt_queue *ptq;
1629 	int err;
1630 
1631 	if (!pt->sync_switch)
1632 		return 1;
1633 
1634 	ptq = intel_pt_cpu_to_ptq(pt, cpu);
1635 	if (!ptq)
1636 		return 1;
1637 
1638 	switch (ptq->switch_state) {
1639 	case INTEL_PT_SS_NOT_TRACING:
1640 		ptq->next_tid = -1;
1641 		break;
1642 	case INTEL_PT_SS_UNKNOWN:
1643 	case INTEL_PT_SS_TRACING:
1644 		ptq->next_tid = tid;
1645 		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1646 		return 0;
1647 	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1648 		if (!ptq->on_heap) {
1649 			ptq->timestamp = perf_time_to_tsc(timestamp,
1650 							  &pt->tc);
1651 			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1652 						 ptq->timestamp);
1653 			if (err)
1654 				return err;
1655 			ptq->on_heap = true;
1656 		}
1657 		ptq->switch_state = INTEL_PT_SS_TRACING;
1658 		break;
1659 	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1660 		ptq->next_tid = tid;
1661 		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1662 		break;
1663 	default:
1664 		break;
1665 	}
1666 
1667 	return 1;
1668 }
1669 
1670 static int intel_pt_process_switch(struct intel_pt *pt,
1671 				   struct perf_sample *sample)
1672 {
1673 	struct perf_evsel *evsel;
1674 	pid_t tid;
1675 	int cpu, ret;
1676 
1677 	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1678 	if (evsel != pt->switch_evsel)
1679 		return 0;
1680 
1681 	tid = perf_evsel__intval(evsel, sample, "next_pid");
1682 	cpu = sample->cpu;
1683 
1684 	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1685 		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1686 		     &pt->tc));
1687 
1688 	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1689 	if (ret <= 0)
1690 		return ret;
1691 
1692 	return machine__set_current_tid(pt->machine, cpu, -1, tid);
1693 }
1694 
1695 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1696 				   struct perf_sample *sample)
1697 {
1698 	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1699 	pid_t pid, tid;
1700 	int cpu, ret;
1701 
1702 	cpu = sample->cpu;
1703 
1704 	if (pt->have_sched_switch == 3) {
1705 		if (!out)
1706 			return 0;
1707 		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1708 			pr_err("Expecting CPU-wide context switch event\n");
1709 			return -EINVAL;
1710 		}
1711 		pid = event->context_switch.next_prev_pid;
1712 		tid = event->context_switch.next_prev_tid;
1713 	} else {
1714 		if (out)
1715 			return 0;
1716 		pid = sample->pid;
1717 		tid = sample->tid;
1718 	}
1719 
1720 	if (tid == -1) {
1721 		pr_err("context_switch event has no tid\n");
1722 		return -EINVAL;
1723 	}
1724 
1725 	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1726 		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1727 		     &pt->tc));
1728 
1729 	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1730 	if (ret <= 0)
1731 		return ret;
1732 
1733 	return machine__set_current_tid(pt->machine, cpu, pid, tid);
1734 }
1735 
1736 static int intel_pt_process_itrace_start(struct intel_pt *pt,
1737 					 union perf_event *event,
1738 					 struct perf_sample *sample)
1739 {
1740 	if (!pt->per_cpu_mmaps)
1741 		return 0;
1742 
1743 	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1744 		     sample->cpu, event->itrace_start.pid,
1745 		     event->itrace_start.tid, sample->time,
1746 		     perf_time_to_tsc(sample->time, &pt->tc));
1747 
1748 	return machine__set_current_tid(pt->machine, sample->cpu,
1749 					event->itrace_start.pid,
1750 					event->itrace_start.tid);
1751 }
1752 
1753 static int intel_pt_process_event(struct perf_session *session,
1754 				  union perf_event *event,
1755 				  struct perf_sample *sample,
1756 				  struct perf_tool *tool)
1757 {
1758 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1759 					   auxtrace);
1760 	u64 timestamp;
1761 	int err = 0;
1762 
1763 	if (dump_trace)
1764 		return 0;
1765 
1766 	if (!tool->ordered_events) {
1767 		pr_err("Intel Processor Trace requires ordered events\n");
1768 		return -EINVAL;
1769 	}
1770 
1771 	if (sample->time && sample->time != (u64)-1)
1772 		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1773 	else
1774 		timestamp = 0;
1775 
1776 	if (timestamp || pt->timeless_decoding) {
1777 		err = intel_pt_update_queues(pt);
1778 		if (err)
1779 			return err;
1780 	}
1781 
1782 	if (pt->timeless_decoding) {
1783 		if (event->header.type == PERF_RECORD_EXIT) {
1784 			err = intel_pt_process_timeless_queues(pt,
1785 							       event->fork.tid,
1786 							       sample->time);
1787 		}
1788 	} else if (timestamp) {
1789 		err = intel_pt_process_queues(pt, timestamp);
1790 	}
1791 	if (err)
1792 		return err;
1793 
1794 	if (event->header.type == PERF_RECORD_AUX &&
1795 	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1796 	    pt->synth_opts.errors) {
1797 		err = intel_pt_lost(pt, sample);
1798 		if (err)
1799 			return err;
1800 	}
1801 
1802 	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1803 		err = intel_pt_process_switch(pt, sample);
1804 	else if (event->header.type == PERF_RECORD_ITRACE_START)
1805 		err = intel_pt_process_itrace_start(pt, event, sample);
1806 	else if (event->header.type == PERF_RECORD_SWITCH ||
1807 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
1808 		err = intel_pt_context_switch(pt, event, sample);
1809 
1810 	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1811 		     perf_event__name(event->header.type), event->header.type,
1812 		     sample->cpu, sample->time, timestamp);
1813 
1814 	return err;
1815 }
1816 
1817 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1818 {
1819 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1820 					   auxtrace);
1821 	int ret;
1822 
1823 	if (dump_trace)
1824 		return 0;
1825 
1826 	if (!tool->ordered_events)
1827 		return -EINVAL;
1828 
1829 	ret = intel_pt_update_queues(pt);
1830 	if (ret < 0)
1831 		return ret;
1832 
1833 	if (pt->timeless_decoding)
1834 		return intel_pt_process_timeless_queues(pt, -1,
1835 							MAX_TIMESTAMP - 1);
1836 
1837 	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1838 }
1839 
1840 static void intel_pt_free_events(struct perf_session *session)
1841 {
1842 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1843 					   auxtrace);
1844 	struct auxtrace_queues *queues = &pt->queues;
1845 	unsigned int i;
1846 
1847 	for (i = 0; i < queues->nr_queues; i++) {
1848 		intel_pt_free_queue(queues->queue_array[i].priv);
1849 		queues->queue_array[i].priv = NULL;
1850 	}
1851 	intel_pt_log_disable();
1852 	auxtrace_queues__free(queues);
1853 }
1854 
1855 static void intel_pt_free(struct perf_session *session)
1856 {
1857 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1858 					   auxtrace);
1859 
1860 	auxtrace_heap__free(&pt->heap);
1861 	intel_pt_free_events(session);
1862 	session->auxtrace = NULL;
1863 	thread__put(pt->unknown_thread);
1864 	addr_filters__exit(&pt->filts);
1865 	zfree(&pt->filter);
1866 	free(pt);
1867 }
1868 
1869 static int intel_pt_process_auxtrace_event(struct perf_session *session,
1870 					   union perf_event *event,
1871 					   struct perf_tool *tool __maybe_unused)
1872 {
1873 	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1874 					   auxtrace);
1875 
1876 	if (pt->sampling_mode)
1877 		return 0;
1878 
1879 	if (!pt->data_queued) {
1880 		struct auxtrace_buffer *buffer;
1881 		off_t data_offset;
1882 		int fd = perf_data_file__fd(session->file);
1883 		int err;
1884 
1885 		if (perf_data_file__is_pipe(session->file)) {
1886 			data_offset = 0;
1887 		} else {
1888 			data_offset = lseek(fd, 0, SEEK_CUR);
1889 			if (data_offset == -1)
1890 				return -errno;
1891 		}
1892 
1893 		err = auxtrace_queues__add_event(&pt->queues, session, event,
1894 						 data_offset, &buffer);
1895 		if (err)
1896 			return err;
1897 
1898 		/* Dump here now that we have copied a piped trace out of the pipe */
1899 		if (dump_trace) {
1900 			if (auxtrace_buffer__get_data(buffer, fd)) {
1901 				intel_pt_dump_event(pt, buffer->data,
1902 						    buffer->size);
1903 				auxtrace_buffer__put_data(buffer);
1904 			}
1905 		}
1906 	}
1907 
1908 	return 0;
1909 }
1910 
1911 struct intel_pt_synth {
1912 	struct perf_tool dummy_tool;
1913 	struct perf_session *session;
1914 };
1915 
1916 static int intel_pt_event_synth(struct perf_tool *tool,
1917 				union perf_event *event,
1918 				struct perf_sample *sample __maybe_unused,
1919 				struct machine *machine __maybe_unused)
1920 {
1921 	struct intel_pt_synth *intel_pt_synth =
1922 			container_of(tool, struct intel_pt_synth, dummy_tool);
1923 
1924 	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1925 						 NULL);
1926 }
1927 
1928 static int intel_pt_synth_event(struct perf_session *session,
1929 				struct perf_event_attr *attr, u64 id)
1930 {
1931 	struct intel_pt_synth intel_pt_synth;
1932 
1933 	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1934 	intel_pt_synth.session = session;
1935 
1936 	return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1937 					   &id, intel_pt_event_synth);
1938 }
1939 
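/*
 * Synthesize attribute events describing the "instructions", "transactions"
 * and "branches" samples the decoder will generate, deriving their sample
 * types from the recorded Intel PT event.
 */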
1940 static int intel_pt_synth_events(struct intel_pt *pt,
1941 				 struct perf_session *session)
1942 {
1943 	struct perf_evlist *evlist = session->evlist;
1944 	struct perf_evsel *evsel;
1945 	struct perf_event_attr attr;
1946 	bool found = false;
1947 	u64 id;
1948 	int err;
1949 
1950 	evlist__for_each_entry(evlist, evsel) {
1951 		if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1952 			found = true;
1953 			break;
1954 		}
1955 	}
1956 
1957 	if (!found) {
1958 		pr_debug("There are no selected events with Intel Processor Trace data\n");
1959 		return 0;
1960 	}
1961 
1962 	memset(&attr, 0, sizeof(struct perf_event_attr));
1963 	attr.size = sizeof(struct perf_event_attr);
1964 	attr.type = PERF_TYPE_HARDWARE;
1965 	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1966 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1967 			    PERF_SAMPLE_PERIOD;
1968 	if (pt->timeless_decoding)
1969 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1970 	else
1971 		attr.sample_type |= PERF_SAMPLE_TIME;
1972 	if (!pt->per_cpu_mmaps)
1973 		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1974 	attr.exclude_user = evsel->attr.exclude_user;
1975 	attr.exclude_kernel = evsel->attr.exclude_kernel;
1976 	attr.exclude_hv = evsel->attr.exclude_hv;
1977 	attr.exclude_host = evsel->attr.exclude_host;
1978 	attr.exclude_guest = evsel->attr.exclude_guest;
1979 	attr.sample_id_all = evsel->attr.sample_id_all;
1980 	attr.read_format = evsel->attr.read_format;
1981 
1982 	id = evsel->id[0] + 1000000000;
1983 	if (!id)
1984 		id = 1;
1985 
1986 	if (pt->synth_opts.instructions) {
1987 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1988 		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1989 			attr.sample_period =
1990 				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1991 		else
1992 			attr.sample_period = pt->synth_opts.period;
1993 		pt->instructions_sample_period = attr.sample_period;
1994 		if (pt->synth_opts.callchain)
1995 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1996 		if (pt->synth_opts.last_branch)
1997 			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1998 		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1999 			 id, (u64)attr.sample_type);
2000 		err = intel_pt_synth_event(session, &attr, id);
2001 		if (err) {
2002 			pr_err("%s: failed to synthesize 'instructions' event type\n",
2003 			       __func__);
2004 			return err;
2005 		}
2006 		pt->sample_instructions = true;
2007 		pt->instructions_sample_type = attr.sample_type;
2008 		pt->instructions_id = id;
2009 		id += 1;
2010 	}
2011 
2012 	if (pt->synth_opts.transactions) {
2013 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2014 		attr.sample_period = 1;
2015 		if (pt->synth_opts.callchain)
2016 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2017 		if (pt->synth_opts.last_branch)
2018 			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2019 		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2020 			 id, (u64)attr.sample_type);
2021 		err = intel_pt_synth_event(session, &attr, id);
2022 		if (err) {
2023 			pr_err("%s: failed to synthesize 'transactions' event type\n",
2024 			       __func__);
2025 			return err;
2026 		}
2027 		pt->sample_transactions = true;
2028 		pt->transactions_id = id;
2029 		id += 1;
2030 		evlist__for_each_entry(evlist, evsel) {
2031 			if (evsel->id && evsel->id[0] == pt->transactions_id) {
2032 				if (evsel->name)
2033 					zfree(&evsel->name);
2034 				evsel->name = strdup("transactions");
2035 				break;
2036 			}
2037 		}
2038 	}
2039 
2040 	if (pt->synth_opts.branches) {
2041 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2042 		attr.sample_period = 1;
2043 		attr.sample_type |= PERF_SAMPLE_ADDR;
2044 		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
2045 		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
2046 		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2047 			 id, (u64)attr.sample_type);
2048 		err = intel_pt_synth_event(session, &attr, id);
2049 		if (err) {
2050 			pr_err("%s: failed to synthesize 'branches' event type\n",
2051 			       __func__);
2052 			return err;
2053 		}
2054 		pt->sample_branches = true;
2055 		pt->branches_sample_type = attr.sample_type;
2056 		pt->branches_id = id;
2057 	}
2058 
2059 	pt->synth_needs_swap = evsel->needs_swap;
2060 
2061 	return 0;
2062 }
2063 
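/*
 * Context switch information comes either from a sched:sched_switch
 * tracepoint (have_sched_switch == 1) or from events recorded with the
 * context_switch attribute bit set (have_sched_switch == 2).  The two
 * helpers below locate the corresponding evsel / attribute flag.
 */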
2064 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2065 {
2066 	struct perf_evsel *evsel;
2067 
2068 	evlist__for_each_entry_reverse(evlist, evsel) {
2069 		const char *name = perf_evsel__name(evsel);
2070 
2071 		if (!strcmp(name, "sched:sched_switch"))
2072 			return evsel;
2073 	}
2074 
2075 	return NULL;
2076 }
2077 
2078 static bool intel_pt_find_switch(struct perf_evlist *evlist)
2079 {
2080 	struct perf_evsel *evsel;
2081 
2082 	evlist__for_each_entry(evlist, evsel) {
2083 		if (evsel->attr.context_switch)
2084 			return true;
2085 	}
2086 
2087 	return false;
2088 }
2089 
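/*
 * Parse perfconfig variables for the intel-pt section.  Currently only the
 * boolean intel-pt.mispred-all is recognized, e.g. in ~/.perfconfig:
 *
 *	[intel-pt]
 *		mispred-all = true
 */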
2090 static int intel_pt_perf_config(const char *var, const char *value, void *data)
2091 {
2092 	struct intel_pt *pt = data;
2093 
2094 	if (!strcmp(var, "intel-pt.mispred-all"))
2095 		pt->mispred_all = perf_config_bool(var, value);
2096 
2097 	return 0;
2098 }
2099 
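/*
 * printf formats for dumping the auxtrace_info priv[] array, indexed by the
 * same INTEL_PT_* positions and used only when dump_trace is set.
 */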
2100 static const char * const intel_pt_info_fmts[] = {
2101 	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
2102 	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
2103 	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
2104 	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
2105 	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
2106 	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
2107 	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
2108 	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
2109 	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
2110 	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
2111 	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
2112 	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
2113 	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
2114 	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
2115 	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
2116 	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
2117 };
2118 
2119 static void intel_pt_print_info(u64 *arr, int start, int finish)
2120 {
2121 	int i;
2122 
2123 	if (!dump_trace)
2124 		return;
2125 
2126 	for (i = start; i <= finish; i++)
2127 		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2128 }
2129 
2130 static void intel_pt_print_info_str(const char *name, const char *str)
2131 {
2132 	if (!dump_trace)
2133 		return;
2134 
2135 	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2136 }
2137 
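/*
 * The auxtrace_info priv[] array has grown over time, so check that the
 * event is big enough to contain position 'pos' before reading it.
 */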
2138 static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2139 {
2140 	return auxtrace_info->header.size >=
2141 		sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2142 }
2143 
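/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: validate the event, populate
 * struct intel_pt from priv[] (including the optional address filter string),
 * create the "unknown" thread, register the auxtrace callbacks and synthesize
 * the attribute events for the samples the decoder will produce.
 */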
2144 int intel_pt_process_auxtrace_info(union perf_event *event,
2145 				   struct perf_session *session)
2146 {
2147 	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2148 	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2149 	struct intel_pt *pt;
2150 	void *info_end;
2151 	u64 *info;
2152 	int err;
2153 
2154 	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2155 					min_sz)
2156 		return -EINVAL;
2157 
2158 	pt = zalloc(sizeof(struct intel_pt));
2159 	if (!pt)
2160 		return -ENOMEM;
2161 
2162 	addr_filters__init(&pt->filts);
2163 
2164 	err = perf_config(intel_pt_perf_config, pt);
2165 	if (err)
2166 		goto err_free;
2167 
2168 	err = auxtrace_queues__init(&pt->queues);
2169 	if (err)
2170 		goto err_free;
2171 
2172 	intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2173 
2174 	pt->session = session;
2175 	pt->machine = &session->machines.host; /* No kvm support */
2176 	pt->auxtrace_type = auxtrace_info->type;
2177 	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2178 	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2179 	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2180 	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2181 	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2182 	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2183 	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2184 	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2185 	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2186 	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2187 	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2188 			    INTEL_PT_PER_CPU_MMAPS);
2189 
2190 	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2191 		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2192 		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2193 		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2194 		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2195 		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2196 		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2197 				    INTEL_PT_CYC_BIT);
2198 	}
2199 
2200 	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2201 		pt->max_non_turbo_ratio =
2202 			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2203 		intel_pt_print_info(&auxtrace_info->priv[0],
2204 				    INTEL_PT_MAX_NONTURBO_RATIO,
2205 				    INTEL_PT_MAX_NONTURBO_RATIO);
2206 	}
2207 
2208 	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2209 	info_end = (void *)info + auxtrace_info->header.size;
2210 
2211 	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2212 		size_t len;
2213 
2214 		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2215 		intel_pt_print_info(&auxtrace_info->priv[0],
2216 				    INTEL_PT_FILTER_STR_LEN,
2217 				    INTEL_PT_FILTER_STR_LEN);
2218 		if (len) {
2219 			const char *filter = (const char *)info;
2220 
2221 			len = roundup(len + 1, 8);
2222 			info += len >> 3;
2223 			if ((void *)info > info_end) {
2224 				pr_err("%s: bad filter string length\n", __func__);
2225 				err = -EINVAL;
2226 				goto err_free_queues;
2227 			}
2228 			pt->filter = memdup(filter, len);
2229 			if (!pt->filter) {
2230 				err = -ENOMEM;
2231 				goto err_free_queues;
2232 			}
2233 			if (session->header.needs_swap)
2234 				mem_bswap_64(pt->filter, len);
2235 			if (pt->filter[len - 1]) {
2236 				pr_err("%s: filter string not null terminated\n", __func__);
2237 				err = -EINVAL;
2238 				goto err_free_queues;
2239 			}
2240 			err = addr_filters__parse_bare_filter(&pt->filts,
2241 							      filter);
2242 			if (err)
2243 				goto err_free_queues;
2244 		}
2245 		intel_pt_print_info_str("Filter string", pt->filter);
2246 	}
2247 
2248 	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2249 	pt->have_tsc = intel_pt_have_tsc(pt);
2250 	pt->sampling_mode = false;
2251 	pt->est_tsc = !pt->timeless_decoding;
2252 
2253 	pt->unknown_thread = thread__new(999999999, 999999999);
2254 	if (!pt->unknown_thread) {
2255 		err = -ENOMEM;
2256 		goto err_free_queues;
2257 	}
2258 
2259 	/*
2260 	 * Since this thread will not be kept in any rbtree nor in a
2261 	 * list, initialize its list node so that at thread__put() the
2262 	 * current thread lifetime assumption is kept and we don't segfault
2263 	 * at list_del_init().
2264 	 */
2265 	INIT_LIST_HEAD(&pt->unknown_thread->node);
2266 
2267 	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2268 	if (err)
2269 		goto err_delete_thread;
2270 	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2271 		err = -ENOMEM;
2272 		goto err_delete_thread;
2273 	}
2274 
2275 	pt->auxtrace.process_event = intel_pt_process_event;
2276 	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2277 	pt->auxtrace.flush_events = intel_pt_flush;
2278 	pt->auxtrace.free_events = intel_pt_free_events;
2279 	pt->auxtrace.free = intel_pt_free;
2280 	session->auxtrace = &pt->auxtrace;
2281 
2282 	if (dump_trace)
2283 		return 0;
2284 
2285 	if (pt->have_sched_switch == 1) {
2286 		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2287 		if (!pt->switch_evsel) {
2288 			pr_err("%s: missing sched_switch event\n", __func__);
2289 			err = -EINVAL;
2290 			goto err_delete_thread;
2291 		}
2292 	} else if (pt->have_sched_switch == 2 &&
2293 		   !intel_pt_find_switch(session->evlist)) {
2294 		pr_err("%s: missing context_switch attribute flag\n", __func__);
2295 		err = -EINVAL;
2296 		goto err_delete_thread;
2297 	}
2298 
2299 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2300 		pt->synth_opts = *session->itrace_synth_opts;
2301 	} else {
2302 		itrace_synth_opts__set_default(&pt->synth_opts);
2303 		if (use_browser != -1) {
2304 			pt->synth_opts.branches = false;
2305 			pt->synth_opts.callchain = true;
2306 		}
2307 		if (session->itrace_synth_opts)
2308 			pt->synth_opts.thread_stack =
2309 				session->itrace_synth_opts->thread_stack;
2310 	}
2311 
2312 	if (pt->synth_opts.log)
2313 		intel_pt_log_enable();
2314 
2315 	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
2316 	if (pt->tc.time_mult) {
2317 		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2318 
2319 		if (!pt->max_non_turbo_ratio)
2320 			pt->max_non_turbo_ratio =
2321 					(tsc_freq + 50000000) / 100000000;
2322 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2323 		intel_pt_log("Maximum non-turbo ratio %u\n",
2324 			     pt->max_non_turbo_ratio);
2325 	}
2326 
2327 	if (pt->synth_opts.calls)
2328 		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2329 				       PERF_IP_FLAG_TRACE_END;
2330 	if (pt->synth_opts.returns)
2331 		pt->branches_filter |= PERF_IP_FLAG_RETURN |
2332 				       PERF_IP_FLAG_TRACE_BEGIN;
2333 
2334 	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2335 		symbol_conf.use_callchain = true;
2336 		if (callchain_register_param(&callchain_param) < 0) {
2337 			symbol_conf.use_callchain = false;
2338 			pt->synth_opts.callchain = false;
2339 		}
2340 	}
2341 
2342 	err = intel_pt_synth_events(pt, session);
2343 	if (err)
2344 		goto err_delete_thread;
2345 
2346 	err = auxtrace_queues__process_index(&pt->queues, session);
2347 	if (err)
2348 		goto err_delete_thread;
2349 
2350 	if (pt->queues.populated)
2351 		pt->data_queued = true;
2352 
2353 	if (pt->timeless_decoding)
2354 		pr_debug2("Intel PT decoding without timestamps\n");
2355 
2356 	return 0;
2357 
2358 err_delete_thread:
2359 	thread__zput(pt->unknown_thread);
2360 err_free_queues:
2361 	intel_pt_log_disable();
2362 	auxtrace_queues__free(&pt->queues);
2363 	session->auxtrace = NULL;
2364 err_free:
2365 	addr_filters__exit(&pt->filts);
2366 	zfree(&pt->filter);
2367 	free(pt);
2368 	return err;
2369 }
2370