xref: /openbmc/linux/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c (revision c0ecca6604b80e438b032578634c6e133c7028f6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_pt_decoder.c: Intel Processor Trace support
4  * Copyright (c) 2013-2014, Intel Corporation.
5  */
6 
7 #ifndef _GNU_SOURCE
8 #define _GNU_SOURCE
9 #endif
10 #include <stdlib.h>
11 #include <stdbool.h>
12 #include <string.h>
13 #include <errno.h>
14 #include <stdint.h>
15 #include <inttypes.h>
16 #include <linux/compiler.h>
17 #include <linux/string.h>
18 #include <linux/zalloc.h>
19 
20 #include "../auxtrace.h"
21 
22 #include "intel-pt-insn-decoder.h"
23 #include "intel-pt-pkt-decoder.h"
24 #include "intel-pt-decoder.h"
25 #include "intel-pt-log.h"
26 
/* Build an unsigned 64-bit value with only bit x set */
27 #define BITULL(x) (1ULL << (x))
28 
29 /* IA32_RTIT_CTL MSR bits */
30 #define INTEL_PT_CYC_ENABLE		BITULL(1)
31 #define INTEL_PT_CYC_THRESHOLD		(BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19))
32 #define INTEL_PT_CYC_THRESHOLD_SHIFT	19
33 
/* Number of ip slots held by one struct intel_pt_blk */
34 #define INTEL_PT_BLK_SIZE 1024
35 
/* Bit 63 of a 64-bit value */
36 #define BIT63 (((uint64_t)1 << 63))
37 
/* Positive result intel_pt_walk_insn() uses once state.from_ip/to_ip are set */
38 #define INTEL_PT_RETURN 1
39 
40 /* Maximum number of loops with no packets consumed i.e. stuck in a loop */
41 #define INTEL_PT_MAX_LOOPS 10000
42 
/*
 * One fixed-size chunk of the ip stack.  Chunks are chained through @prev so
 * the stack can grow one block at a time.
 */
43 struct intel_pt_blk {
44 	struct intel_pt_blk *prev;	/* block that was current before this one */
45 	uint64_t ip[INTEL_PT_BLK_SIZE];	/* stacked ips, filled from index 0 up */
46 };
47 
/*
 * Stack of ips built from a chain of blocks.  intel_pt_walk_insn() pushes the
 * address following a call and pops on a return.
 */
48 struct intel_pt_stack {
49 	struct intel_pt_blk *blk;	/* current (top) block, NULL when empty */
50 	struct intel_pt_blk *spare;	/* one retired block kept for reuse */
51 	int pos;			/* next free slot in @blk */
52 };
53 
/*
 * Packet-processing states of the decoder (stored in decoder->pkt_state).
 * NOTE(review): the per-state meanings are defined by the state machine that
 * lies outside this part of the file; only the uses visible here are noted.
 */
54 enum intel_pt_pkt_state {
55 	INTEL_PT_STATE_NO_PSB,		/* set by intel_pt_reposition() and intel_pt_bug() */
56 	INTEL_PT_STATE_NO_IP,
57 	INTEL_PT_STATE_ERR_RESYNC,	/* set when a never-ending loop is detected */
58 	INTEL_PT_STATE_IN_SYNC,
59 	INTEL_PT_STATE_TNT_CONT,
60 	INTEL_PT_STATE_TNT,
61 	INTEL_PT_STATE_TIP,
62 	INTEL_PT_STATE_TIP_PGD,
63 	INTEL_PT_STATE_FUP,
64 	INTEL_PT_STATE_FUP_NO_TIP,
65 	INTEL_PT_STATE_FUP_IN_PSB,
66 	INTEL_PT_STATE_RESAMPLE,
67 };
68 
69 static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
70 {
71 	switch (pkt_state) {
72 	case INTEL_PT_STATE_NO_PSB:
73 	case INTEL_PT_STATE_NO_IP:
74 	case INTEL_PT_STATE_ERR_RESYNC:
75 	case INTEL_PT_STATE_IN_SYNC:
76 	case INTEL_PT_STATE_TNT_CONT:
77 	case INTEL_PT_STATE_RESAMPLE:
78 		return true;
79 	case INTEL_PT_STATE_TNT:
80 	case INTEL_PT_STATE_TIP:
81 	case INTEL_PT_STATE_TIP_PGD:
82 	case INTEL_PT_STATE_FUP:
83 	case INTEL_PT_STATE_FUP_NO_TIP:
84 	case INTEL_PT_STATE_FUP_IN_PSB:
85 		return false;
86 	default:
87 		return true;
88 	};
89 }
90 
/*
 * Packet state to switch to after each class of decoding error.  With
 * INTEL_PT_STRICT every error goes back to INTEL_PT_STATE_NO_PSB.  Otherwise
 * INTEL_PT_STATE_ERR1 leaves the state unchanged; note that it expands to an
 * expression that needs a variable named 'decoder' in scope.
 */
91 #ifdef INTEL_PT_STRICT
92 #define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
93 #define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
94 #define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_NO_PSB
95 #define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_NO_PSB
96 #else
97 #define INTEL_PT_STATE_ERR1	(decoder->pkt_state)
98 #define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_IP
99 #define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_ERR_RESYNC
100 #define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_IN_SYNC
101 #endif
102 
/*
 * Complete state of one Intel PT decoder instance.  Allocated zeroed by
 * intel_pt_decoder_new() and released by intel_pt_decoder_free().  Comments
 * below describe only what the code in this part of the file shows.
 */
103 struct intel_pt_decoder {
	/* Callbacks and their opaque argument, copied from struct intel_pt_params */
104 	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
105 	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
106 			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
107 			 uint64_t max_insn_cnt, void *data);
108 	bool (*pgd_ip)(uint64_t ip, void *data);
109 	int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
110 	void *data;
111 	struct intel_pt_state state;
	/* Current position in the trace buffer: remaining bytes and their count */
112 	const unsigned char *buf;
113 	size_t len;
114 	bool return_compression;
115 	bool branch_enable;
116 	bool mtc_insn;
117 	bool pge;
118 	bool have_tma;
119 	bool have_cyc;
120 	bool fixup_last_mtc;
121 	bool have_last_ip;
122 	bool in_psb;
123 	bool hop;	/* params->quick >= 1 */
124 	bool leap;	/* params->quick >= 2 */
125 	bool nr;
126 	bool next_nr;
127 	enum intel_pt_param_flags flags;
128 	uint64_t pos;	/* advanced by pkt_step as packets are consumed */
129 	uint64_t last_ip;
130 	uint64_t ip;
131 	uint64_t pip_payload;
132 	uint64_t timestamp;
133 	uint64_t tsc_timestamp;
134 	uint64_t ref_timestamp;
135 	uint64_t buf_timestamp;
136 	uint64_t sample_timestamp;
137 	uint64_t ret_addr;	/* value popped from the stack on a return */
138 	uint64_t ctc_timestamp;
139 	uint64_t ctc_delta;
140 	uint64_t cycle_cnt;
141 	uint64_t cyc_ref_timestamp;
142 	uint32_t last_mtc;
143 	uint32_t tsc_ctc_ratio_n;
144 	uint32_t tsc_ctc_ratio_d;
145 	uint32_t tsc_ctc_mult;	/* n / d when d divides n exactly, else 0 */
146 	uint32_t tsc_slip;
147 	uint32_t ctc_rem_mask;	/* (1 << mtc_shift) - 1 */
148 	int mtc_shift;		/* params->mtc_period */
149 	struct intel_pt_stack stack;
150 	enum intel_pt_pkt_state pkt_state;
151 	enum intel_pt_pkt_ctx pkt_ctx;
152 	enum intel_pt_pkt_ctx prev_pkt_ctx;
153 	enum intel_pt_blk_type blk_type;
154 	int blk_type_pos;
155 	struct intel_pt_pkt packet;	/* most recently decoded packet */
156 	struct intel_pt_pkt tnt;
157 	int pkt_step;	/* bytes to skip before decoding the next packet */
158 	int pkt_len;
159 	int last_packet_type;
160 	unsigned int cbr;
161 	unsigned int cbr_seen;
162 	unsigned int max_non_turbo_ratio;
163 	double max_non_turbo_ratio_fp;
164 	double cbr_cyc_to_tsc;
165 	double calc_cyc_to_tsc;
166 	bool have_calc_cyc_to_tsc;
167 	int exec_mode;
168 	unsigned int insn_bytes;
169 	uint64_t period;
170 	enum intel_pt_period_type period_type;
171 	uint64_t tot_insn_cnt;
172 	uint64_t period_insn_cnt;
173 	uint64_t period_mask;
174 	uint64_t period_ticks;
175 	uint64_t last_masked_timestamp;
176 	uint64_t tot_cyc_cnt;
177 	uint64_t sample_tot_cyc_cnt;
178 	uint64_t base_cyc_cnt;
179 	uint64_t cyc_cnt_timestamp;
180 	uint64_t ctl;
181 	uint64_t cyc_threshold;
182 	double tsc_to_cyc;
183 	bool continuous_period;
184 	bool overflow;
185 	bool set_fup_tx_flags;
186 	bool set_fup_ptw;
187 	bool set_fup_mwait;
188 	bool set_fup_pwre;
189 	bool set_fup_exstop;
190 	bool set_fup_bep;
191 	bool sample_cyc;
192 	unsigned int fup_tx_flags;
193 	unsigned int tx_flags;
194 	uint64_t fup_ptw_payload;
195 	uint64_t fup_mwait_payload;
196 	uint64_t fup_pwre_payload;
197 	uint64_t cbr_payload;
198 	uint64_t timestamp_insn_cnt;
199 	uint64_t sample_insn_cnt;
	/* Never-ending-loop detection state used by intel_pt_walk_insn() */
200 	uint64_t stuck_ip;
201 	int no_progress;
202 	int stuck_ip_prd;
203 	int stuck_ip_cnt;
204 	uint64_t psb_ip;
	/* Buffer to continue with after a packet split across two buffers */
205 	const unsigned char *next_buf;
206 	size_t next_len;
	/* Scratch space in which a split packet is reassembled */
207 	unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
208 };
209 
/*
 * Round @x down to a power of two, i.e. keep only its most significant set
 * bit.
 *
 * Returns 0 when @x is 0.  The previous shift-until-one loop never terminated
 * for that input (0 >> 1 never becomes 1) and overflowed its signed counter.
 */
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
{
	uint64_t result;

	if (!x)
		return 0;

	/* One doubling of result for every bit position below the top set bit */
	result = 1;
	for (x >>= 1; x; x >>= 1)
		result <<= 1;

	return result;
}
219 
220 static uint64_t intel_pt_cyc_threshold(uint64_t ctl)
221 {
222 	if (!(ctl & INTEL_PT_CYC_ENABLE))
223 		return 0;
224 
225 	return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT;
226 }
227 
228 static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
229 {
230 	if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
231 		uint64_t period;
232 
233 		period = intel_pt_lower_power_of_2(decoder->period);
234 		decoder->period_mask  = ~(period - 1);
235 		decoder->period_ticks = period;
236 	}
237 }
238 
/*
 * Compute t * n / d using 64-bit integer arithmetic.
 *
 * The quotient and remainder of t / d are scaled separately so the full
 * product t * n is never formed.  Returns 0 when @d is 0.
 */
static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
{
	uint64_t whole, rem;

	if (d == 0)
		return 0;

	whole = t / d;
	rem = t % d;

	return whole * n + (rem * n) / d;
}
245 
246 struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
247 {
248 	struct intel_pt_decoder *decoder;
249 
250 	if (!params->get_trace || !params->walk_insn)
251 		return NULL;
252 
253 	decoder = zalloc(sizeof(struct intel_pt_decoder));
254 	if (!decoder)
255 		return NULL;
256 
257 	decoder->get_trace          = params->get_trace;
258 	decoder->walk_insn          = params->walk_insn;
259 	decoder->pgd_ip             = params->pgd_ip;
260 	decoder->lookahead          = params->lookahead;
261 	decoder->data               = params->data;
262 	decoder->return_compression = params->return_compression;
263 	decoder->branch_enable      = params->branch_enable;
264 	decoder->hop                = params->quick >= 1;
265 	decoder->leap               = params->quick >= 2;
266 
267 	decoder->flags              = params->flags;
268 
269 	decoder->ctl                = params->ctl;
270 	decoder->period             = params->period;
271 	decoder->period_type        = params->period_type;
272 
273 	decoder->max_non_turbo_ratio    = params->max_non_turbo_ratio;
274 	decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
275 
276 	decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl);
277 
278 	intel_pt_setup_period(decoder);
279 
280 	decoder->mtc_shift = params->mtc_period;
281 	decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
282 
283 	decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
284 	decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
285 
286 	if (!decoder->tsc_ctc_ratio_n)
287 		decoder->tsc_ctc_ratio_d = 0;
288 
289 	if (decoder->tsc_ctc_ratio_d) {
290 		if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
291 			decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
292 						decoder->tsc_ctc_ratio_d;
293 	}
294 
295 	/*
296 	 * A TSC packet can slip past MTC packets so that the timestamp appears
297 	 * to go backwards. One estimate is that can be up to about 40 CPU
298 	 * cycles, which is certainly less than 0x1000 TSC ticks, but accept
299 	 * slippage an order of magnitude more to be on the safe side.
300 	 */
301 	decoder->tsc_slip = 0x10000;
302 
303 	intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
304 	intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
305 	intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
306 	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
307 	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
308 
309 	if (decoder->hop)
310 		intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
311 
312 	return decoder;
313 }
314 
315 static void intel_pt_pop_blk(struct intel_pt_stack *stack)
316 {
317 	struct intel_pt_blk *blk = stack->blk;
318 
319 	stack->blk = blk->prev;
320 	if (!stack->spare)
321 		stack->spare = blk;
322 	else
323 		free(blk);
324 }
325 
326 static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
327 {
328 	if (!stack->pos) {
329 		if (!stack->blk)
330 			return 0;
331 		intel_pt_pop_blk(stack);
332 		if (!stack->blk)
333 			return 0;
334 		stack->pos = INTEL_PT_BLK_SIZE;
335 	}
336 	return stack->blk->ip[--stack->pos];
337 }
338 
339 static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
340 {
341 	struct intel_pt_blk *blk;
342 
343 	if (stack->spare) {
344 		blk = stack->spare;
345 		stack->spare = NULL;
346 	} else {
347 		blk = malloc(sizeof(struct intel_pt_blk));
348 		if (!blk)
349 			return -ENOMEM;
350 	}
351 
352 	blk->prev = stack->blk;
353 	stack->blk = blk;
354 	stack->pos = 0;
355 	return 0;
356 }
357 
358 static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
359 {
360 	int err;
361 
362 	if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
363 		err = intel_pt_alloc_blk(stack);
364 		if (err)
365 			return err;
366 	}
367 
368 	stack->blk->ip[stack->pos++] = ip;
369 	return 0;
370 }
371 
372 static void intel_pt_clear_stack(struct intel_pt_stack *stack)
373 {
374 	while (stack->blk)
375 		intel_pt_pop_blk(stack);
376 	stack->pos = 0;
377 }
378 
379 static void intel_pt_free_stack(struct intel_pt_stack *stack)
380 {
381 	intel_pt_clear_stack(stack);
382 	zfree(&stack->blk);
383 	zfree(&stack->spare);
384 }
385 
386 void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
387 {
388 	intel_pt_free_stack(&decoder->stack);
389 	free(decoder);
390 }
391 
392 static int intel_pt_ext_err(int code)
393 {
394 	switch (code) {
395 	case -ENOMEM:
396 		return INTEL_PT_ERR_NOMEM;
397 	case -ENOSYS:
398 		return INTEL_PT_ERR_INTERN;
399 	case -EBADMSG:
400 		return INTEL_PT_ERR_BADPKT;
401 	case -ENODATA:
402 		return INTEL_PT_ERR_NODATA;
403 	case -EILSEQ:
404 		return INTEL_PT_ERR_NOINSN;
405 	case -ENOENT:
406 		return INTEL_PT_ERR_MISMAT;
407 	case -EOVERFLOW:
408 		return INTEL_PT_ERR_OVR;
409 	case -ENOSPC:
410 		return INTEL_PT_ERR_LOST;
411 	case -ELOOP:
412 		return INTEL_PT_ERR_NELOOP;
413 	default:
414 		return INTEL_PT_ERR_UNK;
415 	}
416 }
417 
/*
 * Human-readable message for each INTEL_PT_ERR_* code, indexed by the code.
 * intel_pt__strerror() only indexes this table with codes from 1 up to (but
 * not including) INTEL_PT_ERR_MAX.
 */
418 static const char *intel_pt_err_msgs[] = {
419 	[INTEL_PT_ERR_NOMEM]  = "Memory allocation failed",
420 	[INTEL_PT_ERR_INTERN] = "Internal error",
421 	[INTEL_PT_ERR_BADPKT] = "Bad packet",
422 	[INTEL_PT_ERR_NODATA] = "No more data",
423 	[INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
424 	[INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
425 	[INTEL_PT_ERR_OVR]    = "Overflow packet",
426 	[INTEL_PT_ERR_LOST]   = "Lost trace data",
427 	[INTEL_PT_ERR_UNK]    = "Unknown error!",
428 	[INTEL_PT_ERR_NELOOP] = "Never-ending loop",
429 };
430 
431 int intel_pt__strerror(int code, char *buf, size_t buflen)
432 {
433 	if (code < 1 || code >= INTEL_PT_ERR_MAX)
434 		code = INTEL_PT_ERR_UNK;
435 	strlcpy(buf, intel_pt_err_msgs[code], buflen);
436 	return 0;
437 }
438 
439 static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
440 				 uint64_t last_ip)
441 {
442 	uint64_t ip;
443 
444 	switch (packet->count) {
445 	case 1:
446 		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
447 		     packet->payload;
448 		break;
449 	case 2:
450 		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
451 		     packet->payload;
452 		break;
453 	case 3:
454 		ip = packet->payload;
455 		/* Sign-extend 6-byte ip */
456 		if (ip & (uint64_t)0x800000000000ULL)
457 			ip |= (uint64_t)0xffff000000000000ULL;
458 		break;
459 	case 4:
460 		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
461 		     packet->payload;
462 		break;
463 	case 6:
464 		ip = packet->payload;
465 		break;
466 	default:
467 		return 0;
468 	}
469 
470 	return ip;
471 }
472 
/* Update last_ip from the current packet and record that a last_ip is known. */
473 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
474 {
475 	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
476 	decoder->have_last_ip = true;
477 }
478 
/* Update last_ip from the current packet and make it the current ip too. */
479 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
480 {
481 	intel_pt_set_last_ip(decoder);
482 	decoder->ip = decoder->last_ip;
483 }
484 
/* Log the current packet together with its length, position and raw bytes. */
485 static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
486 {
487 	intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
488 			    decoder->buf);
489 }
490 
/*
 * Report an internal decoder error: log it, fall back to the
 * INTEL_PT_STATE_NO_PSB state and return -ENOSYS (which intel_pt_ext_err()
 * maps to INTEL_PT_ERR_INTERN).
 */
491 static int intel_pt_bug(struct intel_pt_decoder *decoder)
492 {
493 	intel_pt_log("ERROR: Internal error\n");
494 	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
495 	return -ENOSYS;
496 }
497 
/* Forget all transaction flags. */
498 static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
499 {
500 	decoder->tx_flags = 0;
501 }
502 
/* Set tx_flags to just the INTEL_PT_IN_TX bit of the current packet payload. */
503 static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
504 {
505 	decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
506 }
507 
/* Remember the payload of the current packet as the PIP payload. */
508 static inline void intel_pt_update_pip(struct intel_pt_decoder *decoder)
509 {
510 	decoder->pip_payload = decoder->packet.payload;
511 }
512 
/* Set next_nr from bit 0 of the saved PIP payload; nr itself is untouched. */
513 static inline void intel_pt_update_nr(struct intel_pt_decoder *decoder)
514 {
515 	decoder->next_nr = decoder->pip_payload & 1;
516 }
517 
/* Set both nr and next_nr from bit 0 of the saved PIP payload. */
518 static inline void intel_pt_set_nr(struct intel_pt_decoder *decoder)
519 {
520 	decoder->nr = decoder->pip_payload & 1;
521 	decoder->next_nr = decoder->nr;
522 }
523 
/* Save the current packet payload as the PIP payload and apply its NR bit. */
524 static inline void intel_pt_set_pip(struct intel_pt_decoder *decoder)
525 {
526 	intel_pt_update_pip(decoder);
527 	intel_pt_set_nr(decoder);
528 }
529 
/*
 * Handle a packet that could not be decoded.
 *
 * Clears the transaction flags and the TMA state, and sets the packet
 * length and step to one byte so that decoding resumes at the next byte.
 * Unless the decoder is in INTEL_PT_STATE_NO_PSB, an error is logged and the
 * state becomes INTEL_PT_STATE_ERR1.  Always returns -EBADMSG.
 */
530 static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
531 {
532 	intel_pt_clear_tx_flags(decoder);
533 	decoder->have_tma = false;
534 	decoder->pkt_len = 1;
535 	decoder->pkt_step = 1;
536 	intel_pt_decoder_log_packet(decoder);
537 	if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
538 		intel_pt_log("ERROR: Bad packet\n");
539 		decoder->pkt_state = INTEL_PT_STATE_ERR1;
540 	}
541 	return -EBADMSG;
542 }
543 
/* Snapshot the current timestamp and its instruction count for sampling. */
544 static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
545 {
546 	decoder->sample_timestamp = decoder->timestamp;
547 	decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
548 }
549 
/*
 * Reset the decoder for a non-contiguous position in the trace: no ip, no
 * timestamp, no TMA, and back to the INTEL_PT_STATE_NO_PSB state.
 */
550 static void intel_pt_reposition(struct intel_pt_decoder *decoder)
551 {
552 	decoder->ip = 0;
553 	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
554 	decoder->timestamp = 0;
555 	decoder->have_tma = false;
556 }
557 
558 static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
559 {
560 	struct intel_pt_buffer buffer = { .buf = 0, };
561 	int ret;
562 
563 	decoder->pkt_step = 0;
564 
565 	intel_pt_log("Getting more data\n");
566 	ret = decoder->get_trace(&buffer, decoder->data);
567 	if (ret)
568 		return ret;
569 	decoder->buf = buffer.buf;
570 	decoder->len = buffer.len;
571 	if (!decoder->len) {
572 		intel_pt_log("No more data\n");
573 		return -ENODATA;
574 	}
575 	decoder->buf_timestamp = buffer.ref_timestamp;
576 	if (!buffer.consecutive || reposition) {
577 		intel_pt_reposition(decoder);
578 		decoder->ref_timestamp = buffer.ref_timestamp;
579 		decoder->state.trace_nr = buffer.trace_nr;
580 		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
581 			     decoder->ref_timestamp);
582 		return -ENOLINK;
583 	}
584 
585 	return 0;
586 }
587 
588 static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
589 				  bool reposition)
590 {
591 	if (!decoder->next_buf)
592 		return intel_pt_get_data(decoder, reposition);
593 
594 	decoder->buf = decoder->next_buf;
595 	decoder->len = decoder->next_len;
596 	decoder->next_buf = 0;
597 	decoder->next_len = 0;
598 	return 0;
599 }
600 
/*
 * Decode a packet whose bytes straddle the end of the current buffer and the
 * start of the next one.
 *
 * The remaining bytes of the current buffer are copied into temp_buf, the
 * next buffer is fetched, and enough of its leading bytes are appended to
 * make up to INTEL_PT_PKT_MAX_SZ bytes.  The packet is then decoded from
 * temp_buf.  The caller only invokes this when decoder->len is less than
 * INTEL_PT_PKT_MAX_SZ, so the first copy fits.
 *
 * Returns the packet length on success (buf/len then describe the packet in
 * temp_buf and next_buf/next_len the rest of the new buffer), or a negative
 * error.
 */
601 static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
602 {
603 	unsigned char *buf = decoder->temp_buf;
604 	size_t old_len, len, n;
605 	int ret;
606 
607 	old_len = decoder->len;
608 	len = decoder->len;
609 	memcpy(buf, decoder->buf, len);
610 
611 	ret = intel_pt_get_data(decoder, false);
612 	if (ret) {
		/* Account for the bytes of the old buffer that are abandoned */
613 		decoder->pos += old_len;
614 		return ret < 0 ? ret : -EINVAL;
615 	}
616 
617 	n = INTEL_PT_PKT_MAX_SZ - len;
618 	if (n > decoder->len)
619 		n = decoder->len;
620 	memcpy(buf + len, decoder->buf, n);
621 	len += n;
622 
623 	decoder->prev_pkt_ctx = decoder->pkt_ctx;
624 	ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
	/*
	 * A result shorter than the old tail (including errors and zero) is
	 * handled as a bad packet in the old bytes; the new buffer is kept
	 * whole for later.
	 */
625 	if (ret < (int)old_len) {
626 		decoder->next_buf = decoder->buf;
627 		decoder->next_len = decoder->len;
628 		decoder->buf = buf;
629 		decoder->len = old_len;
630 		return intel_pt_bad_packet(decoder);
631 	}
632 
	/* Skip the part of the packet that came from the new buffer */
633 	decoder->next_buf = decoder->buf + (ret - old_len);
634 	decoder->next_len = decoder->len - (ret - old_len);
635 
636 	decoder->buf = buf;
637 	decoder->len = ret;
638 
639 	return ret;
640 }
641 
/* Per-packet context handed to a lookahead callback. */
642 struct intel_pt_pkt_info {
643 	struct intel_pt_decoder	  *decoder;
644 	struct intel_pt_pkt       packet;	/* packet being looked at */
645 	uint64_t                  pos;		/* position of @packet */
646 	int                       pkt_len;	/* length of @packet in bytes */
647 	int                       last_packet_type; /* type of the preceding non-PAD packet */
648 	void                      *data;	/* caller's private data */
649 };
650 
/* Lookahead callback: return non-zero to stop the lookahead, 0 to continue. */
651 typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
652 
/*
 * Scan the packets that follow the current one in decoder->buf and call @cb
 * for each of them, PAD packets excepted.  Only local copies of the buffer
 * pointer, length, position and packet context are advanced, so the
 * decoder's own position is not changed by the scan itself.
 *
 * Returns 0 once @cb returns non-zero, INTEL_PT_NEED_MORE_BYTES when the
 * buffer is used up or intel_pt_get_packet() returns 0, or the negative
 * error from intel_pt_get_packet().
 */
653 /* Lookahead packets in current buffer */
654 static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
655 				  intel_pt_pkt_cb_t cb, void *data)
656 {
657 	struct intel_pt_pkt_info pkt_info;
658 	const unsigned char *buf = decoder->buf;
659 	enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
660 	size_t len = decoder->len;
661 	int ret;
662 
663 	pkt_info.decoder          = decoder;
664 	pkt_info.pos              = decoder->pos;
	/* Start by stepping over the packet the decoder is currently on */
665 	pkt_info.pkt_len          = decoder->pkt_step;
666 	pkt_info.last_packet_type = decoder->last_packet_type;
667 	pkt_info.data             = data;
668 
669 	while (1) {
670 		do {
671 			pkt_info.pos += pkt_info.pkt_len;
672 			buf          += pkt_info.pkt_len;
673 			len          -= pkt_info.pkt_len;
674 
675 			if (!len)
676 				return INTEL_PT_NEED_MORE_BYTES;
677 
678 			ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
679 						  &pkt_ctx);
680 			if (!ret)
681 				return INTEL_PT_NEED_MORE_BYTES;
682 			if (ret < 0)
683 				return ret;
684 
685 			pkt_info.pkt_len = ret;
686 		} while (pkt_info.packet.type == INTEL_PT_PAD);
687 
688 		ret = cb(&pkt_info);
689 		if (ret)
690 			return 0;
691 
692 		pkt_info.last_packet_type = pkt_info.packet.type;
693 	}
694 }
695 
/*
 * Working state for intel_pt_calc_cyc_cb().  The timing fields start out as
 * copies of the decoder's fields of the same name (see
 * intel_pt_calc_cyc_to_tsc()) so the lookahead can advance them without
 * disturbing the decoder.
 */
696 struct intel_pt_calc_cyc_to_tsc_info {
697 	uint64_t        cycle_cnt;	/* sum of CYC payloads seen so far */
698 	unsigned int    cbr;		/* CBR payload seen, 0 if none yet */
699 	uint32_t        last_mtc;
700 	uint64_t        ctc_timestamp;
701 	uint64_t        ctc_delta;
702 	uint64_t        tsc_timestamp;
703 	uint64_t        timestamp;
704 	bool            have_tma;
705 	bool            fixup_last_mtc;
706 	bool            from_mtc;	/* lookahead was started from an MTC packet */
707 	double          cbr_cyc_to_tsc;	/* max_non_turbo_ratio_fp / cbr */
708 };
709 
/*
 * An MTC packet carries an 8-bit slice of CTC, whereas a TMA packet supplies
 * only the low 16 bits of CTC.  With mtc_shift > 8 the upper bits of that
 * slice are therefore absent from the last_mtc value derived from TMA.
 * Fill them in from the current MTC, choosing the value closest to - and
 * strictly before - the current MTC.
 */
static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
				    uint32_t *last_mtc)
{
	const uint32_t lowest_missing = 1U << (16 - mtc_shift);
	uint32_t fixed = *last_mtc | (mtc & ~(lowest_missing - 1));

	/* last_mtc must precede mtc: step back one unit of the missing bits */
	if (fixed >= mtc)
		fixed = (fixed - lowest_missing) & 0xff;

	*last_mtc = fixed;
}
729 
/*
 * Lookahead callback used by intel_pt_calc_cyc_to_tsc().
 *
 * Accumulates CYC payloads and tracks the timestamp through TMA/MTC/TSC
 * packets.  When a timestamp packet immediately follows a CYC packet, the
 * TSC ticks elapsed since decoder->timestamp are divided by the accumulated
 * cycle count and, unless the result is more than 1.25 times the CBR-based
 * value, stored in decoder->calc_cyc_to_tsc.
 *
 * Returns 0 to continue with the next packet or 1 to stop the lookahead.
 */
730 static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
731 {
732 	struct intel_pt_decoder *decoder = pkt_info->decoder;
733 	struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
734 	uint64_t timestamp;
735 	double cyc_to_tsc;
736 	unsigned int cbr;
737 	uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
738 
739 	switch (pkt_info->packet.type) {
	/* Packets that carry no timing information: keep looking */
740 	case INTEL_PT_TNT:
741 	case INTEL_PT_TIP_PGE:
742 	case INTEL_PT_TIP:
743 	case INTEL_PT_FUP:
744 	case INTEL_PT_PSB:
745 	case INTEL_PT_PIP:
746 	case INTEL_PT_MODE_EXEC:
747 	case INTEL_PT_MODE_TSX:
748 	case INTEL_PT_PSBEND:
749 	case INTEL_PT_PAD:
750 	case INTEL_PT_VMCS:
751 	case INTEL_PT_MNT:
752 	case INTEL_PT_PTWRITE:
753 	case INTEL_PT_PTWRITE_IP:
754 	case INTEL_PT_BBP:
755 	case INTEL_PT_BIP:
756 	case INTEL_PT_BEP:
757 	case INTEL_PT_BEP_IP:
758 		return 0;
759 
760 	case INTEL_PT_MTC:
761 		if (!data->have_tma)
762 			return 0;
763 
764 		mtc = pkt_info->packet.payload;
765 		if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
766 			data->fixup_last_mtc = false;
767 			intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
768 						&data->last_mtc);
769 		}
		/* MTC is an 8-bit counter, so allow for wrap-around */
770 		if (mtc > data->last_mtc)
771 			mtc_delta = mtc - data->last_mtc;
772 		else
773 			mtc_delta = mtc + 256 - data->last_mtc;
774 		data->ctc_delta += mtc_delta << decoder->mtc_shift;
775 		data->last_mtc = mtc;
776 
777 		if (decoder->tsc_ctc_mult) {
778 			timestamp = data->ctc_timestamp +
779 				data->ctc_delta * decoder->tsc_ctc_mult;
780 		} else {
781 			timestamp = data->ctc_timestamp +
782 				multdiv(data->ctc_delta,
783 					decoder->tsc_ctc_ratio_n,
784 					decoder->tsc_ctc_ratio_d);
785 		}
786 
787 		if (timestamp < data->timestamp)
788 			return 1;
789 
		/* Only a timestamp directly after a CYC packet is usable */
790 		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
791 			data->timestamp = timestamp;
792 			return 0;
793 		}
794 
795 		break;
796 
797 	case INTEL_PT_TSC:
798 		/*
799 		 * For now, do not support using TSC packets - refer
800 		 * intel_pt_calc_cyc_to_tsc().
801 		 */
802 		if (data->from_mtc)
803 			return 1;
		/* The payload lacks the top 8 bits: take them from the last timestamp */
804 		timestamp = pkt_info->packet.payload |
805 			    (data->timestamp & (0xffULL << 56));
806 		if (data->from_mtc && timestamp < data->timestamp &&
807 		    data->timestamp - timestamp < decoder->tsc_slip)
808 			return 1;
809 		if (timestamp < data->timestamp)
810 			timestamp += (1ULL << 56);
811 		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
812 			if (data->from_mtc)
813 				return 1;
814 			data->tsc_timestamp = timestamp;
815 			data->timestamp = timestamp;
816 			return 0;
817 		}
818 		break;
819 
820 	case INTEL_PT_TMA:
821 		if (data->from_mtc)
822 			return 1;
823 
824 		if (!decoder->tsc_ctc_ratio_d)
825 			return 0;
826 
827 		ctc = pkt_info->packet.payload;
828 		fc = pkt_info->packet.count;
829 		ctc_rem = ctc & decoder->ctc_rem_mask;
830 
831 		data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
832 
833 		data->ctc_timestamp = data->tsc_timestamp - fc;
834 		if (decoder->tsc_ctc_mult) {
835 			data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
836 		} else {
837 			data->ctc_timestamp -=
838 				multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
839 					decoder->tsc_ctc_ratio_d);
840 		}
841 
842 		data->ctc_delta = 0;
843 		data->have_tma = true;
844 		data->fixup_last_mtc = true;
845 
846 		return 0;
847 
848 	case INTEL_PT_CYC:
849 		data->cycle_cnt += pkt_info->packet.payload;
850 		return 0;
851 
852 	case INTEL_PT_CBR:
853 		cbr = pkt_info->packet.payload;
		/* A change of CBR within the interval ends the lookahead */
854 		if (data->cbr && data->cbr != cbr)
855 			return 1;
856 		data->cbr = cbr;
857 		data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
858 		return 0;
859 
860 	case INTEL_PT_TIP_PGD:
861 	case INTEL_PT_TRACESTOP:
862 	case INTEL_PT_EXSTOP:
863 	case INTEL_PT_EXSTOP_IP:
864 	case INTEL_PT_MWAIT:
865 	case INTEL_PT_PWRE:
866 	case INTEL_PT_PWRX:
867 	case INTEL_PT_OVF:
868 	case INTEL_PT_BAD: /* Does not happen */
869 	default:
870 		return 1;
871 	}
872 
	/* Reached only for an MTC or TSC packet that directly follows a CYC */
873 	if (!data->cbr && decoder->cbr) {
874 		data->cbr = decoder->cbr;
875 		data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
876 	}
877 
878 	if (!data->cycle_cnt)
879 		return 1;
880 
881 	cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
882 
883 	if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
884 	    cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
885 		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
886 			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
887 		return 1;
888 	}
889 
890 	decoder->calc_cyc_to_tsc = cyc_to_tsc;
891 	decoder->have_calc_cyc_to_tsc = true;
892 
893 	if (data->cbr) {
894 		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
895 			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
896 	} else {
897 		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
898 			     cyc_to_tsc, pkt_info->pos);
899 	}
900 
901 	return 1;
902 }
903 
904 static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
905 				     bool from_mtc)
906 {
907 	struct intel_pt_calc_cyc_to_tsc_info data = {
908 		.cycle_cnt      = 0,
909 		.cbr            = 0,
910 		.last_mtc       = decoder->last_mtc,
911 		.ctc_timestamp  = decoder->ctc_timestamp,
912 		.ctc_delta      = decoder->ctc_delta,
913 		.tsc_timestamp  = decoder->tsc_timestamp,
914 		.timestamp      = decoder->timestamp,
915 		.have_tma       = decoder->have_tma,
916 		.fixup_last_mtc = decoder->fixup_last_mtc,
917 		.from_mtc       = from_mtc,
918 		.cbr_cyc_to_tsc = 0,
919 	};
920 
921 	/*
922 	 * For now, do not support using TSC packets for at least the reasons:
923 	 * 1) timing might have stopped
924 	 * 2) TSC packets within PSB+ can slip against CYC packets
925 	 */
926 	if (!from_mtc)
927 		return;
928 
929 	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
930 }
931 
/*
 * Advance past the current packet and decode the next one into
 * decoder->packet, skipping PAD packets.  More data is fetched when the
 * current buffer is used up.
 *
 * Returns 0 on success, the non-zero result of intel_pt_get_next_data(), a
 * negative error from intel_pt_get_split_packet(), or -EBADMSG (from
 * intel_pt_bad_packet()) when the packet cannot be decoded.
 */
932 static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
933 {
934 	int ret;
935 
936 	decoder->last_packet_type = decoder->packet.type;
937 
938 	do {
939 		decoder->pos += decoder->pkt_step;
940 		decoder->buf += decoder->pkt_step;
941 		decoder->len -= decoder->pkt_step;
942 
943 		if (!decoder->len) {
944 			ret = intel_pt_get_next_data(decoder, false);
945 			if (ret)
946 				return ret;
947 		}
948 
949 		decoder->prev_pkt_ctx = decoder->pkt_ctx;
950 		ret = intel_pt_get_packet(decoder->buf, decoder->len,
951 					  &decoder->packet, &decoder->pkt_ctx);
		/*
		 * Reassembling a packet split across two buffers is only
		 * attempted when BITS_PER_LONG is 32 and no buffer is already
		 * stashed in next_buf.
		 */
952 		if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
953 		    decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
954 			ret = intel_pt_get_split_packet(decoder);
955 			if (ret < 0)
956 				return ret;
957 		}
958 		if (ret <= 0)
959 			return intel_pt_bad_packet(decoder);
960 
961 		decoder->pkt_len = ret;
962 		decoder->pkt_step = ret;
963 		intel_pt_decoder_log_packet(decoder);
964 	} while (decoder->packet.type == INTEL_PT_PAD);
965 
966 	return 0;
967 }
968 
/*
 * For INTEL_PT_PERIOD_TICKS sampling, work out the value that
 * intel_pt_next_sample() passes on as the instruction limit of the next
 * instruction walk.  The estimated time is the timestamp plus the number of
 * instructions counted since it was set, and period boundaries are found by
 * masking that time with period_mask.
 */
969 static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
970 {
971 	uint64_t timestamp, masked_timestamp;
972 
973 	timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
974 	masked_timestamp = timestamp & decoder->period_mask;
975 	if (decoder->continuous_period) {
		/* Already past the last recorded boundary: stop after one instruction */
976 		if (masked_timestamp > decoder->last_masked_timestamp)
977 			return 1;
978 	} else {
979 		timestamp += 1;
980 		masked_timestamp = timestamp & decoder->period_mask;
981 		if (masked_timestamp > decoder->last_masked_timestamp) {
982 			decoder->last_masked_timestamp = masked_timestamp;
983 			decoder->continuous_period = true;
984 		}
985 	}
986 
987 	if (masked_timestamp < decoder->last_masked_timestamp)
988 		return decoder->period_ticks;
989 
	/* Whatever is left of the current period */
990 	return decoder->period_ticks - (timestamp - masked_timestamp);
991 }
992 
993 static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
994 {
995 	switch (decoder->period_type) {
996 	case INTEL_PT_PERIOD_INSTRUCTIONS:
997 		return decoder->period - decoder->period_insn_cnt;
998 	case INTEL_PT_PERIOD_TICKS:
999 		return intel_pt_next_period(decoder);
1000 	case INTEL_PT_PERIOD_NONE:
1001 	case INTEL_PT_PERIOD_MTC:
1002 	default:
1003 		return 0;
1004 	}
1005 }
1006 
/*
 * Record that a periodic sample is due: restart the period bookkeeping for
 * the active period type and add INTEL_PT_INSTRUCTION to the state type.
 */
1007 static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
1008 {
1009 	uint64_t timestamp, masked_timestamp;
1010 
1011 	switch (decoder->period_type) {
1012 	case INTEL_PT_PERIOD_INSTRUCTIONS:
1013 		decoder->period_insn_cnt = 0;
1014 		break;
1015 	case INTEL_PT_PERIOD_TICKS:
1016 		timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
1017 		masked_timestamp = timestamp & decoder->period_mask;
		/* Move the boundary to the current period, or on by one period */
1018 		if (masked_timestamp > decoder->last_masked_timestamp)
1019 			decoder->last_masked_timestamp = masked_timestamp;
1020 		else
1021 			decoder->last_masked_timestamp += decoder->period_ticks;
1022 		break;
1023 	case INTEL_PT_PERIOD_NONE:
1024 	case INTEL_PT_PERIOD_MTC:
1025 	default:
1026 		break;
1027 	}
1028 
1029 	decoder->state.type |= INTEL_PT_INSTRUCTION;
1030 }
1031 
/*
 * Walk instructions from decoder->ip using the walk_insn callback and update
 * the decoder according to the instruction at which the walk stopped.
 *
 * @intel_pt_insn: filled in by the callback with the stopping instruction
 * @ip: passed to the callback as its to_ip argument; when non-zero and
 *	decoder->ip equals it afterwards, -EAGAIN is returned
 *
 * Returns:
 *   INTEL_PT_RETURN  state.from_ip/to_ip were set (non-branch instruction or
 *                    unconditional branch)
 *   0                any other instruction (the caller decides what next)
 *   -EAGAIN          @ip was reached
 *   -ENOLINK         the callback failed with -ENOENT
 *   -EILSEQ          the callback failed with any other error
 *   -ELOOP           a never-ending loop was detected
 *   other negative   failure to push a return address
 */
1032 static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
1033 			      struct intel_pt_insn *intel_pt_insn, uint64_t ip)
1034 {
1035 	uint64_t max_insn_cnt, insn_cnt = 0;
1036 	int err;
1037 
1038 	if (!decoder->mtc_insn)
1039 		decoder->mtc_insn = true;
1040 
1041 	max_insn_cnt = intel_pt_next_sample(decoder);
1042 
1043 	err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
1044 				 max_insn_cnt, decoder->data);
1045 
	/* Instructions are counted even when the callback reports an error */
1046 	decoder->tot_insn_cnt += insn_cnt;
1047 	decoder->timestamp_insn_cnt += insn_cnt;
1048 	decoder->sample_insn_cnt += insn_cnt;
1049 	decoder->period_insn_cnt += insn_cnt;
1050 
1051 	if (err) {
1052 		decoder->no_progress = 0;
1053 		decoder->pkt_state = INTEL_PT_STATE_ERR2;
1054 		intel_pt_log_at("ERROR: Failed to get instruction",
1055 				decoder->ip);
1056 		if (err == -ENOENT)
1057 			return -ENOLINK;
1058 		return -EILSEQ;
1059 	}
1060 
1061 	if (ip && decoder->ip == ip) {
1062 		err = -EAGAIN;
1063 		goto out;
1064 	}
1065 
1066 	if (max_insn_cnt && insn_cnt >= max_insn_cnt)
1067 		intel_pt_sample_insn(decoder);
1068 
1069 	if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
1070 		decoder->state.type = INTEL_PT_INSTRUCTION;
1071 		decoder->state.from_ip = decoder->ip;
1072 		decoder->state.to_ip = 0;
1073 		decoder->ip += intel_pt_insn->length;
1074 		err = INTEL_PT_RETURN;
1075 		goto out;
1076 	}
1077 
1078 	if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
1079 		/* Zero-length calls are excluded */
1080 		if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
1081 		    intel_pt_insn->rel) {
			/* Remember the address of the instruction after the call */
1082 			err = intel_pt_push(&decoder->stack, decoder->ip +
1083 					    intel_pt_insn->length);
1084 			if (err)
1085 				goto out;
1086 		}
1087 	} else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
1088 		decoder->ret_addr = intel_pt_pop(&decoder->stack);
1089 	}
1090 
1091 	if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
		/* cnt is the number of consecutive such branches before this one */
1092 		int cnt = decoder->no_progress++;
1093 
1094 		decoder->state.from_ip = decoder->ip;
1095 		decoder->ip += intel_pt_insn->length +
1096 				intel_pt_insn->rel;
1097 		decoder->state.to_ip = decoder->ip;
1098 		err = INTEL_PT_RETURN;
1099 
1100 		/*
1101 		 * Check for being stuck in a loop.  This can happen if a
1102 		 * decoder error results in the decoder erroneously setting the
1103 		 * ip to an address that is itself in an infinite loop that
1104 		 * consumes no packets.  When that happens, there must be an
1105 		 * unconditional branch.
1106 		 */
1107 		if (cnt) {
1108 			if (cnt == 1) {
1109 				decoder->stuck_ip = decoder->state.to_ip;
1110 				decoder->stuck_ip_prd = 1;
1111 				decoder->stuck_ip_cnt = 1;
1112 			} else if (cnt > INTEL_PT_MAX_LOOPS ||
1113 				   decoder->state.to_ip == decoder->stuck_ip) {
1114 				intel_pt_log_at("ERROR: Never-ending loop",
1115 						decoder->state.to_ip);
1116 				decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1117 				err = -ELOOP;
1118 				goto out;
1119 			} else if (!--decoder->stuck_ip_cnt) {
				/* Take a new reference ip, waiting one branch longer each time */
1120 				decoder->stuck_ip_prd += 1;
1121 				decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
1122 				decoder->stuck_ip = decoder->state.to_ip;
1123 			}
1124 		}
1125 		goto out_no_progress;
1126 	}
1127 out:
1128 	decoder->no_progress = 0;
1129 out_no_progress:
1130 	decoder->state.insn_op = intel_pt_insn->op;
1131 	decoder->state.insn_len = intel_pt_insn->length;
1132 	memcpy(decoder->state.insn, intel_pt_insn->buf,
1133 	       INTEL_PT_INSN_BUF_SZ);
1134 
1135 	if (decoder->tx_flags & INTEL_PT_IN_TX)
1136 		decoder->state.flags |= INTEL_PT_IN_TX;
1137 
1138 	return err;
1139 }
1140 
1141 static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
1142 {
1143 	bool ret = false;
1144 
1145 	if (decoder->set_fup_tx_flags) {
1146 		decoder->set_fup_tx_flags = false;
1147 		decoder->tx_flags = decoder->fup_tx_flags;
1148 		decoder->state.type = INTEL_PT_TRANSACTION;
1149 		if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
1150 			decoder->state.type |= INTEL_PT_BRANCH;
1151 		decoder->state.from_ip = decoder->ip;
1152 		decoder->state.to_ip = 0;
1153 		decoder->state.flags = decoder->fup_tx_flags;
1154 		return true;
1155 	}
1156 	if (decoder->set_fup_ptw) {
1157 		decoder->set_fup_ptw = false;
1158 		decoder->state.type = INTEL_PT_PTW;
1159 		decoder->state.flags |= INTEL_PT_FUP_IP;
1160 		decoder->state.from_ip = decoder->ip;
1161 		decoder->state.to_ip = 0;
1162 		decoder->state.ptw_payload = decoder->fup_ptw_payload;
1163 		return true;
1164 	}
1165 	if (decoder->set_fup_mwait) {
1166 		decoder->set_fup_mwait = false;
1167 		decoder->state.type = INTEL_PT_MWAIT_OP;
1168 		decoder->state.from_ip = decoder->ip;
1169 		decoder->state.to_ip = 0;
1170 		decoder->state.mwait_payload = decoder->fup_mwait_payload;
1171 		ret = true;
1172 	}
1173 	if (decoder->set_fup_pwre) {
1174 		decoder->set_fup_pwre = false;
1175 		decoder->state.type |= INTEL_PT_PWR_ENTRY;
1176 		decoder->state.type &= ~INTEL_PT_BRANCH;
1177 		decoder->state.from_ip = decoder->ip;
1178 		decoder->state.to_ip = 0;
1179 		decoder->state.pwre_payload = decoder->fup_pwre_payload;
1180 		ret = true;
1181 	}
1182 	if (decoder->set_fup_exstop) {
1183 		decoder->set_fup_exstop = false;
1184 		decoder->state.type |= INTEL_PT_EX_STOP;
1185 		decoder->state.type &= ~INTEL_PT_BRANCH;
1186 		decoder->state.flags |= INTEL_PT_FUP_IP;
1187 		decoder->state.from_ip = decoder->ip;
1188 		decoder->state.to_ip = 0;
1189 		ret = true;
1190 	}
1191 	if (decoder->set_fup_bep) {
1192 		decoder->set_fup_bep = false;
1193 		decoder->state.type |= INTEL_PT_BLK_ITEMS;
1194 		decoder->state.type &= ~INTEL_PT_BRANCH;
1195 		decoder->state.from_ip = decoder->ip;
1196 		decoder->state.to_ip = 0;
1197 		ret = true;
1198 	}
1199 	return ret;
1200 }
1201 
1202 static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
1203 					  struct intel_pt_insn *intel_pt_insn,
1204 					  uint64_t ip, int err)
1205 {
1206 	return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
1207 	       intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
1208 	       ip == decoder->ip + intel_pt_insn->length;
1209 }
1210 
1211 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
1212 {
1213 	struct intel_pt_insn intel_pt_insn;
1214 	uint64_t ip;
1215 	int err;
1216 
1217 	ip = decoder->last_ip;
1218 
1219 	while (1) {
1220 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
1221 		if (err == INTEL_PT_RETURN)
1222 			return 0;
1223 		if (err == -EAGAIN ||
1224 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
1225 			bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
1226 
1227 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1228 			if (intel_pt_fup_event(decoder) && no_tip)
1229 				return 0;
1230 			return -EAGAIN;
1231 		}
1232 		decoder->set_fup_tx_flags = false;
1233 		if (err)
1234 			return err;
1235 
1236 		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1237 			intel_pt_log_at("ERROR: Unexpected indirect branch",
1238 					decoder->ip);
1239 			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1240 			return -ENOENT;
1241 		}
1242 
1243 		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1244 			intel_pt_log_at("ERROR: Unexpected conditional branch",
1245 					decoder->ip);
1246 			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1247 			return -ENOENT;
1248 		}
1249 
1250 		intel_pt_bug(decoder);
1251 	}
1252 }
1253 
1254 static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
1255 {
1256 	struct intel_pt_insn intel_pt_insn;
1257 	int err;
1258 
1259 	err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
1260 	if (err == INTEL_PT_RETURN &&
1261 	    decoder->pgd_ip &&
1262 	    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
1263 	    (decoder->state.type & INTEL_PT_BRANCH) &&
1264 	    decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
1265 		/* Unconditional branch leaving filter region */
1266 		decoder->no_progress = 0;
1267 		decoder->pge = false;
1268 		decoder->continuous_period = false;
1269 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1270 		decoder->state.type |= INTEL_PT_TRACE_END;
1271 		intel_pt_update_nr(decoder);
1272 		return 0;
1273 	}
1274 	if (err == INTEL_PT_RETURN)
1275 		return 0;
1276 	if (err)
1277 		return err;
1278 
1279 	intel_pt_update_nr(decoder);
1280 
1281 	if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1282 		if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
1283 			decoder->pge = false;
1284 			decoder->continuous_period = false;
1285 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1286 			decoder->state.from_ip = decoder->ip;
1287 			if (decoder->packet.count == 0) {
1288 				decoder->state.to_ip = 0;
1289 			} else {
1290 				decoder->state.to_ip = decoder->last_ip;
1291 				decoder->ip = decoder->last_ip;
1292 			}
1293 			decoder->state.type |= INTEL_PT_TRACE_END;
1294 		} else {
1295 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1296 			decoder->state.from_ip = decoder->ip;
1297 			if (decoder->packet.count == 0) {
1298 				decoder->state.to_ip = 0;
1299 			} else {
1300 				decoder->state.to_ip = decoder->last_ip;
1301 				decoder->ip = decoder->last_ip;
1302 			}
1303 		}
1304 		return 0;
1305 	}
1306 
1307 	if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1308 		uint64_t to_ip = decoder->ip + intel_pt_insn.length +
1309 				 intel_pt_insn.rel;
1310 
1311 		if (decoder->pgd_ip &&
1312 		    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
1313 		    decoder->pgd_ip(to_ip, decoder->data)) {
1314 			/* Conditional branch leaving filter region */
1315 			decoder->pge = false;
1316 			decoder->continuous_period = false;
1317 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1318 			decoder->ip = to_ip;
1319 			decoder->state.from_ip = decoder->ip;
1320 			decoder->state.to_ip = to_ip;
1321 			decoder->state.type |= INTEL_PT_TRACE_END;
1322 			return 0;
1323 		}
1324 		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
1325 				decoder->ip);
1326 		decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1327 		return -ENOENT;
1328 	}
1329 
1330 	return intel_pt_bug(decoder);
1331 }
1332 
1333 static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1334 {
1335 	struct intel_pt_insn intel_pt_insn;
1336 	int err;
1337 
1338 	while (1) {
1339 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
1340 		if (err == INTEL_PT_RETURN)
1341 			return 0;
1342 		if (err)
1343 			return err;
1344 
1345 		if (intel_pt_insn.op == INTEL_PT_OP_RET) {
1346 			if (!decoder->return_compression) {
1347 				intel_pt_log_at("ERROR: RET when expecting conditional branch",
1348 						decoder->ip);
1349 				decoder->pkt_state = INTEL_PT_STATE_ERR3;
1350 				return -ENOENT;
1351 			}
1352 			if (!decoder->ret_addr) {
1353 				intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
1354 						decoder->ip);
1355 				decoder->pkt_state = INTEL_PT_STATE_ERR3;
1356 				return -ENOENT;
1357 			}
1358 			if (!(decoder->tnt.payload & BIT63)) {
1359 				intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
1360 						decoder->ip);
1361 				decoder->pkt_state = INTEL_PT_STATE_ERR3;
1362 				return -ENOENT;
1363 			}
1364 			decoder->tnt.count -= 1;
1365 			if (decoder->tnt.count)
1366 				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
1367 			else
1368 				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1369 			decoder->tnt.payload <<= 1;
1370 			decoder->state.from_ip = decoder->ip;
1371 			decoder->ip = decoder->ret_addr;
1372 			decoder->state.to_ip = decoder->ip;
1373 			return 0;
1374 		}
1375 
1376 		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1377 			/* Handle deferred TIPs */
1378 			err = intel_pt_get_next_packet(decoder);
1379 			if (err)
1380 				return err;
1381 			if (decoder->packet.type != INTEL_PT_TIP ||
1382 			    decoder->packet.count == 0) {
1383 				intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
1384 						decoder->ip);
1385 				decoder->pkt_state = INTEL_PT_STATE_ERR3;
1386 				decoder->pkt_step = 0;
1387 				return -ENOENT;
1388 			}
1389 			intel_pt_set_last_ip(decoder);
1390 			decoder->state.from_ip = decoder->ip;
1391 			decoder->state.to_ip = decoder->last_ip;
1392 			decoder->ip = decoder->last_ip;
1393 			intel_pt_update_nr(decoder);
1394 			return 0;
1395 		}
1396 
1397 		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1398 			decoder->tnt.count -= 1;
1399 			if (decoder->tnt.count)
1400 				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
1401 			else
1402 				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1403 			if (decoder->tnt.payload & BIT63) {
1404 				decoder->tnt.payload <<= 1;
1405 				decoder->state.from_ip = decoder->ip;
1406 				decoder->ip += intel_pt_insn.length +
1407 					       intel_pt_insn.rel;
1408 				decoder->state.to_ip = decoder->ip;
1409 				return 0;
1410 			}
1411 			/* Instruction sample for a non-taken branch */
1412 			if (decoder->state.type & INTEL_PT_INSTRUCTION) {
1413 				decoder->tnt.payload <<= 1;
1414 				decoder->state.type = INTEL_PT_INSTRUCTION;
1415 				decoder->state.from_ip = decoder->ip;
1416 				decoder->state.to_ip = 0;
1417 				decoder->ip += intel_pt_insn.length;
1418 				return 0;
1419 			}
1420 			decoder->sample_cyc = false;
1421 			decoder->ip += intel_pt_insn.length;
1422 			if (!decoder->tnt.count) {
1423 				intel_pt_update_sample_time(decoder);
1424 				return -EAGAIN;
1425 			}
1426 			decoder->tnt.payload <<= 1;
1427 			continue;
1428 		}
1429 
1430 		return intel_pt_bug(decoder);
1431 	}
1432 }
1433 
1434 static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
1435 {
1436 	unsigned int fup_tx_flags;
1437 	int err;
1438 
1439 	fup_tx_flags = decoder->packet.payload &
1440 		       (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
1441 	err = intel_pt_get_next_packet(decoder);
1442 	if (err)
1443 		return err;
1444 	if (decoder->packet.type == INTEL_PT_FUP) {
1445 		decoder->fup_tx_flags = fup_tx_flags;
1446 		decoder->set_fup_tx_flags = true;
1447 		if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
1448 			*no_tip = true;
1449 	} else {
1450 		intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
1451 				decoder->pos);
1452 		intel_pt_update_in_tx(decoder);
1453 	}
1454 	return 0;
1455 }
1456 
/*
 * Extend a timestamp to 64 bits using a reference timestamp.
 *
 * The top byte of @ref_timestamp is OR-ed into @timestamp.  If the result is
 * then more than 2^55 away from the reference, it is moved by 2^56 towards
 * the reference.
 *
 * Return: the adjusted timestamp.
 */
static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
{
	const uint64_t wrap = 1ULL << 56;
	const uint64_t half_wrap = 1ULL << 55;
	uint64_t full = timestamp | (ref_timestamp & (0xffULL << 56));

	if (full < ref_timestamp)
		return (ref_timestamp - full > half_wrap) ? full + wrap : full;

	return (full - ref_timestamp > half_wrap) ? full - wrap : full;
}
1471 
1472 static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
1473 {
1474 	uint64_t timestamp;
1475 
1476 	decoder->have_tma = false;
1477 
1478 	if (decoder->ref_timestamp) {
1479 		timestamp = intel_pt_8b_tsc(decoder->packet.payload,
1480 					    decoder->ref_timestamp);
1481 		decoder->tsc_timestamp = timestamp;
1482 		decoder->timestamp = timestamp;
1483 		decoder->ref_timestamp = 0;
1484 		decoder->timestamp_insn_cnt = 0;
1485 	} else if (decoder->timestamp) {
1486 		timestamp = decoder->packet.payload |
1487 			    (decoder->timestamp & (0xffULL << 56));
1488 		decoder->tsc_timestamp = timestamp;
1489 		if (timestamp < decoder->timestamp &&
1490 		    decoder->timestamp - timestamp < decoder->tsc_slip) {
1491 			intel_pt_log_to("Suppressing backwards timestamp",
1492 					timestamp);
1493 			timestamp = decoder->timestamp;
1494 		}
1495 		if (timestamp < decoder->timestamp) {
1496 			intel_pt_log_to("Wraparound timestamp", timestamp);
1497 			timestamp += (1ULL << 56);
1498 			decoder->tsc_timestamp = timestamp;
1499 		}
1500 		decoder->timestamp = timestamp;
1501 		decoder->timestamp_insn_cnt = 0;
1502 	}
1503 
1504 	if (decoder->last_packet_type == INTEL_PT_CYC) {
1505 		decoder->cyc_ref_timestamp = decoder->timestamp;
1506 		decoder->cycle_cnt = 0;
1507 		decoder->have_calc_cyc_to_tsc = false;
1508 		intel_pt_calc_cyc_to_tsc(decoder, false);
1509 	}
1510 
1511 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
1512 }
1513 
1514 static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1515 {
1516 	intel_pt_log("ERROR: Buffer overflow\n");
1517 	intel_pt_clear_tx_flags(decoder);
1518 	intel_pt_set_nr(decoder);
1519 	decoder->timestamp_insn_cnt = 0;
1520 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1521 	decoder->overflow = true;
1522 	return -EOVERFLOW;
1523 }
1524 
1525 static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
1526 {
1527 	if (decoder->have_cyc)
1528 		return;
1529 
1530 	decoder->cyc_cnt_timestamp = decoder->timestamp;
1531 	decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
1532 }
1533 
1534 static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
1535 {
1536 	decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
1537 
1538 	if (decoder->pge)
1539 		intel_pt_mtc_cyc_cnt_pge(decoder);
1540 }
1541 
1542 static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
1543 {
1544 	uint64_t tot_cyc_cnt, tsc_delta;
1545 
1546 	if (decoder->have_cyc)
1547 		return;
1548 
1549 	decoder->sample_cyc = true;
1550 
1551 	if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
1552 		return;
1553 
1554 	tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
1555 	tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
1556 
1557 	if (tot_cyc_cnt > decoder->tot_cyc_cnt)
1558 		decoder->tot_cyc_cnt = tot_cyc_cnt;
1559 }
1560 
1561 static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
1562 {
1563 	uint32_t ctc = decoder->packet.payload;
1564 	uint32_t fc = decoder->packet.count;
1565 	uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
1566 
1567 	if (!decoder->tsc_ctc_ratio_d)
1568 		return;
1569 
1570 	if (decoder->pge && !decoder->in_psb)
1571 		intel_pt_mtc_cyc_cnt_pge(decoder);
1572 	else
1573 		intel_pt_mtc_cyc_cnt_upd(decoder);
1574 
1575 	decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
1576 	decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
1577 	if (decoder->tsc_ctc_mult) {
1578 		decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
1579 	} else {
1580 		decoder->ctc_timestamp -= multdiv(ctc_rem,
1581 						  decoder->tsc_ctc_ratio_n,
1582 						  decoder->tsc_ctc_ratio_d);
1583 	}
1584 	decoder->ctc_delta = 0;
1585 	decoder->have_tma = true;
1586 	decoder->fixup_last_mtc = true;
1587 	intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x  CTC rem %#x\n",
1588 		     decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
1589 }
1590 
1591 static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
1592 {
1593 	uint64_t timestamp;
1594 	uint32_t mtc, mtc_delta;
1595 
1596 	if (!decoder->have_tma)
1597 		return;
1598 
1599 	mtc = decoder->packet.payload;
1600 
1601 	if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
1602 		decoder->fixup_last_mtc = false;
1603 		intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
1604 					&decoder->last_mtc);
1605 	}
1606 
1607 	if (mtc > decoder->last_mtc)
1608 		mtc_delta = mtc - decoder->last_mtc;
1609 	else
1610 		mtc_delta = mtc + 256 - decoder->last_mtc;
1611 
1612 	decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
1613 
1614 	if (decoder->tsc_ctc_mult) {
1615 		timestamp = decoder->ctc_timestamp +
1616 			    decoder->ctc_delta * decoder->tsc_ctc_mult;
1617 	} else {
1618 		timestamp = decoder->ctc_timestamp +
1619 			    multdiv(decoder->ctc_delta,
1620 				    decoder->tsc_ctc_ratio_n,
1621 				    decoder->tsc_ctc_ratio_d);
1622 	}
1623 
1624 	if (timestamp < decoder->timestamp)
1625 		intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1626 			     timestamp, decoder->timestamp);
1627 	else
1628 		decoder->timestamp = timestamp;
1629 
1630 	intel_pt_mtc_cyc_cnt_upd(decoder);
1631 
1632 	decoder->timestamp_insn_cnt = 0;
1633 	decoder->last_mtc = mtc;
1634 
1635 	if (decoder->last_packet_type == INTEL_PT_CYC) {
1636 		decoder->cyc_ref_timestamp = decoder->timestamp;
1637 		decoder->cycle_cnt = 0;
1638 		decoder->have_calc_cyc_to_tsc = false;
1639 		intel_pt_calc_cyc_to_tsc(decoder, true);
1640 	}
1641 
1642 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
1643 }
1644 
1645 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1646 {
1647 	unsigned int cbr = decoder->packet.payload & 0xff;
1648 
1649 	decoder->cbr_payload = decoder->packet.payload;
1650 
1651 	if (decoder->cbr == cbr)
1652 		return;
1653 
1654 	decoder->cbr = cbr;
1655 	decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1656 
1657 	intel_pt_mtc_cyc_cnt_cbr(decoder);
1658 }
1659 
1660 static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1661 {
1662 	uint64_t timestamp = decoder->cyc_ref_timestamp;
1663 
1664 	decoder->have_cyc = true;
1665 
1666 	decoder->cycle_cnt += decoder->packet.payload;
1667 	if (decoder->pge)
1668 		decoder->tot_cyc_cnt += decoder->packet.payload;
1669 	decoder->sample_cyc = true;
1670 
1671 	if (!decoder->cyc_ref_timestamp)
1672 		return;
1673 
1674 	if (decoder->have_calc_cyc_to_tsc)
1675 		timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
1676 	else if (decoder->cbr)
1677 		timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
1678 	else
1679 		return;
1680 
1681 	if (timestamp < decoder->timestamp)
1682 		intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1683 			     timestamp, decoder->timestamp);
1684 	else
1685 		decoder->timestamp = timestamp;
1686 
1687 	decoder->timestamp_insn_cnt = 0;
1688 
1689 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
1690 }
1691 
1692 static void intel_pt_bbp(struct intel_pt_decoder *decoder)
1693 {
1694 	if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
1695 		memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
1696 		decoder->state.items.is_32_bit = false;
1697 	}
1698 	decoder->blk_type = decoder->packet.payload;
1699 	decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
1700 	if (decoder->blk_type == INTEL_PT_GP_REGS)
1701 		decoder->state.items.is_32_bit = decoder->packet.count;
1702 	if (decoder->blk_type_pos < 0) {
1703 		intel_pt_log("WARNING: Unknown block type %u\n",
1704 			     decoder->blk_type);
1705 	} else if (decoder->state.items.mask[decoder->blk_type_pos]) {
1706 		intel_pt_log("WARNING: Duplicate block type %u\n",
1707 			     decoder->blk_type);
1708 	}
1709 }
1710 
1711 static void intel_pt_bip(struct intel_pt_decoder *decoder)
1712 {
1713 	uint32_t id = decoder->packet.count;
1714 	uint32_t bit = 1 << id;
1715 	int pos = decoder->blk_type_pos;
1716 
1717 	if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
1718 		intel_pt_log("WARNING: Unknown block item %u type %d\n",
1719 			     id, decoder->blk_type);
1720 		return;
1721 	}
1722 
1723 	if (decoder->state.items.mask[pos] & bit) {
1724 		intel_pt_log("WARNING: Duplicate block item %u type %d\n",
1725 			     id, decoder->blk_type);
1726 	}
1727 
1728 	decoder->state.items.mask[pos] |= bit;
1729 	decoder->state.items.val[pos][id] = decoder->packet.payload;
1730 }
1731 
1732 /* Walk PSB+ packets when already in sync. */
1733 static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1734 {
1735 	int err;
1736 
1737 	decoder->in_psb = true;
1738 
1739 	while (1) {
1740 		err = intel_pt_get_next_packet(decoder);
1741 		if (err)
1742 			goto out;
1743 
1744 		switch (decoder->packet.type) {
1745 		case INTEL_PT_PSBEND:
1746 			err = 0;
1747 			goto out;
1748 
1749 		case INTEL_PT_TIP_PGD:
1750 		case INTEL_PT_TIP_PGE:
1751 		case INTEL_PT_TIP:
1752 		case INTEL_PT_TNT:
1753 		case INTEL_PT_TRACESTOP:
1754 		case INTEL_PT_BAD:
1755 		case INTEL_PT_PSB:
1756 		case INTEL_PT_PTWRITE:
1757 		case INTEL_PT_PTWRITE_IP:
1758 		case INTEL_PT_EXSTOP:
1759 		case INTEL_PT_EXSTOP_IP:
1760 		case INTEL_PT_MWAIT:
1761 		case INTEL_PT_PWRE:
1762 		case INTEL_PT_PWRX:
1763 		case INTEL_PT_BBP:
1764 		case INTEL_PT_BIP:
1765 		case INTEL_PT_BEP:
1766 		case INTEL_PT_BEP_IP:
1767 			decoder->have_tma = false;
1768 			intel_pt_log("ERROR: Unexpected packet\n");
1769 			err = -EAGAIN;
1770 			goto out;
1771 
1772 		case INTEL_PT_OVF:
1773 			err = intel_pt_overflow(decoder);
1774 			goto out;
1775 
1776 		case INTEL_PT_TSC:
1777 			intel_pt_calc_tsc_timestamp(decoder);
1778 			break;
1779 
1780 		case INTEL_PT_TMA:
1781 			intel_pt_calc_tma(decoder);
1782 			break;
1783 
1784 		case INTEL_PT_CBR:
1785 			intel_pt_calc_cbr(decoder);
1786 			break;
1787 
1788 		case INTEL_PT_MODE_EXEC:
1789 			decoder->exec_mode = decoder->packet.payload;
1790 			break;
1791 
1792 		case INTEL_PT_PIP:
1793 			intel_pt_set_pip(decoder);
1794 			break;
1795 
1796 		case INTEL_PT_FUP:
1797 			decoder->pge = true;
1798 			if (decoder->packet.count) {
1799 				intel_pt_set_last_ip(decoder);
1800 				decoder->psb_ip = decoder->last_ip;
1801 			}
1802 			break;
1803 
1804 		case INTEL_PT_MODE_TSX:
1805 			intel_pt_update_in_tx(decoder);
1806 			break;
1807 
1808 		case INTEL_PT_MTC:
1809 			intel_pt_calc_mtc_timestamp(decoder);
1810 			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
1811 				decoder->state.type |= INTEL_PT_INSTRUCTION;
1812 			break;
1813 
1814 		case INTEL_PT_CYC:
1815 			intel_pt_calc_cyc_timestamp(decoder);
1816 			break;
1817 
1818 		case INTEL_PT_VMCS:
1819 		case INTEL_PT_MNT:
1820 		case INTEL_PT_PAD:
1821 		default:
1822 			break;
1823 		}
1824 	}
1825 out:
1826 	decoder->in_psb = false;
1827 
1828 	return err;
1829 }
1830 
1831 static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1832 {
1833 	int err;
1834 
1835 	if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
1836 		decoder->tx_flags = 0;
1837 		decoder->state.flags &= ~INTEL_PT_IN_TX;
1838 		decoder->state.flags |= INTEL_PT_ABORT_TX;
1839 	} else {
1840 		decoder->state.flags |= INTEL_PT_ASYNC;
1841 	}
1842 
1843 	while (1) {
1844 		err = intel_pt_get_next_packet(decoder);
1845 		if (err)
1846 			return err;
1847 
1848 		switch (decoder->packet.type) {
1849 		case INTEL_PT_TNT:
1850 		case INTEL_PT_FUP:
1851 		case INTEL_PT_TRACESTOP:
1852 		case INTEL_PT_PSB:
1853 		case INTEL_PT_TSC:
1854 		case INTEL_PT_TMA:
1855 		case INTEL_PT_MODE_TSX:
1856 		case INTEL_PT_BAD:
1857 		case INTEL_PT_PSBEND:
1858 		case INTEL_PT_PTWRITE:
1859 		case INTEL_PT_PTWRITE_IP:
1860 		case INTEL_PT_EXSTOP:
1861 		case INTEL_PT_EXSTOP_IP:
1862 		case INTEL_PT_MWAIT:
1863 		case INTEL_PT_PWRE:
1864 		case INTEL_PT_PWRX:
1865 		case INTEL_PT_BBP:
1866 		case INTEL_PT_BIP:
1867 		case INTEL_PT_BEP:
1868 		case INTEL_PT_BEP_IP:
1869 			intel_pt_log("ERROR: Missing TIP after FUP\n");
1870 			decoder->pkt_state = INTEL_PT_STATE_ERR3;
1871 			decoder->pkt_step = 0;
1872 			return -ENOENT;
1873 
1874 		case INTEL_PT_CBR:
1875 			intel_pt_calc_cbr(decoder);
1876 			break;
1877 
1878 		case INTEL_PT_OVF:
1879 			return intel_pt_overflow(decoder);
1880 
1881 		case INTEL_PT_TIP_PGD:
1882 			decoder->state.from_ip = decoder->ip;
1883 			if (decoder->packet.count == 0) {
1884 				decoder->state.to_ip = 0;
1885 			} else {
1886 				intel_pt_set_ip(decoder);
1887 				decoder->state.to_ip = decoder->ip;
1888 			}
1889 			decoder->pge = false;
1890 			decoder->continuous_period = false;
1891 			decoder->state.type |= INTEL_PT_TRACE_END;
1892 			intel_pt_update_nr(decoder);
1893 			return 0;
1894 
1895 		case INTEL_PT_TIP_PGE:
1896 			decoder->pge = true;
1897 			intel_pt_log("Omitting PGE ip " x64_fmt "\n",
1898 				     decoder->ip);
1899 			decoder->state.from_ip = 0;
1900 			if (decoder->packet.count == 0) {
1901 				decoder->state.to_ip = 0;
1902 			} else {
1903 				intel_pt_set_ip(decoder);
1904 				decoder->state.to_ip = decoder->ip;
1905 			}
1906 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
1907 			intel_pt_mtc_cyc_cnt_pge(decoder);
1908 			intel_pt_set_nr(decoder);
1909 			return 0;
1910 
1911 		case INTEL_PT_TIP:
1912 			decoder->state.from_ip = decoder->ip;
1913 			if (decoder->packet.count == 0) {
1914 				decoder->state.to_ip = 0;
1915 			} else {
1916 				intel_pt_set_ip(decoder);
1917 				decoder->state.to_ip = decoder->ip;
1918 			}
1919 			intel_pt_update_nr(decoder);
1920 			return 0;
1921 
1922 		case INTEL_PT_PIP:
1923 			intel_pt_update_pip(decoder);
1924 			break;
1925 
1926 		case INTEL_PT_MTC:
1927 			intel_pt_calc_mtc_timestamp(decoder);
1928 			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
1929 				decoder->state.type |= INTEL_PT_INSTRUCTION;
1930 			break;
1931 
1932 		case INTEL_PT_CYC:
1933 			intel_pt_calc_cyc_timestamp(decoder);
1934 			break;
1935 
1936 		case INTEL_PT_MODE_EXEC:
1937 			decoder->exec_mode = decoder->packet.payload;
1938 			break;
1939 
1940 		case INTEL_PT_VMCS:
1941 		case INTEL_PT_MNT:
1942 		case INTEL_PT_PAD:
1943 			break;
1944 
1945 		default:
1946 			return intel_pt_bug(decoder);
1947 		}
1948 	}
1949 }
1950 
1951 static int intel_pt_resample(struct intel_pt_decoder *decoder)
1952 {
1953 	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1954 	decoder->state.type = INTEL_PT_INSTRUCTION;
1955 	decoder->state.from_ip = decoder->ip;
1956 	decoder->state.to_ip = 0;
1957 	return 0;
1958 }
1959 
/* Return values of intel_pt_hop_trace() */
enum {
	HOP_PROCESS = 0,
	HOP_IGNORE  = 1,
	HOP_RETURN  = 2,
	HOP_AGAIN   = 3,
};
1964 
1965 static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
1966 
1967 /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
1968 static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
1969 {
1970 	/* Leap from PSB to PSB, getting ip from FUP within PSB+ */
1971 	if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
1972 		*err = intel_pt_scan_for_psb(decoder);
1973 		if (*err)
1974 			return HOP_RETURN;
1975 	}
1976 
1977 	switch (decoder->packet.type) {
1978 	case INTEL_PT_TNT:
1979 		return HOP_IGNORE;
1980 
1981 	case INTEL_PT_TIP_PGD:
1982 		if (!decoder->packet.count) {
1983 			intel_pt_set_nr(decoder);
1984 			return HOP_IGNORE;
1985 		}
1986 		intel_pt_set_ip(decoder);
1987 		decoder->state.type |= INTEL_PT_TRACE_END;
1988 		decoder->state.from_ip = 0;
1989 		decoder->state.to_ip = decoder->ip;
1990 		intel_pt_update_nr(decoder);
1991 		return HOP_RETURN;
1992 
1993 	case INTEL_PT_TIP:
1994 		if (!decoder->packet.count) {
1995 			intel_pt_set_nr(decoder);
1996 			return HOP_IGNORE;
1997 		}
1998 		intel_pt_set_ip(decoder);
1999 		decoder->state.type = INTEL_PT_INSTRUCTION;
2000 		decoder->state.from_ip = decoder->ip;
2001 		decoder->state.to_ip = 0;
2002 		intel_pt_update_nr(decoder);
2003 		return HOP_RETURN;
2004 
2005 	case INTEL_PT_FUP:
2006 		if (!decoder->packet.count)
2007 			return HOP_IGNORE;
2008 		intel_pt_set_ip(decoder);
2009 		if (intel_pt_fup_event(decoder))
2010 			return HOP_RETURN;
2011 		if (!decoder->branch_enable)
2012 			*no_tip = true;
2013 		if (*no_tip) {
2014 			decoder->state.type = INTEL_PT_INSTRUCTION;
2015 			decoder->state.from_ip = decoder->ip;
2016 			decoder->state.to_ip = 0;
2017 			return HOP_RETURN;
2018 		}
2019 		*err = intel_pt_walk_fup_tip(decoder);
2020 		if (!*err)
2021 			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2022 		return HOP_RETURN;
2023 
2024 	case INTEL_PT_PSB:
2025 		decoder->state.psb_offset = decoder->pos;
2026 		decoder->psb_ip = 0;
2027 		decoder->last_ip = 0;
2028 		decoder->have_last_ip = true;
2029 		*err = intel_pt_walk_psbend(decoder);
2030 		if (*err == -EAGAIN)
2031 			return HOP_AGAIN;
2032 		if (*err)
2033 			return HOP_RETURN;
2034 		decoder->state.type = INTEL_PT_PSB_EVT;
2035 		if (decoder->psb_ip) {
2036 			decoder->state.type |= INTEL_PT_INSTRUCTION;
2037 			decoder->ip = decoder->psb_ip;
2038 		}
2039 		decoder->state.from_ip = decoder->psb_ip;
2040 		decoder->state.to_ip = 0;
2041 		return HOP_RETURN;
2042 
2043 	case INTEL_PT_BAD:
2044 	case INTEL_PT_PAD:
2045 	case INTEL_PT_TIP_PGE:
2046 	case INTEL_PT_TSC:
2047 	case INTEL_PT_TMA:
2048 	case INTEL_PT_MODE_EXEC:
2049 	case INTEL_PT_MODE_TSX:
2050 	case INTEL_PT_MTC:
2051 	case INTEL_PT_CYC:
2052 	case INTEL_PT_VMCS:
2053 	case INTEL_PT_PSBEND:
2054 	case INTEL_PT_CBR:
2055 	case INTEL_PT_TRACESTOP:
2056 	case INTEL_PT_PIP:
2057 	case INTEL_PT_OVF:
2058 	case INTEL_PT_MNT:
2059 	case INTEL_PT_PTWRITE:
2060 	case INTEL_PT_PTWRITE_IP:
2061 	case INTEL_PT_EXSTOP:
2062 	case INTEL_PT_EXSTOP_IP:
2063 	case INTEL_PT_MWAIT:
2064 	case INTEL_PT_PWRE:
2065 	case INTEL_PT_PWRX:
2066 	case INTEL_PT_BBP:
2067 	case INTEL_PT_BIP:
2068 	case INTEL_PT_BEP:
2069 	case INTEL_PT_BEP_IP:
2070 	default:
2071 		return HOP_PROCESS;
2072 	}
2073 }
2074 
2075 struct intel_pt_psb_info {
2076 	struct intel_pt_pkt fup_packet;
2077 	bool fup;
2078 	int after_psbend;
2079 };
2080 
2081 /* Lookahead and get the FUP packet from PSB+ */
2082 static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
2083 {
2084 	struct intel_pt_psb_info *data = pkt_info->data;
2085 
2086 	switch (pkt_info->packet.type) {
2087 	case INTEL_PT_PAD:
2088 	case INTEL_PT_MNT:
2089 	case INTEL_PT_TSC:
2090 	case INTEL_PT_TMA:
2091 	case INTEL_PT_MODE_EXEC:
2092 	case INTEL_PT_MODE_TSX:
2093 	case INTEL_PT_MTC:
2094 	case INTEL_PT_CYC:
2095 	case INTEL_PT_VMCS:
2096 	case INTEL_PT_CBR:
2097 	case INTEL_PT_PIP:
2098 		if (data->after_psbend) {
2099 			data->after_psbend -= 1;
2100 			if (!data->after_psbend)
2101 				return 1;
2102 		}
2103 		break;
2104 
2105 	case INTEL_PT_FUP:
2106 		if (data->after_psbend)
2107 			return 1;
2108 		if (data->fup || pkt_info->packet.count == 0)
2109 			return 1;
2110 		data->fup_packet = pkt_info->packet;
2111 		data->fup = true;
2112 		break;
2113 
2114 	case INTEL_PT_PSBEND:
2115 		if (!data->fup)
2116 			return 1;
2117 		/* Keep going to check for a TIP.PGE */
2118 		data->after_psbend = 6;
2119 		break;
2120 
2121 	case INTEL_PT_TIP_PGE:
2122 		/* Ignore FUP in PSB+ if followed by TIP.PGE */
2123 		if (data->after_psbend)
2124 			data->fup = false;
2125 		return 1;
2126 
2127 	case INTEL_PT_PTWRITE:
2128 	case INTEL_PT_PTWRITE_IP:
2129 	case INTEL_PT_EXSTOP:
2130 	case INTEL_PT_EXSTOP_IP:
2131 	case INTEL_PT_MWAIT:
2132 	case INTEL_PT_PWRE:
2133 	case INTEL_PT_PWRX:
2134 	case INTEL_PT_BBP:
2135 	case INTEL_PT_BIP:
2136 	case INTEL_PT_BEP:
2137 	case INTEL_PT_BEP_IP:
2138 		if (data->after_psbend) {
2139 			data->after_psbend -= 1;
2140 			if (!data->after_psbend)
2141 				return 1;
2142 			break;
2143 		}
2144 		return 1;
2145 
2146 	case INTEL_PT_OVF:
2147 	case INTEL_PT_BAD:
2148 	case INTEL_PT_TNT:
2149 	case INTEL_PT_TIP_PGD:
2150 	case INTEL_PT_TIP:
2151 	case INTEL_PT_PSB:
2152 	case INTEL_PT_TRACESTOP:
2153 	default:
2154 		return 1;
2155 	}
2156 
2157 	return 0;
2158 }
2159 
2160 static int intel_pt_psb(struct intel_pt_decoder *decoder)
2161 {
2162 	int err;
2163 
2164 	decoder->last_ip = 0;
2165 	decoder->psb_ip = 0;
2166 	decoder->have_last_ip = true;
2167 	intel_pt_clear_stack(&decoder->stack);
2168 	err = intel_pt_walk_psbend(decoder);
2169 	if (err)
2170 		return err;
2171 	decoder->state.type = INTEL_PT_PSB_EVT;
2172 	decoder->state.from_ip = decoder->psb_ip;
2173 	decoder->state.to_ip = 0;
2174 	return 0;
2175 }
2176 
2177 static int intel_pt_fup_in_psb(struct intel_pt_decoder *decoder)
2178 {
2179 	int err;
2180 
2181 	if (decoder->ip != decoder->last_ip) {
2182 		err = intel_pt_walk_fup(decoder);
2183 		if (!err || err != -EAGAIN)
2184 			return err;
2185 	}
2186 
2187 	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2188 	err = intel_pt_psb(decoder);
2189 	if (err) {
2190 		decoder->pkt_state = INTEL_PT_STATE_ERR3;
2191 		return -ENOENT;
2192 	}
2193 
2194 	return 0;
2195 }
2196 
2197 static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err)
2198 {
2199 	struct intel_pt_psb_info data = { .fup = false };
2200 
2201 	if (!decoder->branch_enable || !decoder->pge)
2202 		return false;
2203 
2204 	intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data);
2205 	if (!data.fup)
2206 		return false;
2207 
2208 	decoder->packet = data.fup_packet;
2209 	intel_pt_set_last_ip(decoder);
2210 	decoder->pkt_state = INTEL_PT_STATE_FUP_IN_PSB;
2211 
2212 	*err = intel_pt_fup_in_psb(decoder);
2213 
2214 	return true;
2215 }
2216 
2217 static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
2218 {
2219 	int last_packet_type = INTEL_PT_PAD;
2220 	bool no_tip = false;
2221 	int err;
2222 
2223 	while (1) {
2224 		err = intel_pt_get_next_packet(decoder);
2225 		if (err)
2226 			return err;
2227 next:
2228 		if (decoder->cyc_threshold) {
2229 			if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC)
2230 				decoder->sample_cyc = false;
2231 			last_packet_type = decoder->packet.type;
2232 		}
2233 
2234 		if (decoder->hop) {
2235 			switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
2236 			case HOP_IGNORE:
2237 				continue;
2238 			case HOP_RETURN:
2239 				return err;
2240 			case HOP_AGAIN:
2241 				goto next;
2242 			default:
2243 				break;
2244 			}
2245 		}
2246 
2247 		switch (decoder->packet.type) {
2248 		case INTEL_PT_TNT:
2249 			if (!decoder->packet.count)
2250 				break;
2251 			decoder->tnt = decoder->packet;
2252 			decoder->pkt_state = INTEL_PT_STATE_TNT;
2253 			err = intel_pt_walk_tnt(decoder);
2254 			if (err == -EAGAIN)
2255 				break;
2256 			return err;
2257 
2258 		case INTEL_PT_TIP_PGD:
2259 			if (decoder->packet.count != 0)
2260 				intel_pt_set_last_ip(decoder);
2261 			decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
2262 			return intel_pt_walk_tip(decoder);
2263 
2264 		case INTEL_PT_TIP_PGE: {
2265 			decoder->pge = true;
2266 			intel_pt_mtc_cyc_cnt_pge(decoder);
2267 			intel_pt_set_nr(decoder);
2268 			if (decoder->packet.count == 0) {
2269 				intel_pt_log_at("Skipping zero TIP.PGE",
2270 						decoder->pos);
2271 				break;
2272 			}
2273 			intel_pt_set_ip(decoder);
2274 			decoder->state.from_ip = 0;
2275 			decoder->state.to_ip = decoder->ip;
2276 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2277 			/*
2278 			 * In hop mode, resample to get the to_ip as an
2279 			 * "instruction" sample.
2280 			 */
2281 			if (decoder->hop)
2282 				decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2283 			return 0;
2284 		}
2285 
2286 		case INTEL_PT_OVF:
2287 			return intel_pt_overflow(decoder);
2288 
2289 		case INTEL_PT_TIP:
2290 			if (decoder->packet.count != 0)
2291 				intel_pt_set_last_ip(decoder);
2292 			decoder->pkt_state = INTEL_PT_STATE_TIP;
2293 			return intel_pt_walk_tip(decoder);
2294 
2295 		case INTEL_PT_FUP:
2296 			if (decoder->packet.count == 0) {
2297 				intel_pt_log_at("Skipping zero FUP",
2298 						decoder->pos);
2299 				no_tip = false;
2300 				break;
2301 			}
2302 			intel_pt_set_last_ip(decoder);
2303 			if (!decoder->branch_enable) {
2304 				decoder->ip = decoder->last_ip;
2305 				if (intel_pt_fup_event(decoder))
2306 					return 0;
2307 				no_tip = false;
2308 				break;
2309 			}
2310 			if (decoder->set_fup_mwait)
2311 				no_tip = true;
2312 			if (no_tip)
2313 				decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
2314 			else
2315 				decoder->pkt_state = INTEL_PT_STATE_FUP;
2316 			err = intel_pt_walk_fup(decoder);
2317 			if (err != -EAGAIN)
2318 				return err;
2319 			if (no_tip) {
2320 				no_tip = false;
2321 				break;
2322 			}
2323 			return intel_pt_walk_fup_tip(decoder);
2324 
2325 		case INTEL_PT_TRACESTOP:
2326 			decoder->pge = false;
2327 			decoder->continuous_period = false;
2328 			intel_pt_clear_tx_flags(decoder);
2329 			decoder->have_tma = false;
2330 			break;
2331 
2332 		case INTEL_PT_PSB:
2333 			decoder->state.psb_offset = decoder->pos;
2334 			decoder->psb_ip = 0;
2335 			if (intel_pt_psb_with_fup(decoder, &err))
2336 				return err;
2337 			err = intel_pt_psb(decoder);
2338 			if (err == -EAGAIN)
2339 				goto next;
2340 			return err;
2341 
2342 		case INTEL_PT_PIP:
2343 			intel_pt_update_pip(decoder);
2344 			break;
2345 
2346 		case INTEL_PT_MTC:
2347 			intel_pt_calc_mtc_timestamp(decoder);
2348 			if (decoder->period_type != INTEL_PT_PERIOD_MTC)
2349 				break;
2350 			/*
2351 			 * Ensure that there has been an instruction since the
2352 			 * last MTC.
2353 			 */
2354 			if (!decoder->mtc_insn)
2355 				break;
2356 			decoder->mtc_insn = false;
2357 			/* Ensure that there is a timestamp */
2358 			if (!decoder->timestamp)
2359 				break;
2360 			decoder->state.type = INTEL_PT_INSTRUCTION;
2361 			decoder->state.from_ip = decoder->ip;
2362 			decoder->state.to_ip = 0;
2363 			decoder->mtc_insn = false;
2364 			return 0;
2365 
2366 		case INTEL_PT_TSC:
2367 			intel_pt_calc_tsc_timestamp(decoder);
2368 			break;
2369 
2370 		case INTEL_PT_TMA:
2371 			intel_pt_calc_tma(decoder);
2372 			break;
2373 
2374 		case INTEL_PT_CYC:
2375 			intel_pt_calc_cyc_timestamp(decoder);
2376 			break;
2377 
2378 		case INTEL_PT_CBR:
2379 			intel_pt_calc_cbr(decoder);
2380 			if (decoder->cbr != decoder->cbr_seen) {
2381 				decoder->state.type = 0;
2382 				return 0;
2383 			}
2384 			break;
2385 
2386 		case INTEL_PT_MODE_EXEC:
2387 			decoder->exec_mode = decoder->packet.payload;
2388 			break;
2389 
2390 		case INTEL_PT_MODE_TSX:
2391 			/* MODE_TSX need not be followed by FUP */
2392 			if (!decoder->pge || decoder->in_psb) {
2393 				intel_pt_update_in_tx(decoder);
2394 				break;
2395 			}
2396 			err = intel_pt_mode_tsx(decoder, &no_tip);
2397 			if (err)
2398 				return err;
2399 			goto next;
2400 
2401 		case INTEL_PT_BAD: /* Does not happen */
2402 			return intel_pt_bug(decoder);
2403 
2404 		case INTEL_PT_PSBEND:
2405 		case INTEL_PT_VMCS:
2406 		case INTEL_PT_MNT:
2407 		case INTEL_PT_PAD:
2408 			break;
2409 
2410 		case INTEL_PT_PTWRITE_IP:
2411 			decoder->fup_ptw_payload = decoder->packet.payload;
2412 			err = intel_pt_get_next_packet(decoder);
2413 			if (err)
2414 				return err;
2415 			if (decoder->packet.type == INTEL_PT_FUP) {
2416 				decoder->set_fup_ptw = true;
2417 				no_tip = true;
2418 			} else {
2419 				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
2420 						decoder->pos);
2421 			}
2422 			goto next;
2423 
2424 		case INTEL_PT_PTWRITE:
2425 			decoder->state.type = INTEL_PT_PTW;
2426 			decoder->state.from_ip = decoder->ip;
2427 			decoder->state.to_ip = 0;
2428 			decoder->state.ptw_payload = decoder->packet.payload;
2429 			return 0;
2430 
2431 		case INTEL_PT_MWAIT:
2432 			decoder->fup_mwait_payload = decoder->packet.payload;
2433 			decoder->set_fup_mwait = true;
2434 			break;
2435 
2436 		case INTEL_PT_PWRE:
2437 			if (decoder->set_fup_mwait) {
2438 				decoder->fup_pwre_payload =
2439 							decoder->packet.payload;
2440 				decoder->set_fup_pwre = true;
2441 				break;
2442 			}
2443 			decoder->state.type = INTEL_PT_PWR_ENTRY;
2444 			decoder->state.from_ip = decoder->ip;
2445 			decoder->state.to_ip = 0;
2446 			decoder->state.pwrx_payload = decoder->packet.payload;
2447 			return 0;
2448 
2449 		case INTEL_PT_EXSTOP_IP:
2450 			err = intel_pt_get_next_packet(decoder);
2451 			if (err)
2452 				return err;
2453 			if (decoder->packet.type == INTEL_PT_FUP) {
2454 				decoder->set_fup_exstop = true;
2455 				no_tip = true;
2456 			} else {
2457 				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
2458 						decoder->pos);
2459 			}
2460 			goto next;
2461 
2462 		case INTEL_PT_EXSTOP:
2463 			decoder->state.type = INTEL_PT_EX_STOP;
2464 			decoder->state.from_ip = decoder->ip;
2465 			decoder->state.to_ip = 0;
2466 			return 0;
2467 
2468 		case INTEL_PT_PWRX:
2469 			decoder->state.type = INTEL_PT_PWR_EXIT;
2470 			decoder->state.from_ip = decoder->ip;
2471 			decoder->state.to_ip = 0;
2472 			decoder->state.pwrx_payload = decoder->packet.payload;
2473 			return 0;
2474 
2475 		case INTEL_PT_BBP:
2476 			intel_pt_bbp(decoder);
2477 			break;
2478 
2479 		case INTEL_PT_BIP:
2480 			intel_pt_bip(decoder);
2481 			break;
2482 
2483 		case INTEL_PT_BEP:
2484 			decoder->state.type = INTEL_PT_BLK_ITEMS;
2485 			decoder->state.from_ip = decoder->ip;
2486 			decoder->state.to_ip = 0;
2487 			return 0;
2488 
2489 		case INTEL_PT_BEP_IP:
2490 			err = intel_pt_get_next_packet(decoder);
2491 			if (err)
2492 				return err;
2493 			if (decoder->packet.type == INTEL_PT_FUP) {
2494 				decoder->set_fup_bep = true;
2495 				no_tip = true;
2496 			} else {
2497 				intel_pt_log_at("ERROR: Missing FUP after BEP",
2498 						decoder->pos);
2499 			}
2500 			goto next;
2501 
2502 		default:
2503 			return intel_pt_bug(decoder);
2504 		}
2505 	}
2506 }
2507 
2508 static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
2509 {
2510 	return decoder->packet.count &&
2511 	       (decoder->have_last_ip || decoder->packet.count == 3 ||
2512 		decoder->packet.count == 6);
2513 }
2514 
2515 /* Walk PSB+ packets to get in sync. */
2516 static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
2517 {
2518 	int err;
2519 
2520 	decoder->in_psb = true;
2521 
2522 	while (1) {
2523 		err = intel_pt_get_next_packet(decoder);
2524 		if (err)
2525 			goto out;
2526 
2527 		switch (decoder->packet.type) {
2528 		case INTEL_PT_TIP_PGD:
2529 			decoder->continuous_period = false;
2530 			__fallthrough;
2531 		case INTEL_PT_TIP_PGE:
2532 		case INTEL_PT_TIP:
2533 		case INTEL_PT_PTWRITE:
2534 		case INTEL_PT_PTWRITE_IP:
2535 		case INTEL_PT_EXSTOP:
2536 		case INTEL_PT_EXSTOP_IP:
2537 		case INTEL_PT_MWAIT:
2538 		case INTEL_PT_PWRE:
2539 		case INTEL_PT_PWRX:
2540 		case INTEL_PT_BBP:
2541 		case INTEL_PT_BIP:
2542 		case INTEL_PT_BEP:
2543 		case INTEL_PT_BEP_IP:
2544 			intel_pt_log("ERROR: Unexpected packet\n");
2545 			err = -ENOENT;
2546 			goto out;
2547 
2548 		case INTEL_PT_FUP:
2549 			decoder->pge = true;
2550 			if (intel_pt_have_ip(decoder)) {
2551 				uint64_t current_ip = decoder->ip;
2552 
2553 				intel_pt_set_ip(decoder);
2554 				decoder->psb_ip = decoder->ip;
2555 				if (current_ip)
2556 					intel_pt_log_to("Setting IP",
2557 							decoder->ip);
2558 			}
2559 			break;
2560 
2561 		case INTEL_PT_MTC:
2562 			intel_pt_calc_mtc_timestamp(decoder);
2563 			break;
2564 
2565 		case INTEL_PT_TSC:
2566 			intel_pt_calc_tsc_timestamp(decoder);
2567 			break;
2568 
2569 		case INTEL_PT_TMA:
2570 			intel_pt_calc_tma(decoder);
2571 			break;
2572 
2573 		case INTEL_PT_CYC:
2574 			intel_pt_calc_cyc_timestamp(decoder);
2575 			break;
2576 
2577 		case INTEL_PT_CBR:
2578 			intel_pt_calc_cbr(decoder);
2579 			break;
2580 
2581 		case INTEL_PT_PIP:
2582 			intel_pt_set_pip(decoder);
2583 			break;
2584 
2585 		case INTEL_PT_MODE_EXEC:
2586 			decoder->exec_mode = decoder->packet.payload;
2587 			break;
2588 
2589 		case INTEL_PT_MODE_TSX:
2590 			intel_pt_update_in_tx(decoder);
2591 			break;
2592 
2593 		case INTEL_PT_TRACESTOP:
2594 			decoder->pge = false;
2595 			decoder->continuous_period = false;
2596 			intel_pt_clear_tx_flags(decoder);
2597 			__fallthrough;
2598 
2599 		case INTEL_PT_TNT:
2600 			decoder->have_tma = false;
2601 			intel_pt_log("ERROR: Unexpected packet\n");
2602 			if (decoder->ip)
2603 				decoder->pkt_state = INTEL_PT_STATE_ERR4;
2604 			else
2605 				decoder->pkt_state = INTEL_PT_STATE_ERR3;
2606 			err = -ENOENT;
2607 			goto out;
2608 
2609 		case INTEL_PT_BAD: /* Does not happen */
2610 			err = intel_pt_bug(decoder);
2611 			goto out;
2612 
2613 		case INTEL_PT_OVF:
2614 			err = intel_pt_overflow(decoder);
2615 			goto out;
2616 
2617 		case INTEL_PT_PSBEND:
2618 			err = 0;
2619 			goto out;
2620 
2621 		case INTEL_PT_PSB:
2622 		case INTEL_PT_VMCS:
2623 		case INTEL_PT_MNT:
2624 		case INTEL_PT_PAD:
2625 		default:
2626 			break;
2627 		}
2628 	}
2629 out:
2630 	decoder->in_psb = false;
2631 
2632 	return err;
2633 }
2634 
2635 static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
2636 {
2637 	int err;
2638 
2639 	while (1) {
2640 		err = intel_pt_get_next_packet(decoder);
2641 		if (err)
2642 			return err;
2643 
2644 		switch (decoder->packet.type) {
2645 		case INTEL_PT_TIP_PGD:
2646 			decoder->continuous_period = false;
2647 			decoder->pge = false;
2648 			if (intel_pt_have_ip(decoder))
2649 				intel_pt_set_ip(decoder);
2650 			if (!decoder->ip)
2651 				break;
2652 			decoder->state.type |= INTEL_PT_TRACE_END;
2653 			return 0;
2654 
2655 		case INTEL_PT_TIP_PGE:
2656 			decoder->pge = true;
2657 			intel_pt_mtc_cyc_cnt_pge(decoder);
2658 			if (intel_pt_have_ip(decoder))
2659 				intel_pt_set_ip(decoder);
2660 			if (!decoder->ip)
2661 				break;
2662 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2663 			return 0;
2664 
2665 		case INTEL_PT_TIP:
2666 			decoder->pge = true;
2667 			if (intel_pt_have_ip(decoder))
2668 				intel_pt_set_ip(decoder);
2669 			if (!decoder->ip)
2670 				break;
2671 			return 0;
2672 
2673 		case INTEL_PT_FUP:
2674 			if (intel_pt_have_ip(decoder))
2675 				intel_pt_set_ip(decoder);
2676 			if (decoder->ip)
2677 				return 0;
2678 			break;
2679 
2680 		case INTEL_PT_MTC:
2681 			intel_pt_calc_mtc_timestamp(decoder);
2682 			break;
2683 
2684 		case INTEL_PT_TSC:
2685 			intel_pt_calc_tsc_timestamp(decoder);
2686 			break;
2687 
2688 		case INTEL_PT_TMA:
2689 			intel_pt_calc_tma(decoder);
2690 			break;
2691 
2692 		case INTEL_PT_CYC:
2693 			intel_pt_calc_cyc_timestamp(decoder);
2694 			break;
2695 
2696 		case INTEL_PT_CBR:
2697 			intel_pt_calc_cbr(decoder);
2698 			break;
2699 
2700 		case INTEL_PT_PIP:
2701 			intel_pt_set_pip(decoder);
2702 			break;
2703 
2704 		case INTEL_PT_MODE_EXEC:
2705 			decoder->exec_mode = decoder->packet.payload;
2706 			break;
2707 
2708 		case INTEL_PT_MODE_TSX:
2709 			intel_pt_update_in_tx(decoder);
2710 			break;
2711 
2712 		case INTEL_PT_OVF:
2713 			return intel_pt_overflow(decoder);
2714 
2715 		case INTEL_PT_BAD: /* Does not happen */
2716 			return intel_pt_bug(decoder);
2717 
2718 		case INTEL_PT_TRACESTOP:
2719 			decoder->pge = false;
2720 			decoder->continuous_period = false;
2721 			intel_pt_clear_tx_flags(decoder);
2722 			decoder->have_tma = false;
2723 			break;
2724 
2725 		case INTEL_PT_PSB:
2726 			decoder->state.psb_offset = decoder->pos;
2727 			decoder->psb_ip = 0;
2728 			decoder->last_ip = 0;
2729 			decoder->have_last_ip = true;
2730 			intel_pt_clear_stack(&decoder->stack);
2731 			err = intel_pt_walk_psb(decoder);
2732 			if (err)
2733 				return err;
2734 			decoder->state.type = INTEL_PT_PSB_EVT;
2735 			decoder->state.from_ip = decoder->psb_ip;
2736 			decoder->state.to_ip = 0;
2737 			return 0;
2738 
2739 		case INTEL_PT_TNT:
2740 		case INTEL_PT_PSBEND:
2741 		case INTEL_PT_VMCS:
2742 		case INTEL_PT_MNT:
2743 		case INTEL_PT_PAD:
2744 		case INTEL_PT_PTWRITE:
2745 		case INTEL_PT_PTWRITE_IP:
2746 		case INTEL_PT_EXSTOP:
2747 		case INTEL_PT_EXSTOP_IP:
2748 		case INTEL_PT_MWAIT:
2749 		case INTEL_PT_PWRE:
2750 		case INTEL_PT_PWRX:
2751 		case INTEL_PT_BBP:
2752 		case INTEL_PT_BIP:
2753 		case INTEL_PT_BEP:
2754 		case INTEL_PT_BEP_IP:
2755 		default:
2756 			break;
2757 		}
2758 	}
2759 }
2760 
2761 static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
2762 {
2763 	int err;
2764 
2765 	decoder->set_fup_tx_flags = false;
2766 	decoder->set_fup_ptw = false;
2767 	decoder->set_fup_mwait = false;
2768 	decoder->set_fup_pwre = false;
2769 	decoder->set_fup_exstop = false;
2770 	decoder->set_fup_bep = false;
2771 
2772 	if (!decoder->branch_enable) {
2773 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2774 		decoder->overflow = false;
2775 		decoder->state.type = 0; /* Do not have a sample */
2776 		return 0;
2777 	}
2778 
2779 	intel_pt_log("Scanning for full IP\n");
2780 	err = intel_pt_walk_to_ip(decoder);
2781 	if (err || ((decoder->state.type & INTEL_PT_PSB_EVT) && !decoder->ip))
2782 		return err;
2783 
2784 	/* In hop mode, resample to get the to_ip as an "instruction" sample */
2785 	if (decoder->hop)
2786 		decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2787 	else
2788 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2789 	decoder->overflow = false;
2790 
2791 	decoder->state.from_ip = 0;
2792 	decoder->state.to_ip = decoder->ip;
2793 	intel_pt_log_to("Setting IP", decoder->ip);
2794 
2795 	return 0;
2796 }
2797 
2798 static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
2799 {
2800 	const unsigned char *end = decoder->buf + decoder->len;
2801 	size_t i;
2802 
2803 	for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
2804 		if (i > decoder->len)
2805 			continue;
2806 		if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
2807 			return i;
2808 	}
2809 	return 0;
2810 }
2811 
2812 static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
2813 {
2814 	size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
2815 	const char *psb = INTEL_PT_PSB_STR;
2816 
2817 	if (rest_psb > decoder->len ||
2818 	    memcmp(decoder->buf, psb + part_psb, rest_psb))
2819 		return 0;
2820 
2821 	return rest_psb;
2822 }
2823 
2824 static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
2825 				  int part_psb)
2826 {
2827 	int rest_psb, ret;
2828 
2829 	decoder->pos += decoder->len;
2830 	decoder->len = 0;
2831 
2832 	ret = intel_pt_get_next_data(decoder, false);
2833 	if (ret)
2834 		return ret;
2835 
2836 	rest_psb = intel_pt_rest_psb(decoder, part_psb);
2837 	if (!rest_psb)
2838 		return 0;
2839 
2840 	decoder->pos -= part_psb;
2841 	decoder->next_buf = decoder->buf + rest_psb;
2842 	decoder->next_len = decoder->len - rest_psb;
2843 	memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2844 	decoder->buf = decoder->temp_buf;
2845 	decoder->len = INTEL_PT_PSB_LEN;
2846 
2847 	return 0;
2848 }
2849 
2850 static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
2851 {
2852 	unsigned char *next;
2853 	int ret;
2854 
2855 	intel_pt_log("Scanning for PSB\n");
2856 	while (1) {
2857 		if (!decoder->len) {
2858 			ret = intel_pt_get_next_data(decoder, false);
2859 			if (ret)
2860 				return ret;
2861 		}
2862 
2863 		next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
2864 			      INTEL_PT_PSB_LEN);
2865 		if (!next) {
2866 			int part_psb;
2867 
2868 			part_psb = intel_pt_part_psb(decoder);
2869 			if (part_psb) {
2870 				ret = intel_pt_get_split_psb(decoder, part_psb);
2871 				if (ret)
2872 					return ret;
2873 			} else {
2874 				decoder->pos += decoder->len;
2875 				decoder->len = 0;
2876 			}
2877 			continue;
2878 		}
2879 
2880 		decoder->pkt_step = next - decoder->buf;
2881 		return intel_pt_get_next_packet(decoder);
2882 	}
2883 }
2884 
2885 static int intel_pt_sync(struct intel_pt_decoder *decoder)
2886 {
2887 	int err;
2888 
2889 	decoder->pge = false;
2890 	decoder->continuous_period = false;
2891 	decoder->have_last_ip = false;
2892 	decoder->last_ip = 0;
2893 	decoder->psb_ip = 0;
2894 	decoder->ip = 0;
2895 	intel_pt_clear_stack(&decoder->stack);
2896 
2897 	err = intel_pt_scan_for_psb(decoder);
2898 	if (err)
2899 		return err;
2900 
2901 	decoder->have_last_ip = true;
2902 	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
2903 
2904 	err = intel_pt_walk_psb(decoder);
2905 	if (err)
2906 		return err;
2907 
2908 	decoder->state.type = INTEL_PT_PSB_EVT; /* Only PSB sample */
2909 	decoder->state.from_ip = decoder->psb_ip;
2910 	decoder->state.to_ip = 0;
2911 
2912 	if (decoder->ip) {
2913 		/*
2914 		 * In hop mode, resample to get the PSB FUP ip as an
2915 		 * "instruction" sample.
2916 		 */
2917 		if (decoder->hop)
2918 			decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2919 		else
2920 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2921 	}
2922 
2923 	return 0;
2924 }
2925 
2926 static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
2927 {
2928 	uint64_t est = decoder->sample_insn_cnt << 1;
2929 
2930 	if (!decoder->cbr || !decoder->max_non_turbo_ratio)
2931 		goto out;
2932 
2933 	est *= decoder->max_non_turbo_ratio;
2934 	est /= decoder->cbr;
2935 out:
2936 	return decoder->sample_timestamp + est;
2937 }
2938 
2939 const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2940 {
2941 	int err;
2942 
2943 	do {
2944 		decoder->state.type = INTEL_PT_BRANCH;
2945 		decoder->state.flags = 0;
2946 
2947 		switch (decoder->pkt_state) {
2948 		case INTEL_PT_STATE_NO_PSB:
2949 			err = intel_pt_sync(decoder);
2950 			break;
2951 		case INTEL_PT_STATE_NO_IP:
2952 			decoder->have_last_ip = false;
2953 			decoder->last_ip = 0;
2954 			decoder->ip = 0;
2955 			__fallthrough;
2956 		case INTEL_PT_STATE_ERR_RESYNC:
2957 			err = intel_pt_sync_ip(decoder);
2958 			break;
2959 		case INTEL_PT_STATE_IN_SYNC:
2960 			err = intel_pt_walk_trace(decoder);
2961 			break;
2962 		case INTEL_PT_STATE_TNT:
2963 		case INTEL_PT_STATE_TNT_CONT:
2964 			err = intel_pt_walk_tnt(decoder);
2965 			if (err == -EAGAIN)
2966 				err = intel_pt_walk_trace(decoder);
2967 			break;
2968 		case INTEL_PT_STATE_TIP:
2969 		case INTEL_PT_STATE_TIP_PGD:
2970 			err = intel_pt_walk_tip(decoder);
2971 			break;
2972 		case INTEL_PT_STATE_FUP:
2973 			err = intel_pt_walk_fup(decoder);
2974 			if (err == -EAGAIN)
2975 				err = intel_pt_walk_fup_tip(decoder);
2976 			break;
2977 		case INTEL_PT_STATE_FUP_NO_TIP:
2978 			err = intel_pt_walk_fup(decoder);
2979 			if (err == -EAGAIN)
2980 				err = intel_pt_walk_trace(decoder);
2981 			break;
2982 		case INTEL_PT_STATE_FUP_IN_PSB:
2983 			err = intel_pt_fup_in_psb(decoder);
2984 			break;
2985 		case INTEL_PT_STATE_RESAMPLE:
2986 			err = intel_pt_resample(decoder);
2987 			break;
2988 		default:
2989 			err = intel_pt_bug(decoder);
2990 			break;
2991 		}
2992 	} while (err == -ENOLINK);
2993 
2994 	if (err) {
2995 		decoder->state.err = intel_pt_ext_err(err);
2996 		decoder->state.from_ip = decoder->ip;
2997 		intel_pt_update_sample_time(decoder);
2998 		decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2999 		intel_pt_set_nr(decoder);
3000 	} else {
3001 		decoder->state.err = 0;
3002 		if (decoder->cbr != decoder->cbr_seen) {
3003 			decoder->cbr_seen = decoder->cbr;
3004 			if (!decoder->state.type) {
3005 				decoder->state.from_ip = decoder->ip;
3006 				decoder->state.to_ip = 0;
3007 			}
3008 			decoder->state.type |= INTEL_PT_CBR_CHG;
3009 			decoder->state.cbr_payload = decoder->cbr_payload;
3010 			decoder->state.cbr = decoder->cbr;
3011 		}
3012 		if (intel_pt_sample_time(decoder->pkt_state)) {
3013 			intel_pt_update_sample_time(decoder);
3014 			if (decoder->sample_cyc) {
3015 				decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
3016 				decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
3017 				decoder->sample_cyc = false;
3018 			}
3019 		}
3020 		/*
3021 		 * When using only TSC/MTC to compute cycles, IPC can be
3022 		 * sampled as soon as the cycle count changes.
3023 		 */
3024 		if (!decoder->have_cyc)
3025 			decoder->state.flags |= INTEL_PT_SAMPLE_IPC;
3026 	}
3027 
3028 	 /* Let PSB event always have TSC timestamp */
3029 	if ((decoder->state.type & INTEL_PT_PSB_EVT) && decoder->tsc_timestamp)
3030 		decoder->sample_timestamp = decoder->tsc_timestamp;
3031 
3032 	decoder->state.from_nr = decoder->nr;
3033 	decoder->state.to_nr = decoder->next_nr;
3034 	decoder->nr = decoder->next_nr;
3035 
3036 	decoder->state.timestamp = decoder->sample_timestamp;
3037 	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
3038 	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
3039 	decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
3040 
3041 	return &decoder->state;
3042 }
3043 
3044 /**
3045  * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
3046  * @buf: pointer to buffer pointer
3047  * @len: size of buffer
3048  *
3049  * Updates the buffer pointer to point to the start of the next PSB packet if
3050  * there is one, otherwise the buffer pointer is unchanged.  If @buf is updated,
3051  * @len is adjusted accordingly.
3052  *
3053  * Return: %true if a PSB packet is found, %false otherwise.
3054  */
3055 static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
3056 {
3057 	unsigned char *next;
3058 
3059 	next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
3060 	if (next) {
3061 		*len -= next - *buf;
3062 		*buf = next;
3063 		return true;
3064 	}
3065 	return false;
3066 }
3067 
3068 /**
3069  * intel_pt_step_psb - move buffer pointer to the start of the following PSB
3070  *                     packet.
3071  * @buf: pointer to buffer pointer
3072  * @len: size of buffer
3073  *
3074  * Updates the buffer pointer to point to the start of the following PSB packet
3075  * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
3076  * pointer is unchanged.  If @buf is updated, @len is adjusted accordingly.
3077  *
3078  * Return: %true if a PSB packet is found, %false otherwise.
3079  */
3080 static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
3081 {
3082 	unsigned char *next;
3083 
3084 	if (!*len)
3085 		return false;
3086 
3087 	next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
3088 	if (next) {
3089 		*len -= next - *buf;
3090 		*buf = next;
3091 		return true;
3092 	}
3093 	return false;
3094 }
3095 
3096 /**
3097  * intel_pt_last_psb - find the last PSB packet in a buffer.
3098  * @buf: buffer
3099  * @len: size of buffer
3100  *
3101  * This function finds the last PSB in a buffer.
3102  *
3103  * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
3104  */
3105 static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
3106 {
3107 	const char *n = INTEL_PT_PSB_STR;
3108 	unsigned char *p;
3109 	size_t k;
3110 
3111 	if (len < INTEL_PT_PSB_LEN)
3112 		return NULL;
3113 
3114 	k = len - INTEL_PT_PSB_LEN + 1;
3115 	while (1) {
3116 		p = memrchr(buf, n[0], k);
3117 		if (!p)
3118 			return NULL;
3119 		if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
3120 			return p;
3121 		k = p - buf;
3122 		if (!k)
3123 			return NULL;
3124 	}
3125 }
3126 
3127 /**
3128  * intel_pt_next_tsc - find and return next TSC.
3129  * @buf: buffer
3130  * @len: size of buffer
3131  * @tsc: TSC value returned
3132  * @rem: returns remaining size when TSC is found
3133  *
3134  * Find a TSC packet in @buf and return the TSC value.  This function assumes
3135  * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
3136  * PSBEND packet is found.
3137  *
3138  * Return: %true if TSC is found, false otherwise.
3139  */
3140 static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
3141 			      size_t *rem)
3142 {
3143 	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
3144 	struct intel_pt_pkt packet;
3145 	int ret;
3146 
3147 	while (len) {
3148 		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
3149 		if (ret <= 0)
3150 			return false;
3151 		if (packet.type == INTEL_PT_TSC) {
3152 			*tsc = packet.payload;
3153 			*rem = len;
3154 			return true;
3155 		}
3156 		if (packet.type == INTEL_PT_PSBEND)
3157 			return false;
3158 		buf += ret;
3159 		len -= ret;
3160 	}
3161 	return false;
3162 }
3163 
/**
 * intel_pt_tsc_cmp - compare 7-byte TSCs.
 * @tsc1: first TSC to compare
 * @tsc2: second TSC to compare
 *
 * This function compares 7-byte TSC values allowing for the possibility that
 * TSC wrapped around.  Generally it is not possible to know if TSC has wrapped
 * around, so the ordering is taken at face value only while the absolute
 * difference is less than half the 56-bit range; otherwise it is reversed.
 *
 * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
 * after @tsc2.
 */
static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
{
	const uint64_t halfway = 1ULL << 55;
	uint64_t delta;
	int order;

	if (tsc1 == tsc2)
		return 0;

	if (tsc1 > tsc2) {
		delta = tsc1 - tsc2;
		order = 1;
	} else {
		delta = tsc2 - tsc1;
		order = -1;
	}

	/* A gap of half the range or more is treated as a wrap-around */
	return delta < halfway ? order : -order;
}
3196 
3197 #define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
3198 
3199 /**
3200  * adj_for_padding - adjust overlap to account for padding.
3201  * @buf_b: second buffer
3202  * @buf_a: first buffer
3203  * @len_a: size of first buffer
3204  *
3205  * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
3206  * accordingly.
3207  *
3208  * Return: A pointer into @buf_b from where non-overlapped data starts
3209  */
3210 static unsigned char *adj_for_padding(unsigned char *buf_b,
3211 				      unsigned char *buf_a, size_t len_a)
3212 {
3213 	unsigned char *p = buf_b - MAX_PADDING;
3214 	unsigned char *q = buf_a + len_a - MAX_PADDING;
3215 	int i;
3216 
3217 	for (i = MAX_PADDING; i; i--, p++, q++) {
3218 		if (*p != *q)
3219 			break;
3220 	}
3221 
3222 	return p;
3223 }
3224 
/**
 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
 *                             using TSC.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @consecutive: returns true if there is data in buf_b that is consecutive
 *               to buf_a
 *
 * If the trace contains TSC we can look at the last TSC of @buf_a and the
 * first TSC of @buf_b in order to determine if the buffers overlap, and then
 * walk forward in @buf_b, one PSB at a time, until a later TSC is found.  A
 * precondition is that @buf_a and @buf_b are positioned at a PSB.
 *
 * @consecutive is only written (set to true) when a matching TSC is found; it
 * is left untouched otherwise.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
						size_t len_a,
						unsigned char *buf_b,
						size_t len_b, bool *consecutive)
{
	uint64_t tsc_a, tsc_b;
	unsigned char *psb_a;
	size_t span, rem_a, rem_b;

	psb_a = intel_pt_last_psb(buf_a, len_a);
	if (!psb_a)
		return buf_b; /* No PSB in buf_a => no overlap */

	span = len_a - (psb_a - buf_a);
	if (!intel_pt_next_tsc(psb_a, span, &tsc_a, &rem_a)) {
		/* The last PSB+ in buf_a is incomplete, so go back one more */
		len_a -= span;
		psb_a = intel_pt_last_psb(buf_a, len_a);
		if (!psb_a)
			return buf_b; /* No full PSB+ => assume no overlap */
		span = len_a - (psb_a - buf_a);
		if (!intel_pt_next_tsc(psb_a, span, &tsc_a, &rem_a))
			return buf_b; /* No TSC in buf_a => assume no overlap */
	}

	do {
		int order;

		/* Ignore PSB+ with no TSC */
		if (!intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b))
			continue;

		order = intel_pt_tsc_cmp(tsc_a, tsc_b);
		if (order < 0)
			return buf_b; /* tsc_a < tsc_b => no overlap */

		/* Same TSC, so buffers are consecutive */
		if (order == 0 && rem_b >= rem_a) {
			unsigned char *start;

			*consecutive = true;
			start = buf_b + len_b - (rem_b - rem_a);
			return adj_for_padding(start, buf_a, len_a);
		}
	} while (intel_pt_step_psb(&buf_b, &len_b));

	return buf_b + len_b; /* No PSB in buf_b => no data */
}
3289 
/**
 * intel_pt_find_overlap - locate the start of non-overlapped trace data.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @have_tsc: TSC packets may be used to detect overlap
 * @consecutive: set to true when data in buf_b is found to be consecutive
 *               to buf_a
 *
 * Recorded trace samples or snapshots may contain overlapping data.  Since
 * decoding can only begin at a PSB packet, both buffers are first advanced
 * to their first PSB.  Overlap is then detected either from TSC packets
 * (when @have_tsc) or by comparing bytes from successive PSBs of @buf_a
 * against the start of @buf_b.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
				     unsigned char *buf_b, size_t len_b,
				     bool have_tsc, bool *consecutive)
{
	/* Anything in 'b' ahead of its first PSB is unusable */
	if (!intel_pt_next_psb(&buf_b, &len_b))
		return buf_b + len_b; /* 'b' has no PSB at all */

	/* No PSB in 'a' means there is nothing to overlap with */
	if (!intel_pt_next_psb(&buf_a, &len_a))
		return buf_b;

	if (have_tsc) {
		unsigned char *tsc_start;

		tsc_start = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b,
						      len_b, consecutive);
		if (tsc_start)
			return tsc_start;
	}

	/*
	 * 'b' cannot finish inside 'a', so any part of 'a' longer than 'b'
	 * is irrelevant to the comparison: skip PSBs until 'a' fits.
	 */
	while (len_b < len_a) {
		if (!intel_pt_step_psb(&buf_a, &len_a))
			return buf_b; /* No overlap */
	}

	/* From here on len_b >= len_a */
	do {
		/*
		 * Candidate overlap: compare all that remains of 'a' with the
		 * same number of bytes at the start of 'b'.
		 */
		if (memcmp(buf_a, buf_b, len_a) == 0) {
			*consecutive = true;
			return adj_for_padding(buf_b + len_a, buf_a, len_a);
		}
		/* No match, move on to the next PSB in 'a' */
	} while (intel_pt_step_psb(&buf_a, &len_a));

	return buf_b; /* No overlap */
}
3350 
/**
 * struct fast_forward_data - state shared with intel_pt_ff_cb() during a
 *                            fast forward lookahead.
 * @timestamp: the timestamp being fast forwarded towards
 * @buf_timestamp: buffer timestamp of the most recent buffer seen whose trace
 *                 data starts earlier than @timestamp (stays 0 if no such
 *                 buffer is recorded)
 */
struct fast_forward_data {
	uint64_t timestamp;
	uint64_t buf_timestamp;
};
3361 
3362 /**
3363  * intel_pt_ff_cb - fast forward lookahead callback.
3364  * @buffer: Intel PT trace buffer
3365  * @data: opaque pointer to fast forward data (struct fast_forward_data)
3366  *
3367  * Determine if @buffer trace is past the fast forward timestamp.
3368  *
3369  * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
3370  *         timestamp, and 0 otherwise.
3371  */
3372 static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
3373 {
3374 	struct fast_forward_data *d = data;
3375 	unsigned char *buf;
3376 	uint64_t tsc;
3377 	size_t rem;
3378 	size_t len;
3379 
3380 	buf = (unsigned char *)buffer->buf;
3381 	len = buffer->len;
3382 
3383 	if (!intel_pt_next_psb(&buf, &len) ||
3384 	    !intel_pt_next_tsc(buf, len, &tsc, &rem))
3385 		return 0;
3386 
3387 	tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
3388 
3389 	intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
3390 		     tsc, buffer->ref_timestamp);
3391 
3392 	/*
3393 	 * If the buffer contains a timestamp earlier that the fast forward
3394 	 * timestamp, then record it, else stop.
3395 	 */
3396 	if (tsc < d->timestamp)
3397 		d->buf_timestamp = buffer->ref_timestamp;
3398 	else
3399 		return 1;
3400 
3401 	return 0;
3402 }
3403 
3404 /**
3405  * intel_pt_fast_forward - reposition decoder forwards.
3406  * @decoder: Intel PT decoder
3407  * @timestamp: timestamp to fast forward towards
3408  *
3409  * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
3410  *
3411  * Return: 0 on success or negative error code on failure.
3412  */
3413 int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
3414 {
3415 	struct fast_forward_data d = { .timestamp = timestamp };
3416 	unsigned char *buf;
3417 	size_t len;
3418 	int err;
3419 
3420 	intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
3421 
3422 	/* Find buffer timestamp of buffer to fast forward to */
3423 	err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
3424 	if (err < 0)
3425 		return err;
3426 
3427 	/* Walk to buffer with same buffer timestamp */
3428 	if (d.buf_timestamp) {
3429 		do {
3430 			decoder->pos += decoder->len;
3431 			decoder->len = 0;
3432 			err = intel_pt_get_next_data(decoder, true);
3433 			/* -ENOLINK means non-consecutive trace */
3434 			if (err && err != -ENOLINK)
3435 				return err;
3436 		} while (decoder->buf_timestamp != d.buf_timestamp);
3437 	}
3438 
3439 	if (!decoder->buf)
3440 		return 0;
3441 
3442 	buf = (unsigned char *)decoder->buf;
3443 	len = decoder->len;
3444 
3445 	if (!intel_pt_next_psb(&buf, &len))
3446 		return 0;
3447 
3448 	/*
3449 	 * Walk PSBs while the PSB timestamp is less than the fast forward
3450 	 * timestamp.
3451 	 */
3452 	do {
3453 		uint64_t tsc;
3454 		size_t rem;
3455 
3456 		if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
3457 			break;
3458 		tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
3459 		/*
3460 		 * A TSC packet can slip past MTC packets but, after fast
3461 		 * forward, decoding starts at the TSC timestamp. That means
3462 		 * the timestamps may not be exactly the same as the timestamps
3463 		 * that would have been decoded without fast forward.
3464 		 */
3465 		if (tsc < timestamp) {
3466 			intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
3467 			decoder->pos += decoder->len - len;
3468 			decoder->buf = buf;
3469 			decoder->len = len;
3470 			intel_pt_reposition(decoder);
3471 		} else {
3472 			break;
3473 		}
3474 	} while (intel_pt_step_psb(&buf, &len));
3475 
3476 	return 0;
3477 }
3478