1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_pt_decoder.c: Intel Processor Trace support
4  * Copyright (c) 2013-2014, Intel Corporation.
5  */
6 
7 #ifndef _GNU_SOURCE
8 #define _GNU_SOURCE
9 #endif
10 #include <stdlib.h>
11 #include <stdbool.h>
12 #include <string.h>
13 #include <errno.h>
14 #include <stdint.h>
15 #include <inttypes.h>
16 #include <linux/compiler.h>
17 
18 #include "../cache.h"
19 #include "../util.h"
20 #include "../auxtrace.h"
21 
22 #include "intel-pt-insn-decoder.h"
23 #include "intel-pt-pkt-decoder.h"
24 #include "intel-pt-decoder.h"
25 #include "intel-pt-log.h"
26 
/* Number of ip entries held by one struct intel_pt_blk */
#define INTEL_PT_BLK_SIZE 1024

/* Mask with only the most significant bit of a 64-bit value set */
#define BIT63 (((uint64_t)1 << 63))

/*
 * Positive status returned by intel_pt_walk_insn() once it has filled in
 * decoder->state for an instruction or unconditional branch.
 */
#define INTEL_PT_RETURN 1

/* Maximum number of loops with no packets consumed i.e. stuck in a loop */
#define INTEL_PT_MAX_LOOPS 10000
35 
/*
 * One fixed-size block of the stack used by intel_pt_push() and
 * intel_pt_pop(). Blocks are chained from the newest to the oldest.
 */
struct intel_pt_blk {
	struct intel_pt_blk *prev;	/* Next older block, NULL for the first */
	uint64_t ip[INTEL_PT_BLK_SIZE];	/* Pushed values */
};
40 
/*
 * Stack built from a chain of struct intel_pt_blk. intel_pt_walk_insn()
 * pushes the address following a call and pops on a return.
 */
struct intel_pt_stack {
	struct intel_pt_blk *blk;	/* Current (newest) block, NULL if none */
	struct intel_pt_blk *spare;	/* One released block kept for reuse */
	int pos;			/* Index of the next free slot in blk->ip */
};
46 
/*
 * Packet-processing states of the decoder, kept in
 * struct intel_pt_decoder::pkt_state.
 */
enum intel_pt_pkt_state {
	INTEL_PT_STATE_NO_PSB,
	INTEL_PT_STATE_NO_IP,
	INTEL_PT_STATE_ERR_RESYNC,
	INTEL_PT_STATE_IN_SYNC,
	INTEL_PT_STATE_TNT_CONT,
	INTEL_PT_STATE_TNT,
	INTEL_PT_STATE_TIP,
	INTEL_PT_STATE_TIP_PGD,
	INTEL_PT_STATE_FUP,
	INTEL_PT_STATE_FUP_NO_TIP,
};
59 
60 static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
61 {
62 	switch (pkt_state) {
63 	case INTEL_PT_STATE_NO_PSB:
64 	case INTEL_PT_STATE_NO_IP:
65 	case INTEL_PT_STATE_ERR_RESYNC:
66 	case INTEL_PT_STATE_IN_SYNC:
67 	case INTEL_PT_STATE_TNT_CONT:
68 		return true;
69 	case INTEL_PT_STATE_TNT:
70 	case INTEL_PT_STATE_TIP:
71 	case INTEL_PT_STATE_TIP_PGD:
72 	case INTEL_PT_STATE_FUP:
73 	case INTEL_PT_STATE_FUP_NO_TIP:
74 		return false;
75 	default:
76 		return true;
77 	};
78 }
79 
/*
 * Packet states the decoder moves to after an error. When INTEL_PT_STRICT is
 * defined every error goes back to INTEL_PT_STATE_NO_PSB. Otherwise each error
 * class has its own state.
 *
 * Note that the non-strict INTEL_PT_STATE_ERR1 expands to decoder->pkt_state,
 * so it can only be used where a variable named 'decoder' is in scope, and
 * assigning it to decoder->pkt_state leaves the state unchanged.
 */
#ifdef INTEL_PT_STRICT
#define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_NO_PSB
#else
#define INTEL_PT_STATE_ERR1	(decoder->pkt_state)
#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_IP
#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_ERR_RESYNC
#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_IN_SYNC
#endif
91 
/*
 * Complete state of one Intel PT decoder instance. Allocated zeroed by
 * intel_pt_decoder_new() and released by intel_pt_decoder_free().
 */
struct intel_pt_decoder {
	/* Callbacks and their opaque argument, copied from intel_pt_params */
	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
			 uint64_t max_insn_cnt, void *data);
	bool (*pgd_ip)(uint64_t ip, void *data);
	int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
	void *data;
	/* Result state filled in as instructions and events are decoded */
	struct intel_pt_state state;
	/* Trace data currently being decoded and the number of bytes left */
	const unsigned char *buf;
	size_t len;
	bool return_compression;
	bool branch_enable;
	bool mtc_insn;
	bool pge;
	bool have_tma;
	bool have_cyc;
	bool fixup_last_mtc;
	bool have_last_ip;
	bool in_psb;
	enum intel_pt_param_flags flags;
	/* Position of the current packet, advanced by pkt_step */
	uint64_t pos;
	uint64_t last_ip;
	uint64_t ip;
	uint64_t cr3;
	uint64_t timestamp;
	uint64_t tsc_timestamp;
	uint64_t ref_timestamp;
	uint64_t buf_timestamp;
	uint64_t sample_timestamp;
	/* Value popped from the stack for the last return instruction walked */
	uint64_t ret_addr;
	uint64_t ctc_timestamp;
	uint64_t ctc_delta;
	uint64_t cycle_cnt;
	uint64_t cyc_ref_timestamp;
	uint32_t last_mtc;
	uint32_t tsc_ctc_ratio_n;
	uint32_t tsc_ctc_ratio_d;
	/* tsc_ctc_ratio_n / tsc_ctc_ratio_d when that divides exactly, else 0 */
	uint32_t tsc_ctc_mult;
	uint32_t tsc_slip;
	/* Mask of the CTC bits below mtc_shift */
	uint32_t ctc_rem_mask;
	int mtc_shift;
	struct intel_pt_stack stack;
	enum intel_pt_pkt_state pkt_state;
	enum intel_pt_pkt_ctx pkt_ctx;
	/* Most recently decoded packet, its step and its length in bytes */
	struct intel_pt_pkt packet;
	struct intel_pt_pkt tnt;
	int pkt_step;
	int pkt_len;
	int last_packet_type;
	unsigned int cbr;
	unsigned int cbr_seen;
	unsigned int max_non_turbo_ratio;
	double max_non_turbo_ratio_fp;
	double cbr_cyc_to_tsc;
	double calc_cyc_to_tsc;
	bool have_calc_cyc_to_tsc;
	int exec_mode;
	unsigned int insn_bytes;
	/* Sampling period configuration and bookkeeping */
	uint64_t period;
	enum intel_pt_period_type period_type;
	uint64_t tot_insn_cnt;
	uint64_t period_insn_cnt;
	uint64_t period_mask;
	uint64_t period_ticks;
	uint64_t last_masked_timestamp;
	uint64_t tot_cyc_cnt;
	uint64_t sample_tot_cyc_cnt;
	uint64_t base_cyc_cnt;
	uint64_t cyc_cnt_timestamp;
	double tsc_to_cyc;
	bool continuous_period;
	bool overflow;
	/* Pending events that intel_pt_fup_event() turns into decoder state */
	bool set_fup_tx_flags;
	bool set_fup_ptw;
	bool set_fup_mwait;
	bool set_fup_pwre;
	bool set_fup_exstop;
	bool sample_cyc;
	unsigned int fup_tx_flags;
	unsigned int tx_flags;
	uint64_t fup_ptw_payload;
	uint64_t fup_mwait_payload;
	uint64_t fup_pwre_payload;
	uint64_t cbr_payload;
	uint64_t timestamp_insn_cnt;
	uint64_t sample_insn_cnt;
	/* Never-ending loop detection used by intel_pt_walk_insn() */
	uint64_t stuck_ip;
	int no_progress;
	int stuck_ip_prd;
	int stuck_ip_cnt;
	/* Data left to decode after a packet that was split across buffers */
	const unsigned char *next_buf;
	size_t next_len;
	/* Scratch space used to reassemble a packet split across buffers */
	unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
};
187 
/*
 * Round x down to a power of 2.
 *
 * Returns the largest power of 2 that is less than or equal to x, or 0 when
 * x is 0. The zero case is handled explicitly because shifting zero right
 * never produces 1, so a loop that waits for that would never terminate.
 */
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
{
	int shift = 0;

	if (!x)
		return 0;

	/* Find the position of the most significant set bit */
	while (x >> 1) {
		x >>= 1;
		shift++;
	}

	return (uint64_t)1 << shift;
}
197 
198 static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
199 {
200 	if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
201 		uint64_t period;
202 
203 		period = intel_pt_lower_power_of_2(decoder->period);
204 		decoder->period_mask  = ~(period - 1);
205 		decoder->period_ticks = period;
206 	}
207 }
208 
/*
 * Compute t * n / d without forming the full product t * n: the quotient and
 * the remainder of t / d are scaled separately. Returns 0 when d is 0.
 */
static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
{
	uint64_t quot, rem;

	if (d == 0)
		return 0;

	quot = t / d;
	rem  = t % d;

	return quot * n + (rem * n) / d;
}
215 
216 struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
217 {
218 	struct intel_pt_decoder *decoder;
219 
220 	if (!params->get_trace || !params->walk_insn)
221 		return NULL;
222 
223 	decoder = zalloc(sizeof(struct intel_pt_decoder));
224 	if (!decoder)
225 		return NULL;
226 
227 	decoder->get_trace          = params->get_trace;
228 	decoder->walk_insn          = params->walk_insn;
229 	decoder->pgd_ip             = params->pgd_ip;
230 	decoder->lookahead          = params->lookahead;
231 	decoder->data               = params->data;
232 	decoder->return_compression = params->return_compression;
233 	decoder->branch_enable      = params->branch_enable;
234 
235 	decoder->flags              = params->flags;
236 
237 	decoder->period             = params->period;
238 	decoder->period_type        = params->period_type;
239 
240 	decoder->max_non_turbo_ratio    = params->max_non_turbo_ratio;
241 	decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
242 
243 	intel_pt_setup_period(decoder);
244 
245 	decoder->mtc_shift = params->mtc_period;
246 	decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
247 
248 	decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
249 	decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
250 
251 	if (!decoder->tsc_ctc_ratio_n)
252 		decoder->tsc_ctc_ratio_d = 0;
253 
254 	if (decoder->tsc_ctc_ratio_d) {
255 		if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
256 			decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
257 						decoder->tsc_ctc_ratio_d;
258 	}
259 
260 	/*
261 	 * A TSC packet can slip past MTC packets so that the timestamp appears
262 	 * to go backwards. One estimate is that can be up to about 40 CPU
263 	 * cycles, which is certainly less than 0x1000 TSC ticks, but accept
264 	 * slippage an order of magnitude more to be on the safe side.
265 	 */
266 	decoder->tsc_slip = 0x10000;
267 
268 	intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
269 	intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
270 	intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
271 	intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
272 	intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
273 
274 	return decoder;
275 }
276 
277 static void intel_pt_pop_blk(struct intel_pt_stack *stack)
278 {
279 	struct intel_pt_blk *blk = stack->blk;
280 
281 	stack->blk = blk->prev;
282 	if (!stack->spare)
283 		stack->spare = blk;
284 	else
285 		free(blk);
286 }
287 
288 static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
289 {
290 	if (!stack->pos) {
291 		if (!stack->blk)
292 			return 0;
293 		intel_pt_pop_blk(stack);
294 		if (!stack->blk)
295 			return 0;
296 		stack->pos = INTEL_PT_BLK_SIZE;
297 	}
298 	return stack->blk->ip[--stack->pos];
299 }
300 
301 static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
302 {
303 	struct intel_pt_blk *blk;
304 
305 	if (stack->spare) {
306 		blk = stack->spare;
307 		stack->spare = NULL;
308 	} else {
309 		blk = malloc(sizeof(struct intel_pt_blk));
310 		if (!blk)
311 			return -ENOMEM;
312 	}
313 
314 	blk->prev = stack->blk;
315 	stack->blk = blk;
316 	stack->pos = 0;
317 	return 0;
318 }
319 
320 static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
321 {
322 	int err;
323 
324 	if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
325 		err = intel_pt_alloc_blk(stack);
326 		if (err)
327 			return err;
328 	}
329 
330 	stack->blk->ip[stack->pos++] = ip;
331 	return 0;
332 }
333 
334 static void intel_pt_clear_stack(struct intel_pt_stack *stack)
335 {
336 	while (stack->blk)
337 		intel_pt_pop_blk(stack);
338 	stack->pos = 0;
339 }
340 
341 static void intel_pt_free_stack(struct intel_pt_stack *stack)
342 {
343 	intel_pt_clear_stack(stack);
344 	zfree(&stack->blk);
345 	zfree(&stack->spare);
346 }
347 
348 void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
349 {
350 	intel_pt_free_stack(&decoder->stack);
351 	free(decoder);
352 }
353 
354 static int intel_pt_ext_err(int code)
355 {
356 	switch (code) {
357 	case -ENOMEM:
358 		return INTEL_PT_ERR_NOMEM;
359 	case -ENOSYS:
360 		return INTEL_PT_ERR_INTERN;
361 	case -EBADMSG:
362 		return INTEL_PT_ERR_BADPKT;
363 	case -ENODATA:
364 		return INTEL_PT_ERR_NODATA;
365 	case -EILSEQ:
366 		return INTEL_PT_ERR_NOINSN;
367 	case -ENOENT:
368 		return INTEL_PT_ERR_MISMAT;
369 	case -EOVERFLOW:
370 		return INTEL_PT_ERR_OVR;
371 	case -ENOSPC:
372 		return INTEL_PT_ERR_LOST;
373 	case -ELOOP:
374 		return INTEL_PT_ERR_NELOOP;
375 	default:
376 		return INTEL_PT_ERR_UNK;
377 	}
378 }
379 
380 static const char *intel_pt_err_msgs[] = {
381 	[INTEL_PT_ERR_NOMEM]  = "Memory allocation failed",
382 	[INTEL_PT_ERR_INTERN] = "Internal error",
383 	[INTEL_PT_ERR_BADPKT] = "Bad packet",
384 	[INTEL_PT_ERR_NODATA] = "No more data",
385 	[INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
386 	[INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
387 	[INTEL_PT_ERR_OVR]    = "Overflow packet",
388 	[INTEL_PT_ERR_LOST]   = "Lost trace data",
389 	[INTEL_PT_ERR_UNK]    = "Unknown error!",
390 	[INTEL_PT_ERR_NELOOP] = "Never-ending loop",
391 };
392 
393 int intel_pt__strerror(int code, char *buf, size_t buflen)
394 {
395 	if (code < 1 || code >= INTEL_PT_ERR_MAX)
396 		code = INTEL_PT_ERR_UNK;
397 	strlcpy(buf, intel_pt_err_msgs[code], buflen);
398 	return 0;
399 }
400 
401 static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
402 				 uint64_t last_ip)
403 {
404 	uint64_t ip;
405 
406 	switch (packet->count) {
407 	case 1:
408 		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
409 		     packet->payload;
410 		break;
411 	case 2:
412 		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
413 		     packet->payload;
414 		break;
415 	case 3:
416 		ip = packet->payload;
417 		/* Sign-extend 6-byte ip */
418 		if (ip & (uint64_t)0x800000000000ULL)
419 			ip |= (uint64_t)0xffff000000000000ULL;
420 		break;
421 	case 4:
422 		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
423 		     packet->payload;
424 		break;
425 	case 6:
426 		ip = packet->payload;
427 		break;
428 	default:
429 		return 0;
430 	}
431 
432 	return ip;
433 }
434 
435 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
436 {
437 	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
438 	decoder->have_last_ip = true;
439 }
440 
441 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
442 {
443 	intel_pt_set_last_ip(decoder);
444 	decoder->ip = decoder->last_ip;
445 }
446 
447 static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
448 {
449 	intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
450 			    decoder->buf);
451 }
452 
453 static int intel_pt_bug(struct intel_pt_decoder *decoder)
454 {
455 	intel_pt_log("ERROR: Internal error\n");
456 	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
457 	return -ENOSYS;
458 }
459 
460 static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
461 {
462 	decoder->tx_flags = 0;
463 }
464 
465 static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
466 {
467 	decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
468 }
469 
470 static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
471 {
472 	intel_pt_clear_tx_flags(decoder);
473 	decoder->have_tma = false;
474 	decoder->pkt_len = 1;
475 	decoder->pkt_step = 1;
476 	intel_pt_decoder_log_packet(decoder);
477 	if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
478 		intel_pt_log("ERROR: Bad packet\n");
479 		decoder->pkt_state = INTEL_PT_STATE_ERR1;
480 	}
481 	return -EBADMSG;
482 }
483 
484 static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
485 {
486 	decoder->sample_timestamp = decoder->timestamp;
487 	decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
488 }
489 
490 static void intel_pt_reposition(struct intel_pt_decoder *decoder)
491 {
492 	decoder->ip = 0;
493 	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
494 	decoder->timestamp = 0;
495 	decoder->have_tma = false;
496 }
497 
498 static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
499 {
500 	struct intel_pt_buffer buffer = { .buf = 0, };
501 	int ret;
502 
503 	decoder->pkt_step = 0;
504 
505 	intel_pt_log("Getting more data\n");
506 	ret = decoder->get_trace(&buffer, decoder->data);
507 	if (ret)
508 		return ret;
509 	decoder->buf = buffer.buf;
510 	decoder->len = buffer.len;
511 	if (!decoder->len) {
512 		intel_pt_log("No more data\n");
513 		return -ENODATA;
514 	}
515 	decoder->buf_timestamp = buffer.ref_timestamp;
516 	if (!buffer.consecutive || reposition) {
517 		intel_pt_reposition(decoder);
518 		decoder->ref_timestamp = buffer.ref_timestamp;
519 		decoder->state.trace_nr = buffer.trace_nr;
520 		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
521 			     decoder->ref_timestamp);
522 		return -ENOLINK;
523 	}
524 
525 	return 0;
526 }
527 
528 static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
529 				  bool reposition)
530 {
531 	if (!decoder->next_buf)
532 		return intel_pt_get_data(decoder, reposition);
533 
534 	decoder->buf = decoder->next_buf;
535 	decoder->len = decoder->next_len;
536 	decoder->next_buf = 0;
537 	decoder->next_len = 0;
538 	return 0;
539 }
540 
541 static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
542 {
543 	unsigned char *buf = decoder->temp_buf;
544 	size_t old_len, len, n;
545 	int ret;
546 
547 	old_len = decoder->len;
548 	len = decoder->len;
549 	memcpy(buf, decoder->buf, len);
550 
551 	ret = intel_pt_get_data(decoder, false);
552 	if (ret) {
553 		decoder->pos += old_len;
554 		return ret < 0 ? ret : -EINVAL;
555 	}
556 
557 	n = INTEL_PT_PKT_MAX_SZ - len;
558 	if (n > decoder->len)
559 		n = decoder->len;
560 	memcpy(buf + len, decoder->buf, n);
561 	len += n;
562 
563 	ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
564 	if (ret < (int)old_len) {
565 		decoder->next_buf = decoder->buf;
566 		decoder->next_len = decoder->len;
567 		decoder->buf = buf;
568 		decoder->len = old_len;
569 		return intel_pt_bad_packet(decoder);
570 	}
571 
572 	decoder->next_buf = decoder->buf + (ret - old_len);
573 	decoder->next_len = decoder->len - (ret - old_len);
574 
575 	decoder->buf = buf;
576 	decoder->len = ret;
577 
578 	return ret;
579 }
580 
/*
 * Context handed to a lookahead callback by intel_pt_pkt_lookahead() for each
 * packet it decodes.
 */
struct intel_pt_pkt_info {
	struct intel_pt_decoder	  *decoder;	/* Decoder doing the lookahead */
	struct intel_pt_pkt       packet;	/* Packet being looked at */
	uint64_t                  pos;		/* Position of that packet */
	int                       pkt_len;	/* Length of that packet */
	int                       last_packet_type; /* Type of previous packet */
	void                      *data;	/* Caller's private data */
};

/*
 * Lookahead callback. intel_pt_pkt_lookahead() stops when it returns
 * non-zero and continues with the next packet when it returns zero.
 */
typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
591 
592 /* Lookahead packets in current buffer */
593 static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
594 				  intel_pt_pkt_cb_t cb, void *data)
595 {
596 	struct intel_pt_pkt_info pkt_info;
597 	const unsigned char *buf = decoder->buf;
598 	enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
599 	size_t len = decoder->len;
600 	int ret;
601 
602 	pkt_info.decoder          = decoder;
603 	pkt_info.pos              = decoder->pos;
604 	pkt_info.pkt_len          = decoder->pkt_step;
605 	pkt_info.last_packet_type = decoder->last_packet_type;
606 	pkt_info.data             = data;
607 
608 	while (1) {
609 		do {
610 			pkt_info.pos += pkt_info.pkt_len;
611 			buf          += pkt_info.pkt_len;
612 			len          -= pkt_info.pkt_len;
613 
614 			if (!len)
615 				return INTEL_PT_NEED_MORE_BYTES;
616 
617 			ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
618 						  &pkt_ctx);
619 			if (!ret)
620 				return INTEL_PT_NEED_MORE_BYTES;
621 			if (ret < 0)
622 				return ret;
623 
624 			pkt_info.pkt_len = ret;
625 		} while (pkt_info.packet.type == INTEL_PT_PAD);
626 
627 		ret = cb(&pkt_info);
628 		if (ret)
629 			return 0;
630 
631 		pkt_info.last_packet_type = pkt_info.packet.type;
632 	}
633 }
634 
/*
 * Private state of the lookahead done by intel_pt_calc_cyc_to_tsc(). It
 * starts as a copy of the decoder's timing state and is updated by
 * intel_pt_calc_cyc_cb(), so the decoder's own timing fields stay untouched.
 */
struct intel_pt_calc_cyc_to_tsc_info {
	uint64_t        cycle_cnt;	/* Sum of CYC payloads seen so far */
	unsigned int    cbr;		/* CBR payload seen, 0 if none yet */
	uint32_t        last_mtc;
	uint64_t        ctc_timestamp;
	uint64_t        ctc_delta;
	uint64_t        tsc_timestamp;
	uint64_t        timestamp;
	bool            have_tma;
	bool            fixup_last_mtc;
	bool            from_mtc;	/* As passed to intel_pt_calc_cyc_to_tsc() */
	double          cbr_cyc_to_tsc;
};
648 
/*
 * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower
 * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
 * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
 * packet by copying the missing bits from the current MTC assuming the least
 * difference between the two, and that the current MTC comes after last_mtc.
 */
static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
				    uint32_t *last_mtc)
{
	const uint32_t missing_bit = 1U << (16 - mtc_shift);
	const uint32_t missing_mask = ~(missing_bit - 1);
	uint32_t fixed = *last_mtc | (mtc & missing_mask);

	/* last_mtc must come before mtc, so step back one missing-bit unit */
	if (fixed >= mtc)
		fixed = (fixed - missing_bit) & 0xff;

	*last_mtc = fixed;
}
668 
/*
 * Lookahead callback used by intel_pt_calc_cyc_to_tsc().
 *
 * Follows timing packets ahead of the decoder, tracking the timestamp in the
 * private intel_pt_calc_cyc_to_tsc_info, and accumulates CYC payloads. When an
 * MTC or TSC packet that directly follows a CYC packet is reached, the ratio
 * of elapsed timestamp to accumulated cycles is stored in
 * decoder->calc_cyc_to_tsc, unless it exceeds the CBR-based value by more
 * than 25%.
 *
 * Returns 0 to continue with the next packet and 1 to stop the lookahead.
 */
static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
{
	struct intel_pt_decoder *decoder = pkt_info->decoder;
	struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
	uint64_t timestamp;
	double cyc_to_tsc;
	unsigned int cbr;
	uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;

	switch (pkt_info->packet.type) {
	/* Packets that do not affect the calculation: keep looking */
	case INTEL_PT_TNT:
	case INTEL_PT_TIP_PGE:
	case INTEL_PT_TIP:
	case INTEL_PT_FUP:
	case INTEL_PT_PSB:
	case INTEL_PT_PIP:
	case INTEL_PT_MODE_EXEC:
	case INTEL_PT_MODE_TSX:
	case INTEL_PT_PSBEND:
	case INTEL_PT_PAD:
	case INTEL_PT_VMCS:
	case INTEL_PT_MNT:
	case INTEL_PT_PTWRITE:
	case INTEL_PT_PTWRITE_IP:
	case INTEL_PT_BBP:
	case INTEL_PT_BIP:
	case INTEL_PT_BEP:
	case INTEL_PT_BEP_IP:
		return 0;

	case INTEL_PT_MTC:
		if (!data->have_tma)
			return 0;

		mtc = pkt_info->packet.payload;
		if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
			data->fixup_last_mtc = false;
			intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
						&data->last_mtc);
		}
		/* The MTC value is 8 bits wide, so allow for wrap-around */
		if (mtc > data->last_mtc)
			mtc_delta = mtc - data->last_mtc;
		else
			mtc_delta = mtc + 256 - data->last_mtc;
		data->ctc_delta += mtc_delta << decoder->mtc_shift;
		data->last_mtc = mtc;

		/* Convert the accumulated CTC delta to timestamp units */
		if (decoder->tsc_ctc_mult) {
			timestamp = data->ctc_timestamp +
				data->ctc_delta * decoder->tsc_ctc_mult;
		} else {
			timestamp = data->ctc_timestamp +
				multdiv(data->ctc_delta,
					decoder->tsc_ctc_ratio_n,
					decoder->tsc_ctc_ratio_d);
		}

		/* Give up if the timestamp would go backwards */
		if (timestamp < data->timestamp)
			return 1;

		/* Only an MTC directly after a CYC is used for the ratio */
		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
			data->timestamp = timestamp;
			return 0;
		}

		break;

	case INTEL_PT_TSC:
		/*
		 * For now, do not support using TSC packets - refer
		 * intel_pt_calc_cyc_to_tsc().
		 */
		if (data->from_mtc)
			return 1;
		/* The payload supplies the low 56 bits of the timestamp */
		timestamp = pkt_info->packet.payload |
			    (data->timestamp & (0xffULL << 56));
		if (data->from_mtc && timestamp < data->timestamp &&
		    data->timestamp - timestamp < decoder->tsc_slip)
			return 1;
		/* Assume the low 56 bits wrapped if time appears to go back */
		if (timestamp < data->timestamp)
			timestamp += (1ULL << 56);
		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
			if (data->from_mtc)
				return 1;
			data->tsc_timestamp = timestamp;
			data->timestamp = timestamp;
			return 0;
		}
		break;

	case INTEL_PT_TMA:
		if (data->from_mtc)
			return 1;

		if (!decoder->tsc_ctc_ratio_d)
			return 0;

		ctc = pkt_info->packet.payload;
		fc = pkt_info->packet.count;
		ctc_rem = ctc & decoder->ctc_rem_mask;

		data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;

		data->ctc_timestamp = data->tsc_timestamp - fc;
		if (decoder->tsc_ctc_mult) {
			data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
		} else {
			data->ctc_timestamp -=
				multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
					decoder->tsc_ctc_ratio_d);
		}

		data->ctc_delta = 0;
		data->have_tma = true;
		data->fixup_last_mtc = true;

		return 0;

	case INTEL_PT_CYC:
		data->cycle_cnt += pkt_info->packet.payload;
		return 0;

	case INTEL_PT_CBR:
		cbr = pkt_info->packet.payload;
		/* Stop if the CBR value changes during the lookahead */
		if (data->cbr && data->cbr != cbr)
			return 1;
		data->cbr = cbr;
		data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
		return 0;

	/* Any of these packets ends the lookahead */
	case INTEL_PT_TIP_PGD:
	case INTEL_PT_TRACESTOP:
	case INTEL_PT_EXSTOP:
	case INTEL_PT_EXSTOP_IP:
	case INTEL_PT_MWAIT:
	case INTEL_PT_PWRE:
	case INTEL_PT_PWRX:
	case INTEL_PT_OVF:
	case INTEL_PT_BAD: /* Does not happen */
	default:
		return 1;
	}

	/*
	 * Only reached from the MTC and TSC cases above, with 'timestamp' set,
	 * when the previous packet was a CYC packet.
	 */

	/* Fall back to the decoder's CBR if none was seen while looking ahead */
	if (!data->cbr && decoder->cbr) {
		data->cbr = decoder->cbr;
		data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
	}

	if (!data->cycle_cnt)
		return 1;

	cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;

	/* Reject a ratio more than 25% above the CBR-based value */
	if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
	    cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
		return 1;
	}

	decoder->calc_cyc_to_tsc = cyc_to_tsc;
	decoder->have_calc_cyc_to_tsc = true;

	if (data->cbr) {
		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
	} else {
		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
			     cyc_to_tsc, pkt_info->pos);
	}

	return 1;
}
842 
843 static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
844 				     bool from_mtc)
845 {
846 	struct intel_pt_calc_cyc_to_tsc_info data = {
847 		.cycle_cnt      = 0,
848 		.cbr            = 0,
849 		.last_mtc       = decoder->last_mtc,
850 		.ctc_timestamp  = decoder->ctc_timestamp,
851 		.ctc_delta      = decoder->ctc_delta,
852 		.tsc_timestamp  = decoder->tsc_timestamp,
853 		.timestamp      = decoder->timestamp,
854 		.have_tma       = decoder->have_tma,
855 		.fixup_last_mtc = decoder->fixup_last_mtc,
856 		.from_mtc       = from_mtc,
857 		.cbr_cyc_to_tsc = 0,
858 	};
859 
860 	/*
861 	 * For now, do not support using TSC packets for at least the reasons:
862 	 * 1) timing might have stopped
863 	 * 2) TSC packets within PSB+ can slip against CYC packets
864 	 */
865 	if (!from_mtc)
866 		return;
867 
868 	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
869 }
870 
871 static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
872 {
873 	int ret;
874 
875 	decoder->last_packet_type = decoder->packet.type;
876 
877 	do {
878 		decoder->pos += decoder->pkt_step;
879 		decoder->buf += decoder->pkt_step;
880 		decoder->len -= decoder->pkt_step;
881 
882 		if (!decoder->len) {
883 			ret = intel_pt_get_next_data(decoder, false);
884 			if (ret)
885 				return ret;
886 		}
887 
888 		ret = intel_pt_get_packet(decoder->buf, decoder->len,
889 					  &decoder->packet, &decoder->pkt_ctx);
890 		if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
891 		    decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
892 			ret = intel_pt_get_split_packet(decoder);
893 			if (ret < 0)
894 				return ret;
895 		}
896 		if (ret <= 0)
897 			return intel_pt_bad_packet(decoder);
898 
899 		decoder->pkt_len = ret;
900 		decoder->pkt_step = ret;
901 		intel_pt_decoder_log_packet(decoder);
902 	} while (decoder->packet.type == INTEL_PT_PAD);
903 
904 	return 0;
905 }
906 
907 static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
908 {
909 	uint64_t timestamp, masked_timestamp;
910 
911 	timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
912 	masked_timestamp = timestamp & decoder->period_mask;
913 	if (decoder->continuous_period) {
914 		if (masked_timestamp > decoder->last_masked_timestamp)
915 			return 1;
916 	} else {
917 		timestamp += 1;
918 		masked_timestamp = timestamp & decoder->period_mask;
919 		if (masked_timestamp > decoder->last_masked_timestamp) {
920 			decoder->last_masked_timestamp = masked_timestamp;
921 			decoder->continuous_period = true;
922 		}
923 	}
924 
925 	if (masked_timestamp < decoder->last_masked_timestamp)
926 		return decoder->period_ticks;
927 
928 	return decoder->period_ticks - (timestamp - masked_timestamp);
929 }
930 
931 static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
932 {
933 	switch (decoder->period_type) {
934 	case INTEL_PT_PERIOD_INSTRUCTIONS:
935 		return decoder->period - decoder->period_insn_cnt;
936 	case INTEL_PT_PERIOD_TICKS:
937 		return intel_pt_next_period(decoder);
938 	case INTEL_PT_PERIOD_NONE:
939 	case INTEL_PT_PERIOD_MTC:
940 	default:
941 		return 0;
942 	}
943 }
944 
945 static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
946 {
947 	uint64_t timestamp, masked_timestamp;
948 
949 	switch (decoder->period_type) {
950 	case INTEL_PT_PERIOD_INSTRUCTIONS:
951 		decoder->period_insn_cnt = 0;
952 		break;
953 	case INTEL_PT_PERIOD_TICKS:
954 		timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
955 		masked_timestamp = timestamp & decoder->period_mask;
956 		if (masked_timestamp > decoder->last_masked_timestamp)
957 			decoder->last_masked_timestamp = masked_timestamp;
958 		else
959 			decoder->last_masked_timestamp += decoder->period_ticks;
960 		break;
961 	case INTEL_PT_PERIOD_NONE:
962 	case INTEL_PT_PERIOD_MTC:
963 	default:
964 		break;
965 	}
966 
967 	decoder->state.type |= INTEL_PT_INSTRUCTION;
968 }
969 
/*
 * Walk instructions from decoder->ip using the walk_insn callback, which is
 * given @ip as the address to walk to and the number of instructions until
 * the next sample as a limit.
 *
 * Returns:
 *   INTEL_PT_RETURN  decoder->state describes an instruction without a branch
 *                    or an unconditional branch
 *   0                stopped at another kind of branch, left to the caller
 *   -EAGAIN          @ip is non-zero and was reached
 *   -ELOOP           stuck in a loop that consumes no packets
 *   -ENOLINK         the callback failed with -ENOENT
 *   -EILSEQ          the callback failed with any other error
 *   other negative   error from intel_pt_push()
 */
static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
			      struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
	uint64_t max_insn_cnt, insn_cnt = 0;
	int err;

	if (!decoder->mtc_insn)
		decoder->mtc_insn = true;

	max_insn_cnt = intel_pt_next_sample(decoder);

	err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
				 max_insn_cnt, decoder->data);

	/* Account for the instructions walked, even if the walk failed */
	decoder->tot_insn_cnt += insn_cnt;
	decoder->timestamp_insn_cnt += insn_cnt;
	decoder->sample_insn_cnt += insn_cnt;
	decoder->period_insn_cnt += insn_cnt;

	if (err) {
		decoder->no_progress = 0;
		decoder->pkt_state = INTEL_PT_STATE_ERR2;
		intel_pt_log_at("ERROR: Failed to get instruction",
				decoder->ip);
		if (err == -ENOENT)
			return -ENOLINK;
		return -EILSEQ;
	}

	/* Reached the requested address */
	if (ip && decoder->ip == ip) {
		err = -EAGAIN;
		goto out;
	}

	/* The instruction limit was hit, so a periodic sample is due */
	if (max_insn_cnt && insn_cnt >= max_insn_cnt)
		intel_pt_sample_insn(decoder);

	if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
		decoder->state.type = INTEL_PT_INSTRUCTION;
		decoder->state.from_ip = decoder->ip;
		decoder->state.to_ip = 0;
		decoder->ip += intel_pt_insn->length;
		err = INTEL_PT_RETURN;
		goto out;
	}

	/* Track calls and returns on the decoder's stack */
	if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
		/* Zero-length calls are excluded */
		if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
		    intel_pt_insn->rel) {
			err = intel_pt_push(&decoder->stack, decoder->ip +
					    intel_pt_insn->length);
			if (err)
				goto out;
		}
	} else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
		decoder->ret_addr = intel_pt_pop(&decoder->stack);
	}

	if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
		/* Number of consecutive unconditional branches before this one */
		int cnt = decoder->no_progress++;

		decoder->state.from_ip = decoder->ip;
		decoder->ip += intel_pt_insn->length +
				intel_pt_insn->rel;
		decoder->state.to_ip = decoder->ip;
		err = INTEL_PT_RETURN;

		/*
		 * Check for being stuck in a loop.  This can happen if a
		 * decoder error results in the decoder erroneously setting the
		 * ip to an address that is itself in an infinite loop that
		 * consumes no packets.  When that happens, there must be an
		 * unconditional branch.
		 */
		if (cnt) {
			if (cnt == 1) {
				/* Start watching for this target to come round again */
				decoder->stuck_ip = decoder->state.to_ip;
				decoder->stuck_ip_prd = 1;
				decoder->stuck_ip_cnt = 1;
			} else if (cnt > INTEL_PT_MAX_LOOPS ||
				   decoder->state.to_ip == decoder->stuck_ip) {
				intel_pt_log_at("ERROR: Never-ending loop",
						decoder->state.to_ip);
				decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
				err = -ELOOP;
				goto out;
			} else if (!--decoder->stuck_ip_cnt) {
				/*
				 * Target not seen again within the current
				 * window: watch the latest target for a window
				 * that is one branch longer.
				 */
				decoder->stuck_ip_prd += 1;
				decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
				decoder->stuck_ip = decoder->state.to_ip;
			}
		}
		/* Keep the no_progress count for the next unconditional branch */
		goto out_no_progress;
	}
out:
	decoder->no_progress = 0;
out_no_progress:
	/* Record the instruction that the walk stopped at */
	decoder->state.insn_op = intel_pt_insn->op;
	decoder->state.insn_len = intel_pt_insn->length;
	memcpy(decoder->state.insn, intel_pt_insn->buf,
	       INTEL_PT_INSN_BUF_SZ);

	if (decoder->tx_flags & INTEL_PT_IN_TX)
		decoder->state.flags |= INTEL_PT_IN_TX;

	return err;
}
1078 
1079 static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
1080 {
1081 	bool ret = false;
1082 
1083 	if (decoder->set_fup_tx_flags) {
1084 		decoder->set_fup_tx_flags = false;
1085 		decoder->tx_flags = decoder->fup_tx_flags;
1086 		decoder->state.type = INTEL_PT_TRANSACTION;
1087 		decoder->state.from_ip = decoder->ip;
1088 		decoder->state.to_ip = 0;
1089 		decoder->state.flags = decoder->fup_tx_flags;
1090 		return true;
1091 	}
1092 	if (decoder->set_fup_ptw) {
1093 		decoder->set_fup_ptw = false;
1094 		decoder->state.type = INTEL_PT_PTW;
1095 		decoder->state.flags |= INTEL_PT_FUP_IP;
1096 		decoder->state.from_ip = decoder->ip;
1097 		decoder->state.to_ip = 0;
1098 		decoder->state.ptw_payload = decoder->fup_ptw_payload;
1099 		return true;
1100 	}
1101 	if (decoder->set_fup_mwait) {
1102 		decoder->set_fup_mwait = false;
1103 		decoder->state.type = INTEL_PT_MWAIT_OP;
1104 		decoder->state.from_ip = decoder->ip;
1105 		decoder->state.to_ip = 0;
1106 		decoder->state.mwait_payload = decoder->fup_mwait_payload;
1107 		ret = true;
1108 	}
1109 	if (decoder->set_fup_pwre) {
1110 		decoder->set_fup_pwre = false;
1111 		decoder->state.type |= INTEL_PT_PWR_ENTRY;
1112 		decoder->state.type &= ~INTEL_PT_BRANCH;
1113 		decoder->state.from_ip = decoder->ip;
1114 		decoder->state.to_ip = 0;
1115 		decoder->state.pwre_payload = decoder->fup_pwre_payload;
1116 		ret = true;
1117 	}
1118 	if (decoder->set_fup_exstop) {
1119 		decoder->set_fup_exstop = false;
1120 		decoder->state.type |= INTEL_PT_EX_STOP;
1121 		decoder->state.type &= ~INTEL_PT_BRANCH;
1122 		decoder->state.flags |= INTEL_PT_FUP_IP;
1123 		decoder->state.from_ip = decoder->ip;
1124 		decoder->state.to_ip = 0;
1125 		ret = true;
1126 	}
1127 	return ret;
1128 }
1129 
1130 static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
1131 					  struct intel_pt_insn *intel_pt_insn,
1132 					  uint64_t ip, int err)
1133 {
1134 	return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
1135 	       intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
1136 	       ip == decoder->ip + intel_pt_insn->length;
1137 }
1138 
1139 static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
1140 {
1141 	struct intel_pt_insn intel_pt_insn;
1142 	uint64_t ip;
1143 	int err;
1144 
1145 	ip = decoder->last_ip;
1146 
1147 	while (1) {
1148 		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
1149 		if (err == INTEL_PT_RETURN)
1150 			return 0;
1151 		if (err == -EAGAIN ||
1152 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
1153 			if (intel_pt_fup_event(decoder))
1154 				return 0;
1155 			return -EAGAIN;
1156 		}
1157 		decoder->set_fup_tx_flags = false;
1158 		if (err)
1159 			return err;
1160 
1161 		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1162 			intel_pt_log_at("ERROR: Unexpected indirect branch",
1163 					decoder->ip);
1164 			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1165 			return -ENOENT;
1166 		}
1167 
1168 		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1169 			intel_pt_log_at("ERROR: Unexpected conditional branch",
1170 					decoder->ip);
1171 			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1172 			return -ENOENT;
1173 		}
1174 
1175 		intel_pt_bug(decoder);
1176 	}
1177 }
1178 
1179 static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
1180 {
1181 	struct intel_pt_insn intel_pt_insn;
1182 	int err;
1183 
1184 	err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
1185 	if (err == INTEL_PT_RETURN &&
1186 	    decoder->pgd_ip &&
1187 	    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
1188 	    (decoder->state.type & INTEL_PT_BRANCH) &&
1189 	    decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
1190 		/* Unconditional branch leaving filter region */
1191 		decoder->no_progress = 0;
1192 		decoder->pge = false;
1193 		decoder->continuous_period = false;
1194 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1195 		decoder->state.type |= INTEL_PT_TRACE_END;
1196 		return 0;
1197 	}
1198 	if (err == INTEL_PT_RETURN)
1199 		return 0;
1200 	if (err)
1201 		return err;
1202 
1203 	if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
1204 		if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
1205 			decoder->pge = false;
1206 			decoder->continuous_period = false;
1207 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1208 			decoder->state.from_ip = decoder->ip;
1209 			if (decoder->packet.count == 0) {
1210 				decoder->state.to_ip = 0;
1211 			} else {
1212 				decoder->state.to_ip = decoder->last_ip;
1213 				decoder->ip = decoder->last_ip;
1214 			}
1215 			decoder->state.type |= INTEL_PT_TRACE_END;
1216 		} else {
1217 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1218 			decoder->state.from_ip = decoder->ip;
1219 			if (decoder->packet.count == 0) {
1220 				decoder->state.to_ip = 0;
1221 			} else {
1222 				decoder->state.to_ip = decoder->last_ip;
1223 				decoder->ip = decoder->last_ip;
1224 			}
1225 		}
1226 		return 0;
1227 	}
1228 
1229 	if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1230 		uint64_t to_ip = decoder->ip + intel_pt_insn.length +
1231 				 intel_pt_insn.rel;
1232 
1233 		if (decoder->pgd_ip &&
1234 		    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
1235 		    decoder->pgd_ip(to_ip, decoder->data)) {
1236 			/* Conditional branch leaving filter region */
1237 			decoder->pge = false;
1238 			decoder->continuous_period = false;
1239 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1240 			decoder->ip = to_ip;
1241 			decoder->state.from_ip = decoder->ip;
1242 			decoder->state.to_ip = to_ip;
1243 			decoder->state.type |= INTEL_PT_TRACE_END;
1244 			return 0;
1245 		}
1246 		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
1247 				decoder->ip);
1248 		decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1249 		return -ENOENT;
1250 	}
1251 
1252 	return intel_pt_bug(decoder);
1253 }
1254 
/*
 * Walk instructions, consuming the bits of the current TNT packet held in
 * decoder->tnt.
 *
 * The next TNT bit is bit 63 of decoder->tnt.payload; consuming a bit
 * decrements decoder->tnt.count and shifts the payload left by one.  After a
 * bit is consumed, decoder->pkt_state becomes INTEL_PT_STATE_TNT_CONT while
 * bits remain and INTEL_PT_STATE_IN_SYNC once they are used up.
 *
 * Return: 0 when decoder->state has been filled in, -EAGAIN when the last TNT
 * bit was used by a not-taken branch that produced no sample (the caller must
 * fetch another packet), -ENOENT on an inconsistency between the trace and
 * the instructions, or another error from the instruction walker or packet
 * reader.
 */
static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
{
	struct intel_pt_insn intel_pt_insn;
	int err;

	while (1) {
		err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
		if (err == INTEL_PT_RETURN)
			return 0;
		if (err)
			return err;

		if (intel_pt_insn.op == INTEL_PT_OP_RET) {
			/*
			 * A RET is only acceptable here with return
			 * compression enabled, a known return address and a
			 * "taken" TNT bit.
			 */
			if (!decoder->return_compression) {
				intel_pt_log_at("ERROR: RET when expecting conditional branch",
						decoder->ip);
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
				return -ENOENT;
			}
			if (!decoder->ret_addr) {
				intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
						decoder->ip);
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
				return -ENOENT;
			}
			if (!(decoder->tnt.payload & BIT63)) {
				intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
						decoder->ip);
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
				return -ENOENT;
			}
			/* Consume the TNT bit and branch to the return address */
			decoder->tnt.count -= 1;
			if (decoder->tnt.count)
				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
			else
				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
			decoder->tnt.payload <<= 1;
			decoder->state.from_ip = decoder->ip;
			decoder->ip = decoder->ret_addr;
			decoder->state.to_ip = decoder->ip;
			return 0;
		}

		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
			/* Handle deferred TIPs */
			err = intel_pt_get_next_packet(decoder);
			if (err)
				return err;
			/* The next packet must be a TIP that carries an IP */
			if (decoder->packet.type != INTEL_PT_TIP ||
			    decoder->packet.count == 0) {
				intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
						decoder->ip);
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
				decoder->pkt_step = 0;
				return -ENOENT;
			}
			intel_pt_set_last_ip(decoder);
			decoder->state.from_ip = decoder->ip;
			decoder->state.to_ip = decoder->last_ip;
			decoder->ip = decoder->last_ip;
			return 0;
		}

		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
			decoder->tnt.count -= 1;
			if (decoder->tnt.count)
				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
			else
				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
			/* Taken branch: report it and move to the target */
			if (decoder->tnt.payload & BIT63) {
				decoder->tnt.payload <<= 1;
				decoder->state.from_ip = decoder->ip;
				decoder->ip += intel_pt_insn.length +
					       intel_pt_insn.rel;
				decoder->state.to_ip = decoder->ip;
				return 0;
			}
			/* Instruction sample for a non-taken branch */
			if (decoder->state.type & INTEL_PT_INSTRUCTION) {
				decoder->tnt.payload <<= 1;
				decoder->state.type = INTEL_PT_INSTRUCTION;
				decoder->state.from_ip = decoder->ip;
				decoder->state.to_ip = 0;
				decoder->ip += intel_pt_insn.length;
				return 0;
			}
			/* Not taken and nothing to report: step over it */
			decoder->sample_cyc = false;
			decoder->ip += intel_pt_insn.length;
			if (!decoder->tnt.count) {
				/* TNT bits exhausted without producing a sample */
				intel_pt_update_sample_time(decoder);
				return -EAGAIN;
			}
			decoder->tnt.payload <<= 1;
			continue;
		}

		return intel_pt_bug(decoder);
	}
}
1354 
1355 static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
1356 {
1357 	unsigned int fup_tx_flags;
1358 	int err;
1359 
1360 	fup_tx_flags = decoder->packet.payload &
1361 		       (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
1362 	err = intel_pt_get_next_packet(decoder);
1363 	if (err)
1364 		return err;
1365 	if (decoder->packet.type == INTEL_PT_FUP) {
1366 		decoder->fup_tx_flags = fup_tx_flags;
1367 		decoder->set_fup_tx_flags = true;
1368 		if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
1369 			*no_tip = true;
1370 	} else {
1371 		intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
1372 				decoder->pos);
1373 		intel_pt_update_in_tx(decoder);
1374 	}
1375 	return 0;
1376 }
1377 
/*
 * Extend a timestamp that lacks its top 8 bits to a full 64-bit value.
 *
 * The top byte is borrowed from @ref_timestamp.  If the result is then more
 * than 2^55 away from @ref_timestamp, it is moved by 2^56 towards the
 * reference.
 *
 * Return: the extended timestamp.
 */
static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
{
	const uint64_t half_range = 1ULL << 55;
	const uint64_t full_range = 1ULL << 56;
	const uint64_t tsc = timestamp | (ref_timestamp & (0xffULL << 56));

	if (tsc < ref_timestamp)
		return ref_timestamp - tsc > half_range ? tsc + full_range : tsc;

	return tsc - ref_timestamp > half_range ? tsc - full_range : tsc;
}
1392 
1393 static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
1394 {
1395 	uint64_t timestamp;
1396 
1397 	decoder->have_tma = false;
1398 
1399 	if (decoder->ref_timestamp) {
1400 		timestamp = intel_pt_8b_tsc(decoder->packet.payload,
1401 					    decoder->ref_timestamp);
1402 		decoder->tsc_timestamp = timestamp;
1403 		decoder->timestamp = timestamp;
1404 		decoder->ref_timestamp = 0;
1405 		decoder->timestamp_insn_cnt = 0;
1406 	} else if (decoder->timestamp) {
1407 		timestamp = decoder->packet.payload |
1408 			    (decoder->timestamp & (0xffULL << 56));
1409 		decoder->tsc_timestamp = timestamp;
1410 		if (timestamp < decoder->timestamp &&
1411 		    decoder->timestamp - timestamp < decoder->tsc_slip) {
1412 			intel_pt_log_to("Suppressing backwards timestamp",
1413 					timestamp);
1414 			timestamp = decoder->timestamp;
1415 		}
1416 		if (timestamp < decoder->timestamp) {
1417 			intel_pt_log_to("Wraparound timestamp", timestamp);
1418 			timestamp += (1ULL << 56);
1419 			decoder->tsc_timestamp = timestamp;
1420 		}
1421 		decoder->timestamp = timestamp;
1422 		decoder->timestamp_insn_cnt = 0;
1423 	}
1424 
1425 	if (decoder->last_packet_type == INTEL_PT_CYC) {
1426 		decoder->cyc_ref_timestamp = decoder->timestamp;
1427 		decoder->cycle_cnt = 0;
1428 		decoder->have_calc_cyc_to_tsc = false;
1429 		intel_pt_calc_cyc_to_tsc(decoder, false);
1430 	}
1431 
1432 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
1433 }
1434 
1435 static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1436 {
1437 	intel_pt_log("ERROR: Buffer overflow\n");
1438 	intel_pt_clear_tx_flags(decoder);
1439 	decoder->timestamp_insn_cnt = 0;
1440 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1441 	decoder->overflow = true;
1442 	return -EOVERFLOW;
1443 }
1444 
1445 static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
1446 {
1447 	if (decoder->have_cyc)
1448 		return;
1449 
1450 	decoder->cyc_cnt_timestamp = decoder->timestamp;
1451 	decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
1452 }
1453 
1454 static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
1455 {
1456 	decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
1457 
1458 	if (decoder->pge)
1459 		intel_pt_mtc_cyc_cnt_pge(decoder);
1460 }
1461 
1462 static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
1463 {
1464 	uint64_t tot_cyc_cnt, tsc_delta;
1465 
1466 	if (decoder->have_cyc)
1467 		return;
1468 
1469 	decoder->sample_cyc = true;
1470 
1471 	if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
1472 		return;
1473 
1474 	tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
1475 	tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
1476 
1477 	if (tot_cyc_cnt > decoder->tot_cyc_cnt)
1478 		decoder->tot_cyc_cnt = tot_cyc_cnt;
1479 }
1480 
/*
 * Process a TMA packet: establish the CTC reference point against which
 * later MTC packets are converted to timestamps.
 *
 * Does nothing when decoder->tsc_ctc_ratio_d is zero.  Otherwise
 * decoder->ctc_timestamp and decoder->last_mtc are derived from the packet
 * and the last TSC timestamp, decoder->ctc_delta is reset, and
 * decoder->have_tma is set so that intel_pt_calc_mtc_timestamp() acts on
 * subsequent MTC packets.
 */
static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
{
	/* Payload and count are narrowed to 32 bits here */
	uint32_t ctc = decoder->packet.payload;
	uint32_t fc = decoder->packet.count;
	uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;

	if (!decoder->tsc_ctc_ratio_d)
		return;

	/* Outside PSB+ with tracing enabled, restart the cycle count base */
	if (decoder->pge && !decoder->in_psb)
		intel_pt_mtc_cyc_cnt_pge(decoder);
	else
		intel_pt_mtc_cyc_cnt_upd(decoder);

	decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
	/*
	 * Step back from the TSC timestamp by fc and by the TSC equivalent of
	 * the CTC remainder.
	 */
	decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
	if (decoder->tsc_ctc_mult) {
		decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
	} else {
		decoder->ctc_timestamp -= multdiv(ctc_rem,
						  decoder->tsc_ctc_ratio_n,
						  decoder->tsc_ctc_ratio_d);
	}
	decoder->ctc_delta = 0;
	decoder->have_tma = true;
	/* Let the next MTC correct last_mtc if necessary */
	decoder->fixup_last_mtc = true;
	intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x  CTC rem %#x\n",
		     decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
}
1510 
/*
 * Process an MTC packet: advance the timestamp using the CTC reference set
 * up by the last TMA packet.
 *
 * Does nothing unless decoder->have_tma is set.  The resulting timestamp is
 * only applied if it does not go backwards relative to decoder->timestamp.
 */
static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
{
	uint64_t timestamp;
	uint32_t mtc, mtc_delta;

	if (!decoder->have_tma)
		return;

	mtc = decoder->packet.payload;

	/* One-off correction of last_mtc after a TMA, for large MTC shifts */
	if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
		decoder->fixup_last_mtc = false;
		intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
					&decoder->last_mtc);
	}

	/*
	 * The delta is computed modulo 256; an MTC value equal to the last
	 * one counts as a full wrap of 256.
	 */
	if (mtc > decoder->last_mtc)
		mtc_delta = mtc - decoder->last_mtc;
	else
		mtc_delta = mtc + 256 - decoder->last_mtc;

	decoder->ctc_delta += mtc_delta << decoder->mtc_shift;

	/* Convert the accumulated CTC delta to timestamp ticks */
	if (decoder->tsc_ctc_mult) {
		timestamp = decoder->ctc_timestamp +
			    decoder->ctc_delta * decoder->tsc_ctc_mult;
	} else {
		timestamp = decoder->ctc_timestamp +
			    multdiv(decoder->ctc_delta,
				    decoder->tsc_ctc_ratio_n,
				    decoder->tsc_ctc_ratio_d);
	}

	if (timestamp < decoder->timestamp)
		intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
			     timestamp, decoder->timestamp);
	else
		decoder->timestamp = timestamp;

	intel_pt_mtc_cyc_cnt_upd(decoder);

	decoder->timestamp_insn_cnt = 0;
	decoder->last_mtc = mtc;

	/* An MTC directly after a CYC restarts cycle-based time estimation */
	if (decoder->last_packet_type == INTEL_PT_CYC) {
		decoder->cyc_ref_timestamp = decoder->timestamp;
		decoder->cycle_cnt = 0;
		decoder->have_calc_cyc_to_tsc = false;
		intel_pt_calc_cyc_to_tsc(decoder, true);
	}

	intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
1564 
1565 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1566 {
1567 	unsigned int cbr = decoder->packet.payload & 0xff;
1568 
1569 	decoder->cbr_payload = decoder->packet.payload;
1570 
1571 	if (decoder->cbr == cbr)
1572 		return;
1573 
1574 	decoder->cbr = cbr;
1575 	decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1576 
1577 	intel_pt_mtc_cyc_cnt_cbr(decoder);
1578 }
1579 
1580 static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1581 {
1582 	uint64_t timestamp = decoder->cyc_ref_timestamp;
1583 
1584 	decoder->have_cyc = true;
1585 
1586 	decoder->cycle_cnt += decoder->packet.payload;
1587 	if (decoder->pge)
1588 		decoder->tot_cyc_cnt += decoder->packet.payload;
1589 	decoder->sample_cyc = true;
1590 
1591 	if (!decoder->cyc_ref_timestamp)
1592 		return;
1593 
1594 	if (decoder->have_calc_cyc_to_tsc)
1595 		timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
1596 	else if (decoder->cbr)
1597 		timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
1598 	else
1599 		return;
1600 
1601 	if (timestamp < decoder->timestamp)
1602 		intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
1603 			     timestamp, decoder->timestamp);
1604 	else
1605 		decoder->timestamp = timestamp;
1606 
1607 	decoder->timestamp_insn_cnt = 0;
1608 
1609 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
1610 }
1611 
/*
 * Walk PSB+ packets when already in sync.
 *
 * Packets are consumed until PSBEND.  decoder->in_psb is set for the
 * duration of the walk and cleared on every exit path.
 *
 * Return: 0 when PSBEND is reached, -EAGAIN when a packet that does not
 * belong in PSB+ is found (that packet is left in decoder->packet), the
 * result of intel_pt_overflow() on OVF, or an error from
 * intel_pt_get_next_packet().
 */
static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
{
	int err;

	decoder->in_psb = true;

	while (1) {
		err = intel_pt_get_next_packet(decoder);
		if (err)
			goto out;

		switch (decoder->packet.type) {
		case INTEL_PT_PSBEND:
			err = 0;
			goto out;

		/* Packets that are not expected between PSB and PSBEND */
		case INTEL_PT_TIP_PGD:
		case INTEL_PT_TIP_PGE:
		case INTEL_PT_TIP:
		case INTEL_PT_TNT:
		case INTEL_PT_TRACESTOP:
		case INTEL_PT_BAD:
		case INTEL_PT_PSB:
		case INTEL_PT_PTWRITE:
		case INTEL_PT_PTWRITE_IP:
		case INTEL_PT_EXSTOP:
		case INTEL_PT_EXSTOP_IP:
		case INTEL_PT_MWAIT:
		case INTEL_PT_PWRE:
		case INTEL_PT_PWRX:
		case INTEL_PT_BBP:
		case INTEL_PT_BIP:
		case INTEL_PT_BEP:
		case INTEL_PT_BEP_IP:
			decoder->have_tma = false;
			intel_pt_log("ERROR: Unexpected packet\n");
			err = -EAGAIN;
			goto out;

		case INTEL_PT_OVF:
			err = intel_pt_overflow(decoder);
			goto out;

		case INTEL_PT_TSC:
			intel_pt_calc_tsc_timestamp(decoder);
			break;

		case INTEL_PT_TMA:
			intel_pt_calc_tma(decoder);
			break;

		case INTEL_PT_CBR:
			intel_pt_calc_cbr(decoder);
			break;

		case INTEL_PT_MODE_EXEC:
			decoder->exec_mode = decoder->packet.payload;
			break;

		case INTEL_PT_PIP:
			/* Bit 63 of the payload is masked off */
			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
			break;

		case INTEL_PT_FUP:
			/* A FUP inside PSB+ marks packet generation as enabled */
			decoder->pge = true;
			if (decoder->packet.count)
				intel_pt_set_last_ip(decoder);
			break;

		case INTEL_PT_MODE_TSX:
			intel_pt_update_in_tx(decoder);
			break;

		case INTEL_PT_MTC:
			intel_pt_calc_mtc_timestamp(decoder);
			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
				decoder->state.type |= INTEL_PT_INSTRUCTION;
			break;

		/* Nothing to do for these packets (or any unlisted type) */
		case INTEL_PT_CYC:
		case INTEL_PT_VMCS:
		case INTEL_PT_MNT:
		case INTEL_PT_PAD:
		default:
			break;
		}
	}
out:
	decoder->in_psb = false;

	return err;
}
1705 
/*
 * After a FUP, consume packets until the TIP, TIP.PGE or TIP.PGD that
 * completes it.
 *
 * Before walking, the sample flags are set up: if a transaction abort is
 * pending in decoder->tx_flags, the transaction flags are cleared and the
 * sample is marked INTEL_PT_ABORT_TX; otherwise it is marked INTEL_PT_ASYNC.
 *
 * Return: 0 when decoder->state has been filled in from a TIP-type packet,
 * -ENOENT when a packet arrives that cannot precede the TIP, the result of
 * intel_pt_overflow() on OVF, or an error from intel_pt_get_next_packet().
 */
static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
{
	int err;

	if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
		decoder->tx_flags = 0;
		decoder->state.flags &= ~INTEL_PT_IN_TX;
		decoder->state.flags |= INTEL_PT_ABORT_TX;
	} else {
		decoder->state.flags |= INTEL_PT_ASYNC;
	}

	while (1) {
		err = intel_pt_get_next_packet(decoder);
		if (err)
			return err;

		switch (decoder->packet.type) {
		/* Packets that mean the expected TIP is missing */
		case INTEL_PT_TNT:
		case INTEL_PT_FUP:
		case INTEL_PT_TRACESTOP:
		case INTEL_PT_PSB:
		case INTEL_PT_TSC:
		case INTEL_PT_TMA:
		case INTEL_PT_MODE_TSX:
		case INTEL_PT_BAD:
		case INTEL_PT_PSBEND:
		case INTEL_PT_PTWRITE:
		case INTEL_PT_PTWRITE_IP:
		case INTEL_PT_EXSTOP:
		case INTEL_PT_EXSTOP_IP:
		case INTEL_PT_MWAIT:
		case INTEL_PT_PWRE:
		case INTEL_PT_PWRX:
		case INTEL_PT_BBP:
		case INTEL_PT_BIP:
		case INTEL_PT_BEP:
		case INTEL_PT_BEP_IP:
			intel_pt_log("ERROR: Missing TIP after FUP\n");
			decoder->pkt_state = INTEL_PT_STATE_ERR3;
			decoder->pkt_step = 0;
			return -ENOENT;

		case INTEL_PT_CBR:
			intel_pt_calc_cbr(decoder);
			break;

		case INTEL_PT_OVF:
			return intel_pt_overflow(decoder);

		case INTEL_PT_TIP_PGD:
			/* Branch out of tracing: destination only if an IP is given */
			decoder->state.from_ip = decoder->ip;
			if (decoder->packet.count == 0) {
				decoder->state.to_ip = 0;
			} else {
				intel_pt_set_ip(decoder);
				decoder->state.to_ip = decoder->ip;
			}
			decoder->pge = false;
			decoder->continuous_period = false;
			decoder->state.type |= INTEL_PT_TRACE_END;
			return 0;

		case INTEL_PT_TIP_PGE:
			/* Tracing (re)starts: there is no source address */
			decoder->pge = true;
			intel_pt_log("Omitting PGE ip " x64_fmt "\n",
				     decoder->ip);
			decoder->state.from_ip = 0;
			if (decoder->packet.count == 0) {
				decoder->state.to_ip = 0;
			} else {
				intel_pt_set_ip(decoder);
				decoder->state.to_ip = decoder->ip;
			}
			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
			intel_pt_mtc_cyc_cnt_pge(decoder);
			return 0;

		case INTEL_PT_TIP:
			decoder->state.from_ip = decoder->ip;
			if (decoder->packet.count == 0) {
				decoder->state.to_ip = 0;
			} else {
				intel_pt_set_ip(decoder);
				decoder->state.to_ip = decoder->ip;
			}
			return 0;

		case INTEL_PT_PIP:
			/* Bit 63 of the payload is masked off */
			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
			break;

		case INTEL_PT_MTC:
			intel_pt_calc_mtc_timestamp(decoder);
			if (decoder->period_type == INTEL_PT_PERIOD_MTC)
				decoder->state.type |= INTEL_PT_INSTRUCTION;
			break;

		case INTEL_PT_CYC:
			intel_pt_calc_cyc_timestamp(decoder);
			break;

		case INTEL_PT_MODE_EXEC:
			decoder->exec_mode = decoder->packet.payload;
			break;

		/* Nothing to do for these packets */
		case INTEL_PT_VMCS:
		case INTEL_PT_MNT:
		case INTEL_PT_PAD:
			break;

		default:
			return intel_pt_bug(decoder);
		}
	}
}
1822 
1823 static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
1824 {
1825 	bool no_tip = false;
1826 	int err;
1827 
1828 	while (1) {
1829 		err = intel_pt_get_next_packet(decoder);
1830 		if (err)
1831 			return err;
1832 next:
1833 		switch (decoder->packet.type) {
1834 		case INTEL_PT_TNT:
1835 			if (!decoder->packet.count)
1836 				break;
1837 			decoder->tnt = decoder->packet;
1838 			decoder->pkt_state = INTEL_PT_STATE_TNT;
1839 			err = intel_pt_walk_tnt(decoder);
1840 			if (err == -EAGAIN)
1841 				break;
1842 			return err;
1843 
1844 		case INTEL_PT_TIP_PGD:
1845 			if (decoder->packet.count != 0)
1846 				intel_pt_set_last_ip(decoder);
1847 			decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
1848 			return intel_pt_walk_tip(decoder);
1849 
1850 		case INTEL_PT_TIP_PGE: {
1851 			decoder->pge = true;
1852 			intel_pt_mtc_cyc_cnt_pge(decoder);
1853 			if (decoder->packet.count == 0) {
1854 				intel_pt_log_at("Skipping zero TIP.PGE",
1855 						decoder->pos);
1856 				break;
1857 			}
1858 			intel_pt_set_ip(decoder);
1859 			decoder->state.from_ip = 0;
1860 			decoder->state.to_ip = decoder->ip;
1861 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
1862 			return 0;
1863 		}
1864 
1865 		case INTEL_PT_OVF:
1866 			return intel_pt_overflow(decoder);
1867 
1868 		case INTEL_PT_TIP:
1869 			if (decoder->packet.count != 0)
1870 				intel_pt_set_last_ip(decoder);
1871 			decoder->pkt_state = INTEL_PT_STATE_TIP;
1872 			return intel_pt_walk_tip(decoder);
1873 
1874 		case INTEL_PT_FUP:
1875 			if (decoder->packet.count == 0) {
1876 				intel_pt_log_at("Skipping zero FUP",
1877 						decoder->pos);
1878 				no_tip = false;
1879 				break;
1880 			}
1881 			intel_pt_set_last_ip(decoder);
1882 			if (!decoder->branch_enable) {
1883 				decoder->ip = decoder->last_ip;
1884 				if (intel_pt_fup_event(decoder))
1885 					return 0;
1886 				no_tip = false;
1887 				break;
1888 			}
1889 			if (decoder->set_fup_mwait)
1890 				no_tip = true;
1891 			err = intel_pt_walk_fup(decoder);
1892 			if (err != -EAGAIN) {
1893 				if (err)
1894 					return err;
1895 				if (no_tip)
1896 					decoder->pkt_state =
1897 						INTEL_PT_STATE_FUP_NO_TIP;
1898 				else
1899 					decoder->pkt_state = INTEL_PT_STATE_FUP;
1900 				return 0;
1901 			}
1902 			if (no_tip) {
1903 				no_tip = false;
1904 				break;
1905 			}
1906 			return intel_pt_walk_fup_tip(decoder);
1907 
1908 		case INTEL_PT_TRACESTOP:
1909 			decoder->pge = false;
1910 			decoder->continuous_period = false;
1911 			intel_pt_clear_tx_flags(decoder);
1912 			decoder->have_tma = false;
1913 			break;
1914 
1915 		case INTEL_PT_PSB:
1916 			decoder->last_ip = 0;
1917 			decoder->have_last_ip = true;
1918 			intel_pt_clear_stack(&decoder->stack);
1919 			err = intel_pt_walk_psbend(decoder);
1920 			if (err == -EAGAIN)
1921 				goto next;
1922 			if (err)
1923 				return err;
1924 			break;
1925 
1926 		case INTEL_PT_PIP:
1927 			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
1928 			break;
1929 
1930 		case INTEL_PT_MTC:
1931 			intel_pt_calc_mtc_timestamp(decoder);
1932 			if (decoder->period_type != INTEL_PT_PERIOD_MTC)
1933 				break;
1934 			/*
1935 			 * Ensure that there has been an instruction since the
1936 			 * last MTC.
1937 			 */
1938 			if (!decoder->mtc_insn)
1939 				break;
1940 			decoder->mtc_insn = false;
1941 			/* Ensure that there is a timestamp */
1942 			if (!decoder->timestamp)
1943 				break;
1944 			decoder->state.type = INTEL_PT_INSTRUCTION;
1945 			decoder->state.from_ip = decoder->ip;
1946 			decoder->state.to_ip = 0;
1947 			decoder->mtc_insn = false;
1948 			return 0;
1949 
1950 		case INTEL_PT_TSC:
1951 			intel_pt_calc_tsc_timestamp(decoder);
1952 			break;
1953 
1954 		case INTEL_PT_TMA:
1955 			intel_pt_calc_tma(decoder);
1956 			break;
1957 
1958 		case INTEL_PT_CYC:
1959 			intel_pt_calc_cyc_timestamp(decoder);
1960 			break;
1961 
1962 		case INTEL_PT_CBR:
1963 			intel_pt_calc_cbr(decoder);
1964 			if (!decoder->branch_enable &&
1965 			    decoder->cbr != decoder->cbr_seen) {
1966 				decoder->cbr_seen = decoder->cbr;
1967 				decoder->state.type = INTEL_PT_CBR_CHG;
1968 				decoder->state.from_ip = decoder->ip;
1969 				decoder->state.to_ip = 0;
1970 				decoder->state.cbr_payload =
1971 							decoder->packet.payload;
1972 				return 0;
1973 			}
1974 			break;
1975 
1976 		case INTEL_PT_MODE_EXEC:
1977 			decoder->exec_mode = decoder->packet.payload;
1978 			break;
1979 
1980 		case INTEL_PT_MODE_TSX:
1981 			/* MODE_TSX need not be followed by FUP */
1982 			if (!decoder->pge) {
1983 				intel_pt_update_in_tx(decoder);
1984 				break;
1985 			}
1986 			err = intel_pt_mode_tsx(decoder, &no_tip);
1987 			if (err)
1988 				return err;
1989 			goto next;
1990 
1991 		case INTEL_PT_BAD: /* Does not happen */
1992 			return intel_pt_bug(decoder);
1993 
1994 		case INTEL_PT_PSBEND:
1995 		case INTEL_PT_VMCS:
1996 		case INTEL_PT_MNT:
1997 		case INTEL_PT_PAD:
1998 			break;
1999 
2000 		case INTEL_PT_PTWRITE_IP:
2001 			decoder->fup_ptw_payload = decoder->packet.payload;
2002 			err = intel_pt_get_next_packet(decoder);
2003 			if (err)
2004 				return err;
2005 			if (decoder->packet.type == INTEL_PT_FUP) {
2006 				decoder->set_fup_ptw = true;
2007 				no_tip = true;
2008 			} else {
2009 				intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
2010 						decoder->pos);
2011 			}
2012 			goto next;
2013 
2014 		case INTEL_PT_PTWRITE:
2015 			decoder->state.type = INTEL_PT_PTW;
2016 			decoder->state.from_ip = decoder->ip;
2017 			decoder->state.to_ip = 0;
2018 			decoder->state.ptw_payload = decoder->packet.payload;
2019 			return 0;
2020 
2021 		case INTEL_PT_MWAIT:
2022 			decoder->fup_mwait_payload = decoder->packet.payload;
2023 			decoder->set_fup_mwait = true;
2024 			break;
2025 
2026 		case INTEL_PT_PWRE:
2027 			if (decoder->set_fup_mwait) {
2028 				decoder->fup_pwre_payload =
2029 							decoder->packet.payload;
2030 				decoder->set_fup_pwre = true;
2031 				break;
2032 			}
2033 			decoder->state.type = INTEL_PT_PWR_ENTRY;
2034 			decoder->state.from_ip = decoder->ip;
2035 			decoder->state.to_ip = 0;
2036 			decoder->state.pwrx_payload = decoder->packet.payload;
2037 			return 0;
2038 
2039 		case INTEL_PT_EXSTOP_IP:
2040 			err = intel_pt_get_next_packet(decoder);
2041 			if (err)
2042 				return err;
2043 			if (decoder->packet.type == INTEL_PT_FUP) {
2044 				decoder->set_fup_exstop = true;
2045 				no_tip = true;
2046 			} else {
2047 				intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
2048 						decoder->pos);
2049 			}
2050 			goto next;
2051 
2052 		case INTEL_PT_EXSTOP:
2053 			decoder->state.type = INTEL_PT_EX_STOP;
2054 			decoder->state.from_ip = decoder->ip;
2055 			decoder->state.to_ip = 0;
2056 			return 0;
2057 
2058 		case INTEL_PT_PWRX:
2059 			decoder->state.type = INTEL_PT_PWR_EXIT;
2060 			decoder->state.from_ip = decoder->ip;
2061 			decoder->state.to_ip = 0;
2062 			decoder->state.pwrx_payload = decoder->packet.payload;
2063 			return 0;
2064 
2065 		case INTEL_PT_BBP:
2066 		case INTEL_PT_BIP:
2067 		case INTEL_PT_BEP:
2068 		case INTEL_PT_BEP_IP:
2069 			break;
2070 
2071 		default:
2072 			return intel_pt_bug(decoder);
2073 		}
2074 	}
2075 }
2076 
2077 static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
2078 {
2079 	return decoder->packet.count &&
2080 	       (decoder->have_last_ip || decoder->packet.count == 3 ||
2081 		decoder->packet.count == 6);
2082 }
2083 
/*
 * Walk PSB+ packets to get in sync.
 *
 * Packets are consumed until PSBEND, picking up the IP (from FUP), timing
 * information and mode along the way.  decoder->in_psb is set for the
 * duration of the walk and cleared on every exit path.
 *
 * Return: 0 when PSBEND is reached, -ENOENT when a packet that does not
 * belong in PSB+ is found, the result of intel_pt_overflow() on OVF, the
 * result of intel_pt_bug() on a bad packet, or an error from
 * intel_pt_get_next_packet().
 */
static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
{
	int err;

	decoder->in_psb = true;

	while (1) {
		err = intel_pt_get_next_packet(decoder);
		if (err)
			goto out;

		switch (decoder->packet.type) {
		case INTEL_PT_TIP_PGD:
			decoder->continuous_period = false;
			__fallthrough;
		/* Packets that are not expected between PSB and PSBEND */
		case INTEL_PT_TIP_PGE:
		case INTEL_PT_TIP:
		case INTEL_PT_PTWRITE:
		case INTEL_PT_PTWRITE_IP:
		case INTEL_PT_EXSTOP:
		case INTEL_PT_EXSTOP_IP:
		case INTEL_PT_MWAIT:
		case INTEL_PT_PWRE:
		case INTEL_PT_PWRX:
		case INTEL_PT_BBP:
		case INTEL_PT_BIP:
		case INTEL_PT_BEP:
		case INTEL_PT_BEP_IP:
			intel_pt_log("ERROR: Unexpected packet\n");
			err = -ENOENT;
			goto out;

		case INTEL_PT_FUP:
			/* A FUP inside PSB+ marks packet generation as enabled */
			decoder->pge = true;
			if (intel_pt_have_ip(decoder)) {
				uint64_t current_ip = decoder->ip;

				intel_pt_set_ip(decoder);
				/* Only log when an IP was already set before */
				if (current_ip)
					intel_pt_log_to("Setting IP",
							decoder->ip);
			}
			break;

		case INTEL_PT_MTC:
			intel_pt_calc_mtc_timestamp(decoder);
			break;

		case INTEL_PT_TSC:
			intel_pt_calc_tsc_timestamp(decoder);
			break;

		case INTEL_PT_TMA:
			intel_pt_calc_tma(decoder);
			break;

		case INTEL_PT_CYC:
			intel_pt_calc_cyc_timestamp(decoder);
			break;

		case INTEL_PT_CBR:
			intel_pt_calc_cbr(decoder);
			break;

		case INTEL_PT_PIP:
			/* Bit 63 of the payload is masked off */
			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
			break;

		case INTEL_PT_MODE_EXEC:
			decoder->exec_mode = decoder->packet.payload;
			break;

		case INTEL_PT_MODE_TSX:
			intel_pt_update_in_tx(decoder);
			break;

		case INTEL_PT_TRACESTOP:
			decoder->pge = false;
			decoder->continuous_period = false;
			intel_pt_clear_tx_flags(decoder);
			__fallthrough;

		case INTEL_PT_TNT:
			decoder->have_tma = false;
			intel_pt_log("ERROR: Unexpected packet\n");
			/* The error state chosen depends on whether an IP is known */
			if (decoder->ip)
				decoder->pkt_state = INTEL_PT_STATE_ERR4;
			else
				decoder->pkt_state = INTEL_PT_STATE_ERR3;
			err = -ENOENT;
			goto out;

		case INTEL_PT_BAD: /* Does not happen */
			err = intel_pt_bug(decoder);
			goto out;

		case INTEL_PT_OVF:
			err = intel_pt_overflow(decoder);
			goto out;

		case INTEL_PT_PSBEND:
			err = 0;
			goto out;

		/* Nothing to do for these packets (or any unlisted type) */
		case INTEL_PT_PSB:
		case INTEL_PT_VMCS:
		case INTEL_PT_MNT:
		case INTEL_PT_PAD:
		default:
			break;
		}
	}
out:
	decoder->in_psb = false;

	return err;
}
2202 
2203 static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
2204 {
2205 	int err;
2206 
2207 	while (1) {
2208 		err = intel_pt_get_next_packet(decoder);
2209 		if (err)
2210 			return err;
2211 
2212 		switch (decoder->packet.type) {
2213 		case INTEL_PT_TIP_PGD:
2214 			decoder->continuous_period = false;
2215 			decoder->pge = false;
2216 			if (intel_pt_have_ip(decoder))
2217 				intel_pt_set_ip(decoder);
2218 			if (!decoder->ip)
2219 				break;
2220 			decoder->state.type |= INTEL_PT_TRACE_END;
2221 			return 0;
2222 
2223 		case INTEL_PT_TIP_PGE:
2224 			decoder->pge = true;
2225 			intel_pt_mtc_cyc_cnt_pge(decoder);
2226 			if (intel_pt_have_ip(decoder))
2227 				intel_pt_set_ip(decoder);
2228 			if (!decoder->ip)
2229 				break;
2230 			decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2231 			return 0;
2232 
2233 		case INTEL_PT_TIP:
2234 			decoder->pge = true;
2235 			if (intel_pt_have_ip(decoder))
2236 				intel_pt_set_ip(decoder);
2237 			if (!decoder->ip)
2238 				break;
2239 			return 0;
2240 
2241 		case INTEL_PT_FUP:
2242 			if (intel_pt_have_ip(decoder))
2243 				intel_pt_set_ip(decoder);
2244 			if (decoder->ip)
2245 				return 0;
2246 			break;
2247 
2248 		case INTEL_PT_MTC:
2249 			intel_pt_calc_mtc_timestamp(decoder);
2250 			break;
2251 
2252 		case INTEL_PT_TSC:
2253 			intel_pt_calc_tsc_timestamp(decoder);
2254 			break;
2255 
2256 		case INTEL_PT_TMA:
2257 			intel_pt_calc_tma(decoder);
2258 			break;
2259 
2260 		case INTEL_PT_CYC:
2261 			intel_pt_calc_cyc_timestamp(decoder);
2262 			break;
2263 
2264 		case INTEL_PT_CBR:
2265 			intel_pt_calc_cbr(decoder);
2266 			break;
2267 
2268 		case INTEL_PT_PIP:
2269 			decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
2270 			break;
2271 
2272 		case INTEL_PT_MODE_EXEC:
2273 			decoder->exec_mode = decoder->packet.payload;
2274 			break;
2275 
2276 		case INTEL_PT_MODE_TSX:
2277 			intel_pt_update_in_tx(decoder);
2278 			break;
2279 
2280 		case INTEL_PT_OVF:
2281 			return intel_pt_overflow(decoder);
2282 
2283 		case INTEL_PT_BAD: /* Does not happen */
2284 			return intel_pt_bug(decoder);
2285 
2286 		case INTEL_PT_TRACESTOP:
2287 			decoder->pge = false;
2288 			decoder->continuous_period = false;
2289 			intel_pt_clear_tx_flags(decoder);
2290 			decoder->have_tma = false;
2291 			break;
2292 
2293 		case INTEL_PT_PSB:
2294 			decoder->last_ip = 0;
2295 			decoder->have_last_ip = true;
2296 			intel_pt_clear_stack(&decoder->stack);
2297 			err = intel_pt_walk_psb(decoder);
2298 			if (err)
2299 				return err;
2300 			if (decoder->ip) {
2301 				/* Do not have a sample */
2302 				decoder->state.type = 0;
2303 				return 0;
2304 			}
2305 			break;
2306 
2307 		case INTEL_PT_TNT:
2308 		case INTEL_PT_PSBEND:
2309 		case INTEL_PT_VMCS:
2310 		case INTEL_PT_MNT:
2311 		case INTEL_PT_PAD:
2312 		case INTEL_PT_PTWRITE:
2313 		case INTEL_PT_PTWRITE_IP:
2314 		case INTEL_PT_EXSTOP:
2315 		case INTEL_PT_EXSTOP_IP:
2316 		case INTEL_PT_MWAIT:
2317 		case INTEL_PT_PWRE:
2318 		case INTEL_PT_PWRX:
2319 		case INTEL_PT_BBP:
2320 		case INTEL_PT_BIP:
2321 		case INTEL_PT_BEP:
2322 		case INTEL_PT_BEP_IP:
2323 		default:
2324 			break;
2325 		}
2326 	}
2327 }
2328 
2329 static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
2330 {
2331 	int err;
2332 
2333 	decoder->set_fup_tx_flags = false;
2334 	decoder->set_fup_ptw = false;
2335 	decoder->set_fup_mwait = false;
2336 	decoder->set_fup_pwre = false;
2337 	decoder->set_fup_exstop = false;
2338 
2339 	if (!decoder->branch_enable) {
2340 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2341 		decoder->overflow = false;
2342 		decoder->state.type = 0; /* Do not have a sample */
2343 		return 0;
2344 	}
2345 
2346 	intel_pt_log("Scanning for full IP\n");
2347 	err = intel_pt_walk_to_ip(decoder);
2348 	if (err)
2349 		return err;
2350 
2351 	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2352 	decoder->overflow = false;
2353 
2354 	decoder->state.from_ip = 0;
2355 	decoder->state.to_ip = decoder->ip;
2356 	intel_pt_log_to("Setting IP", decoder->ip);
2357 
2358 	return 0;
2359 }
2360 
2361 static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
2362 {
2363 	const unsigned char *end = decoder->buf + decoder->len;
2364 	size_t i;
2365 
2366 	for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
2367 		if (i > decoder->len)
2368 			continue;
2369 		if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
2370 			return i;
2371 	}
2372 	return 0;
2373 }
2374 
2375 static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
2376 {
2377 	size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
2378 	const char *psb = INTEL_PT_PSB_STR;
2379 
2380 	if (rest_psb > decoder->len ||
2381 	    memcmp(decoder->buf, psb + part_psb, rest_psb))
2382 		return 0;
2383 
2384 	return rest_psb;
2385 }
2386 
2387 static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
2388 				  int part_psb)
2389 {
2390 	int rest_psb, ret;
2391 
2392 	decoder->pos += decoder->len;
2393 	decoder->len = 0;
2394 
2395 	ret = intel_pt_get_next_data(decoder, false);
2396 	if (ret)
2397 		return ret;
2398 
2399 	rest_psb = intel_pt_rest_psb(decoder, part_psb);
2400 	if (!rest_psb)
2401 		return 0;
2402 
2403 	decoder->pos -= part_psb;
2404 	decoder->next_buf = decoder->buf + rest_psb;
2405 	decoder->next_len = decoder->len - rest_psb;
2406 	memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2407 	decoder->buf = decoder->temp_buf;
2408 	decoder->len = INTEL_PT_PSB_LEN;
2409 
2410 	return 0;
2411 }
2412 
2413 static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
2414 {
2415 	unsigned char *next;
2416 	int ret;
2417 
2418 	intel_pt_log("Scanning for PSB\n");
2419 	while (1) {
2420 		if (!decoder->len) {
2421 			ret = intel_pt_get_next_data(decoder, false);
2422 			if (ret)
2423 				return ret;
2424 		}
2425 
2426 		next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
2427 			      INTEL_PT_PSB_LEN);
2428 		if (!next) {
2429 			int part_psb;
2430 
2431 			part_psb = intel_pt_part_psb(decoder);
2432 			if (part_psb) {
2433 				ret = intel_pt_get_split_psb(decoder, part_psb);
2434 				if (ret)
2435 					return ret;
2436 			} else {
2437 				decoder->pos += decoder->len;
2438 				decoder->len = 0;
2439 			}
2440 			continue;
2441 		}
2442 
2443 		decoder->pkt_step = next - decoder->buf;
2444 		return intel_pt_get_next_packet(decoder);
2445 	}
2446 }
2447 
2448 static int intel_pt_sync(struct intel_pt_decoder *decoder)
2449 {
2450 	int err;
2451 
2452 	decoder->pge = false;
2453 	decoder->continuous_period = false;
2454 	decoder->have_last_ip = false;
2455 	decoder->last_ip = 0;
2456 	decoder->ip = 0;
2457 	intel_pt_clear_stack(&decoder->stack);
2458 
2459 	err = intel_pt_scan_for_psb(decoder);
2460 	if (err)
2461 		return err;
2462 
2463 	decoder->have_last_ip = true;
2464 	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
2465 
2466 	err = intel_pt_walk_psb(decoder);
2467 	if (err)
2468 		return err;
2469 
2470 	if (decoder->ip) {
2471 		decoder->state.type = 0; /* Do not have a sample */
2472 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2473 	} else {
2474 		return intel_pt_sync_ip(decoder);
2475 	}
2476 
2477 	return 0;
2478 }
2479 
2480 static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
2481 {
2482 	uint64_t est = decoder->sample_insn_cnt << 1;
2483 
2484 	if (!decoder->cbr || !decoder->max_non_turbo_ratio)
2485 		goto out;
2486 
2487 	est *= decoder->max_non_turbo_ratio;
2488 	est /= decoder->cbr;
2489 out:
2490 	return decoder->sample_timestamp + est;
2491 }
2492 
2493 const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2494 {
2495 	int err;
2496 
2497 	do {
2498 		decoder->state.type = INTEL_PT_BRANCH;
2499 		decoder->state.flags = 0;
2500 
2501 		switch (decoder->pkt_state) {
2502 		case INTEL_PT_STATE_NO_PSB:
2503 			err = intel_pt_sync(decoder);
2504 			break;
2505 		case INTEL_PT_STATE_NO_IP:
2506 			decoder->have_last_ip = false;
2507 			decoder->last_ip = 0;
2508 			decoder->ip = 0;
2509 			__fallthrough;
2510 		case INTEL_PT_STATE_ERR_RESYNC:
2511 			err = intel_pt_sync_ip(decoder);
2512 			break;
2513 		case INTEL_PT_STATE_IN_SYNC:
2514 			err = intel_pt_walk_trace(decoder);
2515 			break;
2516 		case INTEL_PT_STATE_TNT:
2517 		case INTEL_PT_STATE_TNT_CONT:
2518 			err = intel_pt_walk_tnt(decoder);
2519 			if (err == -EAGAIN)
2520 				err = intel_pt_walk_trace(decoder);
2521 			break;
2522 		case INTEL_PT_STATE_TIP:
2523 		case INTEL_PT_STATE_TIP_PGD:
2524 			err = intel_pt_walk_tip(decoder);
2525 			break;
2526 		case INTEL_PT_STATE_FUP:
2527 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2528 			err = intel_pt_walk_fup(decoder);
2529 			if (err == -EAGAIN)
2530 				err = intel_pt_walk_fup_tip(decoder);
2531 			else if (!err)
2532 				decoder->pkt_state = INTEL_PT_STATE_FUP;
2533 			break;
2534 		case INTEL_PT_STATE_FUP_NO_TIP:
2535 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2536 			err = intel_pt_walk_fup(decoder);
2537 			if (err == -EAGAIN)
2538 				err = intel_pt_walk_trace(decoder);
2539 			break;
2540 		default:
2541 			err = intel_pt_bug(decoder);
2542 			break;
2543 		}
2544 	} while (err == -ENOLINK);
2545 
2546 	if (err) {
2547 		decoder->state.err = intel_pt_ext_err(err);
2548 		decoder->state.from_ip = decoder->ip;
2549 		intel_pt_update_sample_time(decoder);
2550 		decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2551 	} else {
2552 		decoder->state.err = 0;
2553 		if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
2554 			decoder->cbr_seen = decoder->cbr;
2555 			decoder->state.type |= INTEL_PT_CBR_CHG;
2556 			decoder->state.cbr_payload = decoder->cbr_payload;
2557 		}
2558 		if (intel_pt_sample_time(decoder->pkt_state)) {
2559 			intel_pt_update_sample_time(decoder);
2560 			if (decoder->sample_cyc)
2561 				decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2562 		}
2563 	}
2564 
2565 	decoder->state.timestamp = decoder->sample_timestamp;
2566 	decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
2567 	decoder->state.cr3 = decoder->cr3;
2568 	decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
2569 	decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
2570 
2571 	return &decoder->state;
2572 }
2573 
2574 /**
2575  * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
2576  * @buf: pointer to buffer pointer
2577  * @len: size of buffer
2578  *
2579  * Updates the buffer pointer to point to the start of the next PSB packet if
2580  * there is one, otherwise the buffer pointer is unchanged.  If @buf is updated,
2581  * @len is adjusted accordingly.
2582  *
2583  * Return: %true if a PSB packet is found, %false otherwise.
2584  */
2585 static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
2586 {
2587 	unsigned char *next;
2588 
2589 	next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2590 	if (next) {
2591 		*len -= next - *buf;
2592 		*buf = next;
2593 		return true;
2594 	}
2595 	return false;
2596 }
2597 
2598 /**
2599  * intel_pt_step_psb - move buffer pointer to the start of the following PSB
2600  *                     packet.
2601  * @buf: pointer to buffer pointer
2602  * @len: size of buffer
2603  *
2604  * Updates the buffer pointer to point to the start of the following PSB packet
2605  * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
2606  * pointer is unchanged.  If @buf is updated, @len is adjusted accordingly.
2607  *
2608  * Return: %true if a PSB packet is found, %false otherwise.
2609  */
2610 static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
2611 {
2612 	unsigned char *next;
2613 
2614 	if (!*len)
2615 		return false;
2616 
2617 	next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
2618 	if (next) {
2619 		*len -= next - *buf;
2620 		*buf = next;
2621 		return true;
2622 	}
2623 	return false;
2624 }
2625 
2626 /**
2627  * intel_pt_last_psb - find the last PSB packet in a buffer.
2628  * @buf: buffer
2629  * @len: size of buffer
2630  *
2631  * This function finds the last PSB in a buffer.
2632  *
2633  * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
2634  */
2635 static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
2636 {
2637 	const char *n = INTEL_PT_PSB_STR;
2638 	unsigned char *p;
2639 	size_t k;
2640 
2641 	if (len < INTEL_PT_PSB_LEN)
2642 		return NULL;
2643 
2644 	k = len - INTEL_PT_PSB_LEN + 1;
2645 	while (1) {
2646 		p = memrchr(buf, n[0], k);
2647 		if (!p)
2648 			return NULL;
2649 		if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
2650 			return p;
2651 		k = p - buf;
2652 		if (!k)
2653 			return NULL;
2654 	}
2655 }
2656 
2657 /**
2658  * intel_pt_next_tsc - find and return next TSC.
2659  * @buf: buffer
2660  * @len: size of buffer
2661  * @tsc: TSC value returned
2662  * @rem: returns remaining size when TSC is found
2663  *
2664  * Find a TSC packet in @buf and return the TSC value.  This function assumes
2665  * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a
2666  * PSBEND packet is found.
2667  *
2668  * Return: %true if TSC is found, false otherwise.
2669  */
2670 static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
2671 			      size_t *rem)
2672 {
2673 	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
2674 	struct intel_pt_pkt packet;
2675 	int ret;
2676 
2677 	while (len) {
2678 		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
2679 		if (ret <= 0)
2680 			return false;
2681 		if (packet.type == INTEL_PT_TSC) {
2682 			*tsc = packet.payload;
2683 			*rem = len;
2684 			return true;
2685 		}
2686 		if (packet.type == INTEL_PT_PSBEND)
2687 			return false;
2688 		buf += ret;
2689 		len -= ret;
2690 	}
2691 	return false;
2692 }
2693 
/**
 * intel_pt_tsc_cmp - compare 7-byte TSCs.
 * @tsc1: first TSC to compare
 * @tsc2: second TSC to compare
 *
 * This function compares 7-byte TSC values allowing for the possibility that
 * TSC wrapped around.  Generally it is not possible to know if TSC has wrapped
 * around so for that purpose this function assumes the absolute difference is
 * less than half the maximum difference: if the numerical gap between the two
 * values is 2^55 or more, the numerically smaller value is taken to be the
 * later one.
 *
 * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
 * after @tsc2.
 */
static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
{
	const uint64_t halfway = 1ULL << 55;
	uint64_t gap;
	int order;

	if (tsc1 == tsc2)
		return 0;

	/* 'order' is the answer if no wrap-around is assumed */
	if (tsc1 < tsc2) {
		gap = tsc2 - tsc1;
		order = -1;
	} else {
		gap = tsc1 - tsc2;
		order = 1;
	}

	/* A gap of at least half the range is treated as a wrap-around */
	return gap < halfway ? order : -order;
}
2726 
2727 #define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
2728 
2729 /**
2730  * adj_for_padding - adjust overlap to account for padding.
2731  * @buf_b: second buffer
2732  * @buf_a: first buffer
2733  * @len_a: size of first buffer
2734  *
2735  * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
2736  * accordingly.
2737  *
2738  * Return: A pointer into @buf_b from where non-overlapped data starts
2739  */
2740 static unsigned char *adj_for_padding(unsigned char *buf_b,
2741 				      unsigned char *buf_a, size_t len_a)
2742 {
2743 	unsigned char *p = buf_b - MAX_PADDING;
2744 	unsigned char *q = buf_a + len_a - MAX_PADDING;
2745 	int i;
2746 
2747 	for (i = MAX_PADDING; i; i--, p++, q++) {
2748 		if (*p != *q)
2749 			break;
2750 	}
2751 
2752 	return p;
2753 }
2754 
/**
 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
 *                             using TSC.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @consecutive: returns true if there is data in buf_b that is consecutive
 *               to buf_a
 *
 * If the trace contains TSC we can look at the last TSC of @buf_a and the
 * first TSC of @buf_b in order to determine if the buffers overlap, and then
 * walk forward in @buf_b until a later TSC is found.  A precondition is that
 * @buf_a and @buf_b are positioned at a PSB.
 *
 * @consecutive is only written (set to true) when a matching TSC is found;
 * it is left untouched otherwise.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
						size_t len_a,
						unsigned char *buf_b,
						size_t len_b, bool *consecutive)
{
	uint64_t tsc_a, tsc_b;
	size_t tail, rem_a, rem_b;
	unsigned char *psb;
	int attempt, cmp;

	/*
	 * Get the TSC from the last PSB+ in buf_a.  If that PSB+ is incomplete
	 * (no TSC found), go back one more PSB, but only once.
	 */
	for (attempt = 0; ; attempt++) {
		psb = intel_pt_last_psb(buf_a, len_a);
		if (!psb)
			return buf_b; /* No (full) PSB+ => assume no overlap */

		tail = len_a - (psb - buf_a);
		if (intel_pt_next_tsc(psb, tail, &tsc_a, &rem_a))
			break;

		if (attempt)
			return buf_b; /* No TSC in buf_a => assume no overlap */

		len_a -= tail;
	}

	do {
		/* Ignore PSB+ with no TSC */
		if (!intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b))
			continue;

		cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
		if (cmp < 0)
			return buf_b; /* tsc_a < tsc_b => no overlap */

		/* Same TSC, so buffers are consecutive */
		if (cmp == 0 && rem_b >= rem_a) {
			unsigned char *start;

			*consecutive = true;
			start = buf_b + len_b - (rem_b - rem_a);
			return adj_for_padding(start, buf_a, len_a);
		}
	} while (intel_pt_step_psb(&buf_b, &len_b));

	return buf_b + len_b; /* No PSB in buf_b => no data */
}
2819 
/**
 * intel_pt_find_overlap - determine start of non-overlapped trace data.
 * @buf_a: first buffer
 * @len_a: size of first buffer
 * @buf_b: second buffer
 * @len_b: size of second buffer
 * @have_tsc: can use TSC packets to detect overlap
 * @consecutive: returns true if there is data in buf_b that is consecutive
 *               to buf_a
 *
 * When trace samples or snapshots are recorded there is the possibility that
 * the data overlaps.  Note that, for the purposes of decoding, data is only
 * useful if it begins with a PSB packet.
 *
 * With @have_tsc the decision is delegated to intel_pt_find_overlap_tsc();
 * the byte-wise comparison below is used otherwise (or if that function
 * returns %NULL).  @consecutive is only written when an overlap is found.
 *
 * Return: A pointer into @buf_b from where non-overlapped data starts, or
 * @buf_b + @len_b if there is no non-overlapped data.
 */
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
				     unsigned char *buf_b, size_t len_b,
				     bool have_tsc, bool *consecutive)
{
	unsigned char *found;

	/* Buffer 'b' must start at PSB so throw away everything before that */
	if (!intel_pt_next_psb(&buf_b, &len_b))
		return buf_b + len_b; /* No PSB */

	if (!intel_pt_next_psb(&buf_a, &len_a))
		return buf_b; /* No overlap */

	if (have_tsc) {
		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
						  consecutive);
		if (found)
			return found;
	}

	/*
	 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
	 * we can ignore the first part of buffer 'a'.
	 */
	while (len_a > len_b) {
		if (!intel_pt_step_psb(&buf_a, &len_a))
			return buf_b; /* No overlap */
	}

	/* Now len_b >= len_a */
	do {
		/* Potential overlap so check the bytes */
		if (memmem(buf_a, len_a, buf_b, len_a)) {
			*consecutive = true;
			return adj_for_padding(buf_b + len_a, buf_a, len_a);
		}

		/* Try again at next PSB in buffer 'a' */
	} while (intel_pt_step_psb(&buf_a, &len_a));

	return buf_b; /* No overlap */
}
2880 
/**
 * struct fast_forward_data - data used by intel_pt_ff_cb().
 * @timestamp: timestamp to fast forward towards (input to the callback)
 * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
 *                 the fast forward timestamp (output of the callback).  It is
 *                 zero-initialized by intel_pt_fast_forward(), which treats a
 *                 value of zero as "no such buffer found".
 */
struct fast_forward_data {
	uint64_t timestamp;
	uint64_t buf_timestamp;
};
2891 
2892 /**
2893  * intel_pt_ff_cb - fast forward lookahead callback.
2894  * @buffer: Intel PT trace buffer
2895  * @data: opaque pointer to fast forward data (struct fast_forward_data)
2896  *
2897  * Determine if @buffer trace is past the fast forward timestamp.
2898  *
2899  * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
2900  *         timestamp, and 0 otherwise.
2901  */
2902 static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
2903 {
2904 	struct fast_forward_data *d = data;
2905 	unsigned char *buf;
2906 	uint64_t tsc;
2907 	size_t rem;
2908 	size_t len;
2909 
2910 	buf = (unsigned char *)buffer->buf;
2911 	len = buffer->len;
2912 
2913 	if (!intel_pt_next_psb(&buf, &len) ||
2914 	    !intel_pt_next_tsc(buf, len, &tsc, &rem))
2915 		return 0;
2916 
2917 	tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
2918 
2919 	intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
2920 		     tsc, buffer->ref_timestamp);
2921 
2922 	/*
2923 	 * If the buffer contains a timestamp earlier that the fast forward
2924 	 * timestamp, then record it, else stop.
2925 	 */
2926 	if (tsc < d->timestamp)
2927 		d->buf_timestamp = buffer->ref_timestamp;
2928 	else
2929 		return 1;
2930 
2931 	return 0;
2932 }
2933 
2934 /**
2935  * intel_pt_fast_forward - reposition decoder forwards.
2936  * @decoder: Intel PT decoder
2937  * @timestamp: timestamp to fast forward towards
2938  *
2939  * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
2940  *
2941  * Return: 0 on success or negative error code on failure.
2942  */
2943 int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
2944 {
2945 	struct fast_forward_data d = { .timestamp = timestamp };
2946 	unsigned char *buf;
2947 	size_t len;
2948 	int err;
2949 
2950 	intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
2951 
2952 	/* Find buffer timestamp of buffer to fast forward to */
2953 	err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
2954 	if (err < 0)
2955 		return err;
2956 
2957 	/* Walk to buffer with same buffer timestamp */
2958 	if (d.buf_timestamp) {
2959 		do {
2960 			decoder->pos += decoder->len;
2961 			decoder->len = 0;
2962 			err = intel_pt_get_next_data(decoder, true);
2963 			/* -ENOLINK means non-consecutive trace */
2964 			if (err && err != -ENOLINK)
2965 				return err;
2966 		} while (decoder->buf_timestamp != d.buf_timestamp);
2967 	}
2968 
2969 	if (!decoder->buf)
2970 		return 0;
2971 
2972 	buf = (unsigned char *)decoder->buf;
2973 	len = decoder->len;
2974 
2975 	if (!intel_pt_next_psb(&buf, &len))
2976 		return 0;
2977 
2978 	/*
2979 	 * Walk PSBs while the PSB timestamp is less than the fast forward
2980 	 * timestamp.
2981 	 */
2982 	do {
2983 		uint64_t tsc;
2984 		size_t rem;
2985 
2986 		if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
2987 			break;
2988 		tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
2989 		/*
2990 		 * A TSC packet can slip past MTC packets but, after fast
2991 		 * forward, decoding starts at the TSC timestamp. That means
2992 		 * the timestamps may not be exactly the same as the timestamps
2993 		 * that would have been decoded without fast forward.
2994 		 */
2995 		if (tsc < timestamp) {
2996 			intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
2997 			decoder->pos += decoder->len - len;
2998 			decoder->buf = buf;
2999 			decoder->len = len;
3000 			intel_pt_reposition(decoder);
3001 		} else {
3002 			break;
3003 		}
3004 	} while (intel_pt_step_psb(&buf, &len));
3005 
3006 	return 0;
3007 }
3008