1 /*
2 * Copyright (c) 2016-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "pt_block_decoder.h"
30 #include "pt_block_cache.h"
31 #include "pt_section.h"
32 #include "pt_image.h"
33 #include "pt_insn.h"
34 #include "pt_config.h"
35 #include "pt_asid.h"
36 #include "pt_compiler.h"
37
38 #include "intel-pt.h"
39
40 #include <string.h>
41 #include <stdlib.h>
42
43
44 static int pt_blk_proceed_trailing_event(struct pt_block_decoder *,
45 struct pt_block *);
46
47
pt_blk_status(const struct pt_block_decoder * decoder,int flags)48 static int pt_blk_status(const struct pt_block_decoder *decoder, int flags)
49 {
50 int status;
51
52 if (!decoder)
53 return -pte_internal;
54
55 status = decoder->status;
56
57 /* Indicate whether tracing is disabled or enabled.
58 *
59 * This duplicates the indication in struct pt_insn and covers the case
60 * where we indicate the status after synchronizing.
61 */
62 if (!decoder->enabled)
63 flags |= pts_ip_suppressed;
64
65 /* Forward end-of-trace indications.
66 *
67 * Postpone it as long as we're still processing events, though.
68 */
69 if ((status & pts_eos) && !decoder->process_event)
70 flags |= pts_eos;
71
72 return flags;
73 }
74
pt_blk_reset(struct pt_block_decoder * decoder)75 static void pt_blk_reset(struct pt_block_decoder *decoder)
76 {
77 if (!decoder)
78 return;
79
80 decoder->mode = ptem_unknown;
81 decoder->ip = 0ull;
82 decoder->status = 0;
83 decoder->enabled = 0;
84 decoder->process_event = 0;
85 decoder->speculative = 0;
86 decoder->process_insn = 0;
87 decoder->bound_paging = 0;
88 decoder->bound_vmcs = 0;
89 decoder->bound_ptwrite = 0;
90
91 memset(&decoder->event, 0, sizeof(decoder->event));
92 pt_retstack_init(&decoder->retstack);
93 pt_asid_init(&decoder->asid);
94 }
95
96 /* Initialize the query decoder flags based on our flags. */
97
pt_blk_init_qry_flags(struct pt_conf_flags * qflags,const struct pt_conf_flags * flags)98 static int pt_blk_init_qry_flags(struct pt_conf_flags *qflags,
99 const struct pt_conf_flags *flags)
100 {
101 if (!qflags || !flags)
102 return -pte_internal;
103
104 memset(qflags, 0, sizeof(*qflags));
105
106 return 0;
107 }
108
pt_blk_decoder_init(struct pt_block_decoder * decoder,const struct pt_config * uconfig)109 int pt_blk_decoder_init(struct pt_block_decoder *decoder,
110 const struct pt_config *uconfig)
111 {
112 struct pt_config config;
113 int errcode;
114
115 if (!decoder)
116 return -pte_internal;
117
118 errcode = pt_config_from_user(&config, uconfig);
119 if (errcode < 0)
120 return errcode;
121
122 /* The user supplied decoder flags. */
123 decoder->flags = config.flags;
124
125 /* Set the flags we need for the query decoder we use. */
126 errcode = pt_blk_init_qry_flags(&config.flags, &decoder->flags);
127 if (errcode < 0)
128 return errcode;
129
130 errcode = pt_qry_decoder_init(&decoder->query, &config);
131 if (errcode < 0)
132 return errcode;
133
134 pt_image_init(&decoder->default_image, NULL);
135 decoder->image = &decoder->default_image;
136
137 errcode = pt_msec_cache_init(&decoder->scache);
138 if (errcode < 0)
139 return errcode;
140
141 pt_blk_reset(decoder);
142
143 return 0;
144 }
145
pt_blk_decoder_fini(struct pt_block_decoder * decoder)146 void pt_blk_decoder_fini(struct pt_block_decoder *decoder)
147 {
148 if (!decoder)
149 return;
150
151 pt_msec_cache_fini(&decoder->scache);
152 pt_image_fini(&decoder->default_image);
153 pt_qry_decoder_fini(&decoder->query);
154 }
155
156 struct pt_block_decoder *
pt_blk_alloc_decoder(const struct pt_config * config)157 pt_blk_alloc_decoder(const struct pt_config *config)
158 {
159 struct pt_block_decoder *decoder;
160 int errcode;
161
162 decoder = malloc(sizeof(*decoder));
163 if (!decoder)
164 return NULL;
165
166 errcode = pt_blk_decoder_init(decoder, config);
167 if (errcode < 0) {
168 free(decoder);
169 return NULL;
170 }
171
172 return decoder;
173 }
174
pt_blk_free_decoder(struct pt_block_decoder * decoder)175 void pt_blk_free_decoder(struct pt_block_decoder *decoder)
176 {
177 if (!decoder)
178 return;
179
180 pt_blk_decoder_fini(decoder);
181 free(decoder);
182 }
183
184 /* Maybe synthesize a tick event.
185 *
186 * If we're not already processing events, check the current time against the
187 * last event's time. If it changed, synthesize a tick event with the new time.
188 *
189 * Returns zero if no tick event has been created.
190 * Returns a positive integer if a tick event has been created.
191 * Returns a negative error code otherwise.
192 */
pt_blk_tick(struct pt_block_decoder * decoder,uint64_t ip)193 static int pt_blk_tick(struct pt_block_decoder *decoder, uint64_t ip)
194 {
195 struct pt_event *ev;
196 uint64_t tsc;
197 uint32_t lost_mtc, lost_cyc;
198 int errcode;
199
200 if (!decoder)
201 return -pte_internal;
202
203 /* We're not generating tick events if tracing is disabled. */
204 if (!decoder->enabled)
205 return -pte_internal;
206
207 /* Events already provide a timestamp so there is no need to synthesize
208 * an artificial tick event. There's no room, either, since this would
209 * overwrite the in-progress event.
210 *
211 * In rare cases where we need to proceed to an event location using
212 * trace this may cause us to miss a timing update if the event is not
213 * forwarded to the user.
214 *
215 * The only case I can come up with at the moment is a MODE.EXEC binding
216 * to the TIP IP of a far branch.
217 */
218 if (decoder->process_event)
219 return 0;
220
221 errcode = pt_qry_time(&decoder->query, &tsc, &lost_mtc, &lost_cyc);
222 if (errcode < 0) {
223 /* If we don't have wall-clock time, we use relative time. */
224 if (errcode != -pte_no_time)
225 return errcode;
226 }
227
228 ev = &decoder->event;
229
230 /* We're done if time has not changed since the last event. */
231 if (tsc == ev->tsc)
232 return 0;
233
234 /* Time has changed so we create a new tick event. */
235 memset(ev, 0, sizeof(*ev));
236 ev->type = ptev_tick;
237 ev->variant.tick.ip = ip;
238
239 /* Indicate if we have wall-clock time or only relative time. */
240 if (errcode != -pte_no_time)
241 ev->has_tsc = 1;
242 ev->tsc = tsc;
243 ev->lost_mtc = lost_mtc;
244 ev->lost_cyc = lost_cyc;
245
246 /* We now have an event to process. */
247 decoder->process_event = 1;
248
249 return 1;
250 }
251
252 /* Query an indirect branch.
253 *
254 * Returns zero on success, a negative error code otherwise.
255 */
pt_blk_indirect_branch(struct pt_block_decoder * decoder,uint64_t * ip)256 static int pt_blk_indirect_branch(struct pt_block_decoder *decoder,
257 uint64_t *ip)
258 {
259 uint64_t evip;
260 int status, errcode;
261
262 if (!decoder)
263 return -pte_internal;
264
265 evip = decoder->ip;
266
267 status = pt_qry_indirect_branch(&decoder->query, ip);
268 if (status < 0)
269 return status;
270
271 if (decoder->flags.variant.block.enable_tick_events) {
272 errcode = pt_blk_tick(decoder, evip);
273 if (errcode < 0)
274 return errcode;
275 }
276
277 return status;
278 }
279
280 /* Query a conditional branch.
281 *
282 * Returns zero on success, a negative error code otherwise.
283 */
pt_blk_cond_branch(struct pt_block_decoder * decoder,int * taken)284 static int pt_blk_cond_branch(struct pt_block_decoder *decoder, int *taken)
285 {
286 int status, errcode;
287
288 if (!decoder)
289 return -pte_internal;
290
291 status = pt_qry_cond_branch(&decoder->query, taken);
292 if (status < 0)
293 return status;
294
295 if (decoder->flags.variant.block.enable_tick_events) {
296 errcode = pt_blk_tick(decoder, decoder->ip);
297 if (errcode < 0)
298 return errcode;
299 }
300
301 return status;
302 }
303
pt_blk_start(struct pt_block_decoder * decoder,int status)304 static int pt_blk_start(struct pt_block_decoder *decoder, int status)
305 {
306 if (!decoder)
307 return -pte_internal;
308
309 if (status < 0)
310 return status;
311
312 decoder->status = status;
313 if (!(status & pts_ip_suppressed))
314 decoder->enabled = 1;
315
316 /* We will always have an event.
317 *
318 * If we synchronized onto an empty PSB+, tracing is disabled and we'll
319 * process events until the enabled event.
320 *
321 * If tracing is enabled, PSB+ must at least provide the execution mode,
322 * which we're going to forward to the user.
323 */
324 return pt_blk_proceed_trailing_event(decoder, NULL);
325 }
326
pt_blk_sync_reset(struct pt_block_decoder * decoder)327 static int pt_blk_sync_reset(struct pt_block_decoder *decoder)
328 {
329 if (!decoder)
330 return -pte_internal;
331
332 pt_blk_reset(decoder);
333
334 return 0;
335 }
336
pt_blk_sync_forward(struct pt_block_decoder * decoder)337 int pt_blk_sync_forward(struct pt_block_decoder *decoder)
338 {
339 int errcode, status;
340
341 if (!decoder)
342 return -pte_invalid;
343
344 errcode = pt_blk_sync_reset(decoder);
345 if (errcode < 0)
346 return errcode;
347
348 status = pt_qry_sync_forward(&decoder->query, &decoder->ip);
349
350 return pt_blk_start(decoder, status);
351 }
352
pt_blk_sync_backward(struct pt_block_decoder * decoder)353 int pt_blk_sync_backward(struct pt_block_decoder *decoder)
354 {
355 int errcode, status;
356
357 if (!decoder)
358 return -pte_invalid;
359
360 errcode = pt_blk_sync_reset(decoder);
361 if (errcode < 0)
362 return errcode;
363
364 status = pt_qry_sync_backward(&decoder->query, &decoder->ip);
365
366 return pt_blk_start(decoder, status);
367 }
368
pt_blk_sync_set(struct pt_block_decoder * decoder,uint64_t offset)369 int pt_blk_sync_set(struct pt_block_decoder *decoder, uint64_t offset)
370 {
371 int errcode, status;
372
373 if (!decoder)
374 return -pte_invalid;
375
376 errcode = pt_blk_sync_reset(decoder);
377 if (errcode < 0)
378 return errcode;
379
380 status = pt_qry_sync_set(&decoder->query, &decoder->ip, offset);
381
382 return pt_blk_start(decoder, status);
383 }
384
pt_blk_get_offset(const struct pt_block_decoder * decoder,uint64_t * offset)385 int pt_blk_get_offset(const struct pt_block_decoder *decoder, uint64_t *offset)
386 {
387 if (!decoder)
388 return -pte_invalid;
389
390 return pt_qry_get_offset(&decoder->query, offset);
391 }
392
pt_blk_get_sync_offset(const struct pt_block_decoder * decoder,uint64_t * offset)393 int pt_blk_get_sync_offset(const struct pt_block_decoder *decoder,
394 uint64_t *offset)
395 {
396 if (!decoder)
397 return -pte_invalid;
398
399 return pt_qry_get_sync_offset(&decoder->query, offset);
400 }
401
pt_blk_get_image(struct pt_block_decoder * decoder)402 struct pt_image *pt_blk_get_image(struct pt_block_decoder *decoder)
403 {
404 if (!decoder)
405 return NULL;
406
407 return decoder->image;
408 }
409
pt_blk_set_image(struct pt_block_decoder * decoder,struct pt_image * image)410 int pt_blk_set_image(struct pt_block_decoder *decoder, struct pt_image *image)
411 {
412 if (!decoder)
413 return -pte_invalid;
414
415 if (!image)
416 image = &decoder->default_image;
417
418 decoder->image = image;
419 return 0;
420 }
421
422 const struct pt_config *
pt_blk_get_config(const struct pt_block_decoder * decoder)423 pt_blk_get_config(const struct pt_block_decoder *decoder)
424 {
425 if (!decoder)
426 return NULL;
427
428 return pt_qry_get_config(&decoder->query);
429 }
430
pt_blk_time(struct pt_block_decoder * decoder,uint64_t * time,uint32_t * lost_mtc,uint32_t * lost_cyc)431 int pt_blk_time(struct pt_block_decoder *decoder, uint64_t *time,
432 uint32_t *lost_mtc, uint32_t *lost_cyc)
433 {
434 if (!decoder || !time)
435 return -pte_invalid;
436
437 return pt_qry_time(&decoder->query, time, lost_mtc, lost_cyc);
438 }
439
pt_blk_core_bus_ratio(struct pt_block_decoder * decoder,uint32_t * cbr)440 int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder, uint32_t *cbr)
441 {
442 if (!decoder || !cbr)
443 return -pte_invalid;
444
445 return pt_qry_core_bus_ratio(&decoder->query, cbr);
446 }
447
pt_blk_asid(const struct pt_block_decoder * decoder,struct pt_asid * asid,size_t size)448 int pt_blk_asid(const struct pt_block_decoder *decoder, struct pt_asid *asid,
449 size_t size)
450 {
451 if (!decoder || !asid)
452 return -pte_invalid;
453
454 return pt_asid_to_user(asid, &decoder->asid, size);
455 }
456
457 /* Fetch the next pending event.
458 *
459 * Checks for pending events. If an event is pending, fetches it (if not
460 * already in process).
461 *
462 * Returns zero if no event is pending.
463 * Returns a positive integer if an event is pending or in process.
464 * Returns a negative error code otherwise.
465 */
pt_blk_fetch_event(struct pt_block_decoder * decoder)466 static inline int pt_blk_fetch_event(struct pt_block_decoder *decoder)
467 {
468 int status;
469
470 if (!decoder)
471 return -pte_internal;
472
473 if (decoder->process_event)
474 return 1;
475
476 if (!(decoder->status & pts_event_pending))
477 return 0;
478
479 status = pt_qry_event(&decoder->query, &decoder->event,
480 sizeof(decoder->event));
481 if (status < 0)
482 return status;
483
484 decoder->process_event = 1;
485 decoder->status = status;
486
487 return 1;
488 }
489
pt_blk_block_is_empty(const struct pt_block * block)490 static inline int pt_blk_block_is_empty(const struct pt_block *block)
491 {
492 if (!block)
493 return 1;
494
495 return !block->ninsn;
496 }
497
block_to_user(struct pt_block * ublock,size_t size,const struct pt_block * block)498 static inline int block_to_user(struct pt_block *ublock, size_t size,
499 const struct pt_block *block)
500 {
501 if (!ublock || !block)
502 return -pte_internal;
503
504 if (ublock == block)
505 return 0;
506
507 /* Zero out any unknown bytes. */
508 if (sizeof(*block) < size) {
509 memset(ublock + sizeof(*block), 0, size - sizeof(*block));
510
511 size = sizeof(*block);
512 }
513
514 memcpy(ublock, block, size);
515
516 return 0;
517 }
518
pt_insn_false(const struct pt_insn * insn,const struct pt_insn_ext * iext)519 static int pt_insn_false(const struct pt_insn *insn,
520 const struct pt_insn_ext *iext)
521 {
522 (void) insn;
523 (void) iext;
524
525 return 0;
526 }
527
528 /* Determine the next IP using trace.
529 *
530 * Tries to determine the IP of the next instruction using trace and provides it
531 * in @pip.
532 *
533 * Not requiring trace to determine the IP is treated as an internal error.
534 *
535 * Does not update the return compression stack for indirect calls. This is
536 * expected to have been done, already, when trying to determine the next IP
537 * without using trace.
538 *
539 * Does not update @decoder->status. The caller is expected to do that.
540 *
541 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
542 * code otherwise.
543 * Returns -pte_internal if @pip, @decoder, @insn, or @iext are NULL.
544 * Returns -pte_internal if no trace is required.
545 */
pt_blk_next_ip(uint64_t * pip,struct pt_block_decoder * decoder,const struct pt_insn * insn,const struct pt_insn_ext * iext)546 static int pt_blk_next_ip(uint64_t *pip, struct pt_block_decoder *decoder,
547 const struct pt_insn *insn,
548 const struct pt_insn_ext *iext)
549 {
550 int status, errcode;
551
552 if (!pip || !decoder || !insn || !iext)
553 return -pte_internal;
554
555 /* We handle non-taken conditional branches, and compressed returns
556 * directly in the switch.
557 *
558 * All kinds of branches are handled below the switch.
559 */
560 switch (insn->iclass) {
561 case ptic_cond_jump: {
562 uint64_t ip;
563 int taken;
564
565 status = pt_blk_cond_branch(decoder, &taken);
566 if (status < 0)
567 return status;
568
569 ip = insn->ip + insn->size;
570 if (taken)
571 ip += iext->variant.branch.displacement;
572
573 *pip = ip;
574 return status;
575 }
576
577 case ptic_return: {
578 int taken;
579
580 /* Check for a compressed return. */
581 status = pt_blk_cond_branch(decoder, &taken);
582 if (status < 0) {
583 if (status != -pte_bad_query)
584 return status;
585
586 break;
587 }
588
589 /* A compressed return is indicated by a taken conditional
590 * branch.
591 */
592 if (!taken)
593 return -pte_bad_retcomp;
594
595 errcode = pt_retstack_pop(&decoder->retstack, pip);
596 if (errcode < 0)
597 return errcode;
598
599 return status;
600 }
601
602 case ptic_jump:
603 case ptic_call:
604 /* A direct jump or call wouldn't require trace. */
605 if (iext->variant.branch.is_direct)
606 return -pte_internal;
607
608 break;
609
610 case ptic_far_call:
611 case ptic_far_return:
612 case ptic_far_jump:
613 break;
614
615 case ptic_ptwrite:
616 case ptic_other:
617 return -pte_internal;
618
619 case ptic_error:
620 return -pte_bad_insn;
621 }
622
623 /* Process an indirect branch.
624 *
625 * This covers indirect jumps and calls, non-compressed returns, and all
626 * flavors of far transfers.
627 */
628 return pt_blk_indirect_branch(decoder, pip);
629 }
630
631 /* Proceed to the next IP using trace.
632 *
633 * We failed to proceed without trace. This ends the current block. Now use
634 * trace to do one final step to determine the start IP of the next block.
635 *
636 * Returns zero on success, a negative error code otherwise.
637 */
pt_blk_proceed_with_trace(struct pt_block_decoder * decoder,const struct pt_insn * insn,const struct pt_insn_ext * iext)638 static int pt_blk_proceed_with_trace(struct pt_block_decoder *decoder,
639 const struct pt_insn *insn,
640 const struct pt_insn_ext *iext)
641 {
642 int status;
643
644 if (!decoder)
645 return -pte_internal;
646
647 status = pt_blk_next_ip(&decoder->ip, decoder, insn, iext);
648 if (status < 0)
649 return status;
650
651 /* Preserve the query decoder's response which indicates upcoming
652 * events.
653 */
654 decoder->status = status;
655
656 /* We do need an IP in order to proceed. */
657 if (status & pts_ip_suppressed)
658 return -pte_noip;
659
660 return 0;
661 }
662
663 /* Decode one instruction in a known section.
664 *
665 * Decode the instruction at @insn->ip in @msec assuming execution mode
666 * @insn->mode.
667 *
668 * Returns zero on success, a negative error code otherwise.
669 */
pt_blk_decode_in_section(struct pt_insn * insn,struct pt_insn_ext * iext,const struct pt_mapped_section * msec)670 static int pt_blk_decode_in_section(struct pt_insn *insn,
671 struct pt_insn_ext *iext,
672 const struct pt_mapped_section *msec)
673 {
674 int status;
675
676 if (!insn || !iext)
677 return -pte_internal;
678
679 /* We know that @ip is contained in @section.
680 *
681 * Note that we need to translate @ip into a section offset.
682 */
683 status = pt_msec_read(msec, insn->raw, sizeof(insn->raw), insn->ip);
684 if (status < 0)
685 return status;
686
687 /* We initialize @insn->size to the maximal possible size. It will be
688 * set to the actual size during instruction decode.
689 */
690 insn->size = (uint8_t) status;
691
692 return pt_ild_decode(insn, iext);
693 }
694
695 /* Update the return-address stack if @insn is a near call.
696 *
697 * Returns zero on success, a negative error code otherwise.
698 */
pt_blk_log_call(struct pt_block_decoder * decoder,const struct pt_insn * insn,const struct pt_insn_ext * iext)699 static inline int pt_blk_log_call(struct pt_block_decoder *decoder,
700 const struct pt_insn *insn,
701 const struct pt_insn_ext *iext)
702 {
703 if (!decoder || !insn || !iext)
704 return -pte_internal;
705
706 if (insn->iclass != ptic_call)
707 return 0;
708
709 /* Ignore direct calls to the next instruction that are used for
710 * position independent code.
711 */
712 if (iext->variant.branch.is_direct &&
713 !iext->variant.branch.displacement)
714 return 0;
715
716 return pt_retstack_push(&decoder->retstack, insn->ip + insn->size);
717 }
718
719 /* Proceed by one instruction.
720 *
721 * Tries to decode the instruction at @decoder->ip and, on success, adds it to
722 * @block and provides it in @pinsn and @piext.
723 *
724 * The instruction will not be added if:
725 *
726 * - the memory could not be read: return error
727 * - it could not be decoded: return error
728 * - @block is already full: return zero
729 * - @block would switch sections: return zero
730 *
731 * Returns a positive integer if the instruction was added.
732 * Returns zero if the instruction didn't fit into @block.
733 * Returns a negative error code otherwise.
734 */
pt_blk_proceed_one_insn(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * pinsn,struct pt_insn_ext * piext)735 static int pt_blk_proceed_one_insn(struct pt_block_decoder *decoder,
736 struct pt_block *block,
737 struct pt_insn *pinsn,
738 struct pt_insn_ext *piext)
739 {
740 struct pt_insn_ext iext;
741 struct pt_insn insn;
742 uint16_t ninsn;
743 int status;
744
745 if (!decoder || !block || !pinsn || !piext)
746 return -pte_internal;
747
748 /* There's nothing to do if there is no room in @block. */
749 ninsn = block->ninsn + 1;
750 if (!ninsn)
751 return 0;
752
753 /* The truncated instruction must be last. */
754 if (block->truncated)
755 return 0;
756
757 memset(&insn, 0, sizeof(insn));
758 memset(&iext, 0, sizeof(iext));
759
760 insn.mode = decoder->mode;
761 insn.ip = decoder->ip;
762
763 status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
764 if (status < 0)
765 return status;
766
767 /* We do not switch sections inside a block. */
768 if (insn.isid != block->isid) {
769 if (!pt_blk_block_is_empty(block))
770 return 0;
771
772 block->isid = insn.isid;
773 }
774
775 /* If we couldn't read @insn's memory in one chunk from @insn.isid, we
776 * provide the memory in @block.
777 */
778 if (insn.truncated) {
779 memcpy(block->raw, insn.raw, insn.size);
780 block->size = insn.size;
781 block->truncated = 1;
782 }
783
784 /* Log calls' return addresses for return compression. */
785 status = pt_blk_log_call(decoder, &insn, &iext);
786 if (status < 0)
787 return status;
788
789 /* We have a new instruction. */
790 block->iclass = insn.iclass;
791 block->end_ip = insn.ip;
792 block->ninsn = ninsn;
793
794 *pinsn = insn;
795 *piext = iext;
796
797 return 1;
798 }
799
800
801 /* Proceed to a particular type of instruction without using trace.
802 *
803 * Proceed until we reach an instruction for which @predicate returns a positive
804 * integer or until:
805 *
806 * - @predicate returns an error: return error
807 * - @block is full: return zero
808 * - @block would switch sections: return zero
809 * - we would need trace: return -pte_bad_query
810 *
811 * Provide the last instruction that was reached in @insn and @iext.
812 *
813 * Update @decoder->ip to point to the last IP that was reached. If we fail due
814 * to lack of trace or if we reach a desired instruction, this is @insn->ip;
815 * otherwise this is the next instruction's IP.
816 *
817 * Returns a positive integer if a suitable instruction was reached.
818 * Returns zero if no such instruction was reached.
819 * Returns a negative error code otherwise.
820 */
pt_blk_proceed_to_insn(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * insn,struct pt_insn_ext * iext,int (* predicate)(const struct pt_insn *,const struct pt_insn_ext *))821 static int pt_blk_proceed_to_insn(struct pt_block_decoder *decoder,
822 struct pt_block *block,
823 struct pt_insn *insn,
824 struct pt_insn_ext *iext,
825 int (*predicate)(const struct pt_insn *,
826 const struct pt_insn_ext *))
827 {
828 int status;
829
830 if (!decoder || !insn || !predicate)
831 return -pte_internal;
832
833 for (;;) {
834 status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
835 if (status <= 0)
836 return status;
837
838 /* We're done if this instruction matches the spec (positive
839 * status) or we run into an error (negative status).
840 */
841 status = predicate(insn, iext);
842 if (status != 0)
843 return status;
844
845 /* Let's see if we can proceed to the next IP without trace. */
846 status = pt_insn_next_ip(&decoder->ip, insn, iext);
847 if (status < 0)
848 return status;
849
850 /* End the block if the user asked us to.
851 *
852 * We only need to take care about direct near branches.
853 * Indirect and far branches require trace and will naturally
854 * end a block.
855 */
856 if ((decoder->flags.variant.block.end_on_call &&
857 (insn->iclass == ptic_call)) ||
858 (decoder->flags.variant.block.end_on_jump &&
859 (insn->iclass == ptic_jump)))
860 return 0;
861 }
862 }
863
864 /* Proceed to a particular IP without using trace.
865 *
866 * Proceed until we reach @ip or until:
867 *
868 * - @block is full: return zero
869 * - @block would switch sections: return zero
870 * - we would need trace: return -pte_bad_query
871 *
872 * Provide the last instruction that was reached in @insn and @iext. If we
873 * reached @ip, this is the instruction preceding it.
874 *
875 * Update @decoder->ip to point to the last IP that was reached. If we fail due
876 * to lack of trace, this is @insn->ip; otherwise this is the next instruction's
877 * IP.
878 *
879 * Returns a positive integer if @ip was reached.
880 * Returns zero if no such instruction was reached.
881 * Returns a negative error code otherwise.
882 */
pt_blk_proceed_to_ip(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * insn,struct pt_insn_ext * iext,uint64_t ip)883 static int pt_blk_proceed_to_ip(struct pt_block_decoder *decoder,
884 struct pt_block *block, struct pt_insn *insn,
885 struct pt_insn_ext *iext, uint64_t ip)
886 {
887 int status;
888
889 if (!decoder || !insn)
890 return -pte_internal;
891
892 for (;;) {
893 /* We're done when we reach @ip. We may not even have to decode
894 * a single instruction in some cases.
895 */
896 if (decoder->ip == ip)
897 return 1;
898
899 status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
900 if (status <= 0)
901 return status;
902
903 /* Let's see if we can proceed to the next IP without trace. */
904 status = pt_insn_next_ip(&decoder->ip, insn, iext);
905 if (status < 0)
906 return status;
907
908 /* End the block if the user asked us to.
909 *
910 * We only need to take care about direct near branches.
911 * Indirect and far branches require trace and will naturally
912 * end a block.
913 *
914 * The call at the end of the block may have reached @ip; make
915 * sure to indicate that.
916 */
917 if ((decoder->flags.variant.block.end_on_call &&
918 (insn->iclass == ptic_call)) ||
919 (decoder->flags.variant.block.end_on_jump &&
920 (insn->iclass == ptic_jump))) {
921 return (decoder->ip == ip ? 1 : 0);
922 }
923 }
924 }
925
926 /* Proceed to a particular IP with trace, if necessary.
927 *
928 * Proceed until we reach @ip or until:
929 *
930 * - @block is full: return zero
931 * - @block would switch sections: return zero
932 * - we need trace: return zero
933 *
934 * Update @decoder->ip to point to the last IP that was reached.
935 *
936 * A return of zero ends @block.
937 *
938 * Returns a positive integer if @ip was reached.
939 * Returns zero if no such instruction was reached.
940 * Returns a negative error code otherwise.
941 */
pt_blk_proceed_to_ip_with_trace(struct pt_block_decoder * decoder,struct pt_block * block,uint64_t ip)942 static int pt_blk_proceed_to_ip_with_trace(struct pt_block_decoder *decoder,
943 struct pt_block *block,
944 uint64_t ip)
945 {
946 struct pt_insn_ext iext;
947 struct pt_insn insn;
948 int status;
949
950 /* Try to reach @ip without trace.
951 *
952 * We're also OK if @block overflowed or we switched sections and we
953 * have to try again in the next iteration.
954 */
955 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext, ip);
956 if (status != -pte_bad_query)
957 return status;
958
959 /* Needing trace is not an error. We use trace to determine the next
960 * start IP and end the block.
961 */
962 return pt_blk_proceed_with_trace(decoder, &insn, &iext);
963 }
964
pt_insn_skl014(const struct pt_insn * insn,const struct pt_insn_ext * iext)965 static int pt_insn_skl014(const struct pt_insn *insn,
966 const struct pt_insn_ext *iext)
967 {
968 if (!insn || !iext)
969 return 0;
970
971 switch (insn->iclass) {
972 default:
973 return 0;
974
975 case ptic_call:
976 case ptic_jump:
977 return iext->variant.branch.is_direct;
978
979 case ptic_other:
980 return pt_insn_changes_cr3(insn, iext);
981 }
982 }
983
984 /* Proceed to the location of a synchronous disabled event with suppressed IP
985 * considering SKL014.
986 *
987 * We have a (synchronous) disabled event pending. Proceed to the event
988 * location and indicate whether we were able to reach it.
989 *
990 * With SKL014 a TIP.PGD with suppressed IP may also be generated by a direct
991 * unconditional branch that clears FilterEn by jumping out of a filter region
992 * or into a TraceStop region. Use the filter configuration to determine the
993 * exact branch the event binds to.
994 *
995 * The last instruction that was reached is stored in @insn/@iext.
996 *
997 * Returns a positive integer if the event location was reached.
998 * Returns zero if the event location was not reached.
999 * Returns a negative error code otherwise.
1000 */
pt_blk_proceed_skl014(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * insn,struct pt_insn_ext * iext)1001 static int pt_blk_proceed_skl014(struct pt_block_decoder *decoder,
1002 struct pt_block *block, struct pt_insn *insn,
1003 struct pt_insn_ext *iext)
1004 {
1005 const struct pt_conf_addr_filter *addr_filter;
1006 int status;
1007
1008 if (!decoder || !block || !insn || !iext)
1009 return -pte_internal;
1010
1011 addr_filter = &decoder->query.config.addr_filter;
1012 for (;;) {
1013 uint64_t ip;
1014
1015 status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1016 pt_insn_skl014);
1017 if (status <= 0)
1018 break;
1019
1020 /* The erratum doesn't apply if we can bind the event to a
1021 * CR3-changing instruction.
1022 */
1023 if (pt_insn_changes_cr3(insn, iext))
1024 break;
1025
1026 /* Check the filter against the branch target. */
1027 status = pt_insn_next_ip(&ip, insn, iext);
1028 if (status < 0)
1029 break;
1030
1031 status = pt_filter_addr_check(addr_filter, ip);
1032 if (status <= 0) {
1033 /* We need to flip the indication.
1034 *
1035 * We reached the event location when @ip lies inside a
1036 * tracing-disabled region.
1037 */
1038 if (!status)
1039 status = 1;
1040
1041 break;
1042 }
1043
1044 /* This is not the correct instruction. Proceed past it and try
1045 * again.
1046 */
1047 decoder->ip = ip;
1048
1049 /* End the block if the user asked us to.
1050 *
1051 * We only need to take care about direct near branches.
1052 * Indirect and far branches require trace and will naturally
1053 * end a block.
1054 */
1055 if ((decoder->flags.variant.block.end_on_call &&
1056 (insn->iclass == ptic_call)) ||
1057 (decoder->flags.variant.block.end_on_jump &&
1058 (insn->iclass == ptic_jump)))
1059 break;
1060 }
1061
1062 return status;
1063 }
1064
1065 /* Proceed to the event location for a disabled event.
1066 *
1067 * We have a (synchronous) disabled event pending. Proceed to the event
1068 * location and indicate whether we were able to reach it.
1069 *
1070 * The last instruction that was reached is stored in @insn/@iext.
1071 *
1072 * Returns a positive integer if the event location was reached.
1073 * Returns zero if the event location was not reached.
1074 * Returns a negative error code otherwise.
1075 */
pt_blk_proceed_to_disabled(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * insn,struct pt_insn_ext * iext,const struct pt_event * ev)1076 static int pt_blk_proceed_to_disabled(struct pt_block_decoder *decoder,
1077 struct pt_block *block,
1078 struct pt_insn *insn,
1079 struct pt_insn_ext *iext,
1080 const struct pt_event *ev)
1081 {
1082 if (!decoder || !block || !ev)
1083 return -pte_internal;
1084
1085 if (ev->ip_suppressed) {
1086 /* Due to SKL014 the TIP.PGD payload may be suppressed also for
1087 * direct branches.
1088 *
1089 * If we don't have a filter configuration we assume that no
1090 * address filters were used and the erratum does not apply.
1091 *
1092 * We might otherwise disable tracing too early.
1093 */
1094 if (decoder->query.config.addr_filter.config.addr_cfg &&
1095 decoder->query.config.errata.skl014)
1096 return pt_blk_proceed_skl014(decoder, block, insn,
1097 iext);
1098
1099 /* A synchronous disabled event also binds to far branches and
1100 * CPL-changing instructions. Both would require trace,
1101 * however, and are thus implicitly handled by erroring out.
1102 *
1103 * The would-require-trace error is handled by our caller.
1104 */
1105 return pt_blk_proceed_to_insn(decoder, block, insn, iext,
1106 pt_insn_changes_cr3);
1107 } else
1108 return pt_blk_proceed_to_ip(decoder, block, insn, iext,
1109 ev->variant.disabled.ip);
1110 }
1111
1112 /* Set the expected resume address for a synchronous disable.
1113 *
1114 * On a synchronous disable, @decoder->ip still points to the instruction to
1115 * which the event bound. That's not where we expect tracing to resume.
1116 *
1117 * For calls, a fair assumption is that tracing resumes after returning from the
1118 * called function. For other types of instructions, we simply don't know.
1119 *
1120 * Returns zero on success, a negative pt_error_code otherwise.
1121 */
pt_blk_set_disable_resume_ip(struct pt_block_decoder * decoder,const struct pt_insn * insn)1122 static int pt_blk_set_disable_resume_ip(struct pt_block_decoder *decoder,
1123 const struct pt_insn *insn)
1124 {
1125 if (!decoder || !insn)
1126 return -pte_internal;
1127
1128 switch (insn->iclass) {
1129 case ptic_call:
1130 case ptic_far_call:
1131 decoder->ip = insn->ip + insn->size;
1132 break;
1133
1134 default:
1135 decoder->ip = 0ull;
1136 break;
1137 }
1138
1139 return 0;
1140 }
1141
1142 /* Proceed to the event location for an async paging event.
1143 *
1144 * We have an async paging event pending. Proceed to the event location and
1145 * indicate whether we were able to reach it. Needing trace in order to proceed
1146 * is not an error in this case but ends the block.
1147 *
1148 * Returns a positive integer if the event location was reached.
1149 * Returns zero if the event location was not reached.
1150 * Returns a negative error code otherwise.
1151 */
pt_blk_proceed_to_async_paging(struct pt_block_decoder * decoder,struct pt_block * block,const struct pt_event * ev)1152 static int pt_blk_proceed_to_async_paging(struct pt_block_decoder *decoder,
1153 struct pt_block *block,
1154 const struct pt_event *ev)
1155 {
1156 int status;
1157
1158 if (!decoder || !ev)
1159 return -pte_internal;
1160
1161 /* Apply the event immediately if we don't have an IP. */
1162 if (ev->ip_suppressed)
1163 return 1;
1164
1165 status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1166 ev->variant.async_paging.ip);
1167 if (status < 0)
1168 return status;
1169
1170 /* We may have reached the IP. */
1171 return (decoder->ip == ev->variant.async_paging.ip ? 1 : 0);
1172 }
1173
1174 /* Proceed to the event location for an async vmcs event.
1175 *
1176 * We have an async vmcs event pending. Proceed to the event location and
1177 * indicate whether we were able to reach it. Needing trace in order to proceed
1178 * is not an error in this case but ends the block.
1179 *
1180 * Returns a positive integer if the event location was reached.
1181 * Returns zero if the event location was not reached.
1182 * Returns a negative error code otherwise.
1183 */
pt_blk_proceed_to_async_vmcs(struct pt_block_decoder * decoder,struct pt_block * block,const struct pt_event * ev)1184 static int pt_blk_proceed_to_async_vmcs(struct pt_block_decoder *decoder,
1185 struct pt_block *block,
1186 const struct pt_event *ev)
1187 {
1188 int status;
1189
1190 if (!decoder || !ev)
1191 return -pte_internal;
1192
1193 /* Apply the event immediately if we don't have an IP. */
1194 if (ev->ip_suppressed)
1195 return 1;
1196
1197 status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1198 ev->variant.async_vmcs.ip);
1199 if (status < 0)
1200 return status;
1201
1202 /* We may have reached the IP. */
1203 return (decoder->ip == ev->variant.async_vmcs.ip ? 1 : 0);
1204 }
1205
1206 /* Proceed to the event location for an exec mode event.
1207 *
1208 * We have an exec mode event pending. Proceed to the event location and
1209 * indicate whether we were able to reach it. Needing trace in order to proceed
1210 * is not an error in this case but ends the block.
1211 *
1212 * Returns a positive integer if the event location was reached.
1213 * Returns zero if the event location was not reached.
1214 * Returns a negative error code otherwise.
1215 */
pt_blk_proceed_to_exec_mode(struct pt_block_decoder * decoder,struct pt_block * block,const struct pt_event * ev)1216 static int pt_blk_proceed_to_exec_mode(struct pt_block_decoder *decoder,
1217 struct pt_block *block,
1218 const struct pt_event *ev)
1219 {
1220 int status;
1221
1222 if (!decoder || !ev)
1223 return -pte_internal;
1224
1225 /* Apply the event immediately if we don't have an IP. */
1226 if (ev->ip_suppressed)
1227 return 1;
1228
1229 status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1230 ev->variant.exec_mode.ip);
1231 if (status < 0)
1232 return status;
1233
1234 /* We may have reached the IP. */
1235 return (decoder->ip == ev->variant.exec_mode.ip ? 1 : 0);
1236 }
1237
1238 /* Proceed to the event location for a ptwrite event.
1239 *
1240 * We have a ptwrite event pending. Proceed to the event location and indicate
1241 * whether we were able to reach it.
1242 *
1243 * In case of the event binding to a ptwrite instruction, we pass beyond that
1244 * instruction and update the event to provide the instruction's IP.
1245 *
1246 * In the case of the event binding to an IP provided in the event, we move
1247 * beyond the instruction at that IP.
1248 *
1249 * Returns a positive integer if the event location was reached.
1250 * Returns zero if the event location was not reached.
1251 * Returns a negative error code otherwise.
1252 */
pt_blk_proceed_to_ptwrite(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_insn * insn,struct pt_insn_ext * iext,struct pt_event * ev)1253 static int pt_blk_proceed_to_ptwrite(struct pt_block_decoder *decoder,
1254 struct pt_block *block,
1255 struct pt_insn *insn,
1256 struct pt_insn_ext *iext,
1257 struct pt_event *ev)
1258 {
1259 int status;
1260
1261 if (!insn || !ev)
1262 return -pte_internal;
1263
1264 /* If we don't have an IP, the event binds to the next PTWRITE
1265 * instruction.
1266 *
1267 * If we have an IP it still binds to the next PTWRITE instruction but
1268 * now the IP tells us where that instruction is. This makes most sense
1269 * when tracing is disabled and we don't have any other means of finding
1270 * the PTWRITE instruction. We nevertheless distinguish the two cases,
1271 * here.
1272 *
1273 * In both cases, we move beyond the PTWRITE instruction, so it will be
1274 * the last instruction in the current block and @decoder->ip will point
1275 * to the instruction following it.
1276 */
1277 if (ev->ip_suppressed) {
1278 status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1279 pt_insn_is_ptwrite);
1280 if (status <= 0)
1281 return status;
1282
1283 /* We now know the IP of the PTWRITE instruction corresponding
1284 * to this event. Fill it in to make it more convenient for the
1285 * user to process the event.
1286 */
1287 ev->variant.ptwrite.ip = insn->ip;
1288 ev->ip_suppressed = 0;
1289 } else {
1290 status = pt_blk_proceed_to_ip(decoder, block, insn, iext,
1291 ev->variant.ptwrite.ip);
1292 if (status <= 0)
1293 return status;
1294
1295 /* We reached the PTWRITE instruction and @decoder->ip points to
1296 * it; @insn/@iext still contain the preceding instruction.
1297 *
1298 * Proceed beyond the PTWRITE to account for it. Note that we
1299 * may still overflow the block, which would cause us to
1300 * postpone both instruction and event to the next block.
1301 */
1302 status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
1303 if (status <= 0)
1304 return status;
1305 }
1306
1307 return 1;
1308 }
1309
1310 /* Try to work around erratum SKD022.
1311 *
1312 * If we get an asynchronous disable on VMLAUNCH or VMRESUME, the FUP that
1313 * caused the disable to be asynchronous might have been bogous.
1314 *
1315 * Returns a positive integer if the erratum has been handled.
1316 * Returns zero if the erratum does not apply.
1317 * Returns a negative error code otherwise.
1318 */
pt_blk_handle_erratum_skd022(struct pt_block_decoder * decoder,struct pt_event * ev)1319 static int pt_blk_handle_erratum_skd022(struct pt_block_decoder *decoder,
1320 struct pt_event *ev)
1321 {
1322 struct pt_insn_ext iext;
1323 struct pt_insn insn;
1324 int errcode;
1325
1326 if (!decoder || !ev)
1327 return -pte_internal;
1328
1329 insn.mode = decoder->mode;
1330 insn.ip = ev->variant.async_disabled.at;
1331
1332 errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
1333 if (errcode < 0)
1334 return 0;
1335
1336 switch (iext.iclass) {
1337 default:
1338 /* The erratum does not apply. */
1339 return 0;
1340
1341 case PTI_INST_VMLAUNCH:
1342 case PTI_INST_VMRESUME:
1343 /* The erratum may apply. We can't be sure without a lot more
1344 * analysis. Let's assume it does.
1345 *
1346 * We turn the async disable into a sync disable. Our caller
1347 * will restart event processing.
1348 */
1349 ev->type = ptev_disabled;
1350 ev->variant.disabled.ip = ev->variant.async_disabled.ip;
1351
1352 return 1;
1353 }
1354 }
1355
1356 /* Postpone proceeding past @insn/@iext and indicate a pending event.
1357 *
1358 * There may be further events pending on @insn/@iext. Postpone proceeding past
1359 * @insn/@iext until we processed all events that bind to it.
1360 *
1361 * Returns a non-negative pt_status_flag bit-vector indicating a pending event
1362 * on success, a negative pt_error_code otherwise.
1363 */
pt_blk_postpone_insn(struct pt_block_decoder * decoder,const struct pt_insn * insn,const struct pt_insn_ext * iext)1364 static int pt_blk_postpone_insn(struct pt_block_decoder *decoder,
1365 const struct pt_insn *insn,
1366 const struct pt_insn_ext *iext)
1367 {
1368 if (!decoder || !insn || !iext)
1369 return -pte_internal;
1370
1371 /* Only one can be active. */
1372 if (decoder->process_insn)
1373 return -pte_internal;
1374
1375 decoder->process_insn = 1;
1376 decoder->insn = *insn;
1377 decoder->iext = *iext;
1378
1379 return pt_blk_status(decoder, pts_event_pending);
1380 }
1381
1382 /* Remove any postponed instruction from @decoder.
1383 *
1384 * Returns zero on success, a negative pt_error_code otherwise.
1385 */
pt_blk_clear_postponed_insn(struct pt_block_decoder * decoder)1386 static int pt_blk_clear_postponed_insn(struct pt_block_decoder *decoder)
1387 {
1388 if (!decoder)
1389 return -pte_internal;
1390
1391 decoder->process_insn = 0;
1392 decoder->bound_paging = 0;
1393 decoder->bound_vmcs = 0;
1394 decoder->bound_ptwrite = 0;
1395
1396 return 0;
1397 }
1398
1399 /* Proceed past a postponed instruction.
1400 *
1401 * If an instruction has been postponed in @decoder, proceed past it.
1402 *
1403 * Returns zero on success, a negative pt_error_code otherwise.
1404 */
pt_blk_proceed_postponed_insn(struct pt_block_decoder * decoder)1405 static int pt_blk_proceed_postponed_insn(struct pt_block_decoder *decoder)
1406 {
1407 int status;
1408
1409 if (!decoder)
1410 return -pte_internal;
1411
1412 /* There's nothing to do if we have no postponed instruction. */
1413 if (!decoder->process_insn)
1414 return 0;
1415
1416 /* There's nothing to do if tracing got disabled. */
1417 if (!decoder->enabled)
1418 return pt_blk_clear_postponed_insn(decoder);
1419
1420 status = pt_insn_next_ip(&decoder->ip, &decoder->insn, &decoder->iext);
1421 if (status < 0) {
1422 if (status != -pte_bad_query)
1423 return status;
1424
1425 status = pt_blk_proceed_with_trace(decoder, &decoder->insn,
1426 &decoder->iext);
1427 if (status < 0)
1428 return status;
1429 }
1430
1431 return pt_blk_clear_postponed_insn(decoder);
1432 }
1433
1434 /* Proceed to the next event.
1435 *
1436 * We have an event pending. Proceed to the event location and indicate the
1437 * event to the user.
1438 *
1439 * On our way to the event location we may also be forced to postpone the event
1440 * to the next block, e.g. if we overflow the number of instructions in the
1441 * block or if we need trace in order to reach the event location.
1442 *
1443 * If we're not able to reach the event location, we return zero. This is what
1444 * pt_blk_status() would return since:
1445 *
1446 * - we suppress pts_eos as long as we're processing events
1447 * - we do not set pts_ip_suppressed since tracing must be enabled
1448 *
1449 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
1450 * code otherwise.
1451 */
pt_blk_proceed_event(struct pt_block_decoder * decoder,struct pt_block * block)1452 static int pt_blk_proceed_event(struct pt_block_decoder *decoder,
1453 struct pt_block *block)
1454 {
1455 struct pt_insn_ext iext;
1456 struct pt_insn insn;
1457 struct pt_event *ev;
1458 int status;
1459
1460 if (!decoder || !decoder->process_event || !block)
1461 return -pte_internal;
1462
1463 ev = &decoder->event;
1464 switch (ev->type) {
1465 case ptev_enabled:
1466 break;
1467
1468 case ptev_disabled:
1469 status = pt_blk_proceed_to_disabled(decoder, block, &insn,
1470 &iext, ev);
1471 if (status <= 0) {
1472 /* A synchronous disable event also binds to the next
1473 * indirect or conditional branch, i.e. to any branch
1474 * that would have required trace.
1475 */
1476 if (status != -pte_bad_query)
1477 return status;
1478
1479 status = pt_blk_set_disable_resume_ip(decoder, &insn);
1480 if (status < 0)
1481 return status;
1482 }
1483
1484 break;
1485
1486 case ptev_async_disabled:
1487 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1488 ev->variant.async_disabled.at);
1489 if (status <= 0)
1490 return status;
1491
1492 if (decoder->query.config.errata.skd022) {
1493 status = pt_blk_handle_erratum_skd022(decoder, ev);
1494 if (status != 0) {
1495 if (status < 0)
1496 return status;
1497
1498 /* If the erratum hits, we modify the event.
1499 * Try again.
1500 */
1501 return pt_blk_proceed_event(decoder, block);
1502 }
1503 }
1504
1505 break;
1506
1507 case ptev_async_branch:
1508 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1509 ev->variant.async_branch.from);
1510 if (status <= 0)
1511 return status;
1512
1513 break;
1514
1515 case ptev_paging:
1516 if (!decoder->enabled)
1517 break;
1518
1519 status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1520 pt_insn_binds_to_pip);
1521 if (status <= 0)
1522 return status;
1523
1524 /* We bound a paging event. Make sure we do not bind further
1525 * paging events to this instruction.
1526 */
1527 decoder->bound_paging = 1;
1528
1529 return pt_blk_postpone_insn(decoder, &insn, &iext);
1530
1531 case ptev_async_paging:
1532 status = pt_blk_proceed_to_async_paging(decoder, block, ev);
1533 if (status <= 0)
1534 return status;
1535
1536 break;
1537
1538 case ptev_vmcs:
1539 if (!decoder->enabled)
1540 break;
1541
1542 status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1543 pt_insn_binds_to_vmcs);
1544 if (status <= 0)
1545 return status;
1546
1547 /* We bound a vmcs event. Make sure we do not bind further vmcs
1548 * events to this instruction.
1549 */
1550 decoder->bound_vmcs = 1;
1551
1552 return pt_blk_postpone_insn(decoder, &insn, &iext);
1553
1554 case ptev_async_vmcs:
1555 status = pt_blk_proceed_to_async_vmcs(decoder, block, ev);
1556 if (status <= 0)
1557 return status;
1558
1559 break;
1560
1561 case ptev_overflow:
1562 break;
1563
1564 case ptev_exec_mode:
1565 status = pt_blk_proceed_to_exec_mode(decoder, block, ev);
1566 if (status <= 0)
1567 return status;
1568
1569 break;
1570
1571 case ptev_tsx:
1572 if (ev->ip_suppressed)
1573 break;
1574
1575 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1576 ev->variant.tsx.ip);
1577 if (status <= 0)
1578 return status;
1579
1580 break;
1581
1582 case ptev_stop:
1583 break;
1584
1585 case ptev_exstop:
1586 if (!decoder->enabled || ev->ip_suppressed)
1587 break;
1588
1589 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1590 ev->variant.exstop.ip);
1591 if (status <= 0)
1592 return status;
1593
1594 break;
1595
1596 case ptev_mwait:
1597 if (!decoder->enabled || ev->ip_suppressed)
1598 break;
1599
1600 status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1601 ev->variant.mwait.ip);
1602 if (status <= 0)
1603 return status;
1604
1605 break;
1606
1607 case ptev_pwre:
1608 case ptev_pwrx:
1609 break;
1610
1611 case ptev_ptwrite:
1612 if (!decoder->enabled)
1613 break;
1614
1615 status = pt_blk_proceed_to_ptwrite(decoder, block, &insn,
1616 &iext, ev);
1617 if (status <= 0)
1618 return status;
1619
1620 /* We bound a ptwrite event. Make sure we do not bind further
1621 * ptwrite events to this instruction.
1622 */
1623 decoder->bound_ptwrite = 1;
1624
1625 return pt_blk_postpone_insn(decoder, &insn, &iext);
1626
1627 case ptev_tick:
1628 case ptev_cbr:
1629 case ptev_mnt:
1630 break;
1631 }
1632
1633 return pt_blk_status(decoder, pts_event_pending);
1634 }
1635
1636 /* Proceed to the next decision point without using the block cache.
1637 *
1638 * Tracing is enabled and we don't have an event pending. Proceed as far as
1639 * we get without trace. Stop when we either:
1640 *
1641 * - need trace in order to continue
1642 * - overflow the max number of instructions in a block
1643 *
1644 * We actually proceed one instruction further to get the start IP for the next
1645 * block. This only updates @decoder's internal state, though.
1646 *
1647 * Returns zero on success, a negative error code otherwise.
1648 */
pt_blk_proceed_no_event_uncached(struct pt_block_decoder * decoder,struct pt_block * block)1649 static int pt_blk_proceed_no_event_uncached(struct pt_block_decoder *decoder,
1650 struct pt_block *block)
1651 {
1652 struct pt_insn_ext iext;
1653 struct pt_insn insn;
1654 int status;
1655
1656 if (!decoder || !block)
1657 return -pte_internal;
1658
1659 /* This is overly conservative, really. We shouldn't get a bad-query
1660 * status unless we decoded at least one instruction successfully.
1661 */
1662 memset(&insn, 0, sizeof(insn));
1663 memset(&iext, 0, sizeof(iext));
1664
1665 /* Proceed as far as we get without trace. */
1666 status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1667 pt_insn_false);
1668 if (status < 0) {
1669 if (status != -pte_bad_query)
1670 return status;
1671
1672 return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1673 }
1674
1675 return 0;
1676 }
1677
1678 /* Check if @ip is contained in @section loaded at @laddr.
1679 *
1680 * Returns non-zero if it is.
1681 * Returns zero if it isn't or of @section is NULL.
1682 */
pt_blk_is_in_section(const struct pt_mapped_section * msec,uint64_t ip)1683 static inline int pt_blk_is_in_section(const struct pt_mapped_section *msec,
1684 uint64_t ip)
1685 {
1686 uint64_t begin, end;
1687
1688 begin = pt_msec_begin(msec);
1689 end = pt_msec_end(msec);
1690
1691 return (begin <= ip && ip < end);
1692 }
1693
1694 /* Insert a trampoline block cache entry.
1695 *
1696 * Add a trampoline block cache entry at @ip to continue at @nip, where @nip
1697 * must be the next instruction after @ip.
1698 *
1699 * Both @ip and @nip must be section-relative
1700 *
1701 * Returns zero on success, a negative error code otherwise.
1702 */
pt_blk_add_trampoline(struct pt_block_cache * bcache,uint64_t ip,uint64_t nip,enum pt_exec_mode mode)1703 static inline int pt_blk_add_trampoline(struct pt_block_cache *bcache,
1704 uint64_t ip, uint64_t nip,
1705 enum pt_exec_mode mode)
1706 {
1707 struct pt_bcache_entry bce;
1708 int64_t disp;
1709
1710 /* The displacement from @ip to @nip for the trampoline. */
1711 disp = (int64_t) (nip - ip);
1712
1713 memset(&bce, 0, sizeof(bce));
1714 bce.displacement = (int32_t) disp;
1715 bce.ninsn = 1;
1716 bce.mode = mode;
1717 bce.qualifier = ptbq_again;
1718
1719 /* If we can't reach @nip without overflowing the displacement field, we
1720 * have to stop and re-decode the instruction at @ip.
1721 */
1722 if ((int64_t) bce.displacement != disp) {
1723
1724 memset(&bce, 0, sizeof(bce));
1725 bce.ninsn = 1;
1726 bce.mode = mode;
1727 bce.qualifier = ptbq_decode;
1728 }
1729
1730 return pt_bcache_add(bcache, ip, bce);
1731 }
1732
1733 /* Insert a decode block cache entry.
1734 *
1735 * Add a decode block cache entry at @ioff.
1736 *
1737 * Returns zero on success, a negative error code otherwise.
1738 */
pt_blk_add_decode(struct pt_block_cache * bcache,uint64_t ioff,enum pt_exec_mode mode)1739 static inline int pt_blk_add_decode(struct pt_block_cache *bcache,
1740 uint64_t ioff, enum pt_exec_mode mode)
1741 {
1742 struct pt_bcache_entry bce;
1743
1744 memset(&bce, 0, sizeof(bce));
1745 bce.ninsn = 1;
1746 bce.mode = mode;
1747 bce.qualifier = ptbq_decode;
1748
1749 return pt_bcache_add(bcache, ioff, bce);
1750 }
1751
1752 enum {
1753 /* The maximum number of steps when filling the block cache. */
1754 bcache_fill_steps = 0x400
1755 };
1756
1757 /* Proceed to the next instruction and fill the block cache for @decoder->ip.
1758 *
1759 * Tracing is enabled and we don't have an event pending. The current IP is not
1760 * yet cached.
1761 *
1762 * Proceed one instruction without using the block cache, then try to proceed
1763 * further using the block cache.
1764 *
1765 * On our way back, add a block cache entry for the IP before proceeding. Note
1766 * that the recursion is bounded by @steps and ultimately by the maximum number
1767 * of instructions in a block.
1768 *
1769 * Returns zero on success, a negative error code otherwise.
1770 */
1771 static int
pt_blk_proceed_no_event_fill_cache(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_block_cache * bcache,const struct pt_mapped_section * msec,size_t steps)1772 pt_blk_proceed_no_event_fill_cache(struct pt_block_decoder *decoder,
1773 struct pt_block *block,
1774 struct pt_block_cache *bcache,
1775 const struct pt_mapped_section *msec,
1776 size_t steps)
1777 {
1778 struct pt_bcache_entry bce;
1779 struct pt_insn_ext iext;
1780 struct pt_insn insn;
1781 uint64_t nip, dip;
1782 int64_t disp, ioff, noff;
1783 int status;
1784
1785 if (!decoder || !steps)
1786 return -pte_internal;
1787
1788 /* Proceed one instruction by decoding and examining it.
1789 *
1790 * Note that we also return on a status of zero that indicates that the
1791 * instruction didn't fit into @block.
1792 */
1793 status = pt_blk_proceed_one_insn(decoder, block, &insn, &iext);
1794 if (status <= 0)
1795 return status;
1796
1797 ioff = pt_msec_unmap(msec, insn.ip);
1798
1799 /* Let's see if we can proceed to the next IP without trace.
1800 *
1801 * If we can't, this is certainly a decision point.
1802 */
1803 status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
1804 if (status < 0) {
1805 if (status != -pte_bad_query)
1806 return status;
1807
1808 memset(&bce, 0, sizeof(bce));
1809 bce.ninsn = 1;
1810 bce.mode = insn.mode;
1811 bce.isize = insn.size;
1812
1813 /* Clear the instruction size in case of overflows. */
1814 if ((uint8_t) bce.isize != insn.size)
1815 bce.isize = 0;
1816
1817 switch (insn.iclass) {
1818 case ptic_ptwrite:
1819 case ptic_error:
1820 case ptic_other:
1821 return -pte_internal;
1822
1823 case ptic_jump:
1824 /* A direct jump doesn't require trace. */
1825 if (iext.variant.branch.is_direct)
1826 return -pte_internal;
1827
1828 bce.qualifier = ptbq_indirect;
1829 break;
1830
1831 case ptic_call:
1832 /* A direct call doesn't require trace. */
1833 if (iext.variant.branch.is_direct)
1834 return -pte_internal;
1835
1836 bce.qualifier = ptbq_ind_call;
1837 break;
1838
1839 case ptic_return:
1840 bce.qualifier = ptbq_return;
1841 break;
1842
1843 case ptic_cond_jump:
1844 bce.qualifier = ptbq_cond;
1845 break;
1846
1847 case ptic_far_call:
1848 case ptic_far_return:
1849 case ptic_far_jump:
1850 bce.qualifier = ptbq_indirect;
1851 break;
1852 }
1853
1854 /* If the block was truncated, we have to decode its last
1855 * instruction each time.
1856 *
1857 * We could have skipped the above switch and size assignment in
1858 * this case but this is already a slow and hopefully infrequent
1859 * path.
1860 */
1861 if (block->truncated)
1862 bce.qualifier = ptbq_decode;
1863
1864 status = pt_bcache_add(bcache, ioff, bce);
1865 if (status < 0)
1866 return status;
1867
1868 return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1869 }
1870
1871 /* The next instruction's IP. */
1872 nip = decoder->ip;
1873 noff = pt_msec_unmap(msec, nip);
1874
1875 /* Even if we were able to proceed without trace, we might have to stop
1876 * here for various reasons:
1877 *
1878 * - at near direct calls to update the return-address stack
1879 *
1880 * We are forced to re-decode @insn to get the branch displacement.
1881 *
1882 * Even though it is constant, we don't cache it to avoid increasing
1883 * the size of a cache entry. Note that the displacement field is
1884 * zero for this entry and we might be tempted to use it - but other
1885 * entries that point to this decision point will have non-zero
1886 * displacement.
1887 *
1888 * We could proceed after a near direct call but we might as well
1889 * postpone it to the next iteration. Make sure to end the block if
1890 * @decoder->flags.variant.block.end_on_call is set, though.
1891 *
1892 * - at near direct backwards jumps to detect section splits
1893 *
1894 * In case the current section is split underneath us, we must take
1895 * care to detect that split.
1896 *
1897 * There is one corner case where the split is in the middle of a
1898 * linear sequence of instructions that branches back into the
1899 * originating section.
1900 *
1901 * Calls, indirect branches, and far branches are already covered
1902 * since they either require trace or already require us to stop
1903 * (i.e. near direct calls) for other reasons. That leaves near
1904 * direct backward jumps.
1905 *
1906 * Instead of the decode stop at the jump instruction that we're using,
1907 * we could have made sure that other block cache entries extending this
1908 * one insert a trampoline to the jump's entry. This would have been a
1909 * bit more complicated.
1910 *
1911 * - if we switched sections
1912 *
1913 * This ends a block just like a branch that requires trace.
1914 *
1915 * We need to re-decode @insn in order to determine the start IP of
1916 * the next block.
1917 *
1918 * - if the block is truncated
1919 *
1920 * We need to read the last instruction's memory from multiple
1921 * sections and provide it to the user.
1922 *
1923 * We could still use the block cache but then we'd have to handle
1924 * this case for each qualifier. Truncation is hopefully rare and
1925 * having to read the memory for the instruction from multiple
1926 * sections is already slow. Let's rather keep things simple and
1927 * route it through the decode flow, where we already have
1928 * everything in place.
1929 */
1930 switch (insn.iclass) {
1931 case ptic_call:
1932 return pt_blk_add_decode(bcache, ioff, insn.mode);
1933
1934 case ptic_jump:
1935 /* An indirect branch requires trace and should have been
1936 * handled above.
1937 */
1938 if (!iext.variant.branch.is_direct)
1939 return -pte_internal;
1940
1941 if (iext.variant.branch.displacement < 0 ||
1942 decoder->flags.variant.block.end_on_jump)
1943 return pt_blk_add_decode(bcache, ioff, insn.mode);
1944
1945 fallthrough;
1946 default:
1947 if (!pt_blk_is_in_section(msec, nip) || block->truncated)
1948 return pt_blk_add_decode(bcache, ioff, insn.mode);
1949
1950 break;
1951 }
1952
1953 /* We proceeded one instruction. Let's see if we have a cache entry for
1954 * the next instruction.
1955 */
1956 status = pt_bcache_lookup(&bce, bcache, noff);
1957 if (status < 0)
1958 return status;
1959
1960 /* If we don't have a valid cache entry yet, fill the cache some more.
1961 *
1962 * On our way back, we add a cache entry for this instruction based on
1963 * the cache entry of the succeeding instruction.
1964 */
1965 if (!pt_bce_is_valid(bce)) {
1966 /* If we exceeded the maximum number of allowed steps, we insert
1967 * a trampoline to the next instruction.
1968 *
1969 * The next time we encounter the same code, we will use the
1970 * trampoline to jump directly to where we left off this time
1971 * and continue from there.
1972 */
1973 steps -= 1;
1974 if (!steps)
1975 return pt_blk_add_trampoline(bcache, ioff, noff,
1976 insn.mode);
1977
1978 status = pt_blk_proceed_no_event_fill_cache(decoder, block,
1979 bcache, msec,
1980 steps);
1981 if (status < 0)
1982 return status;
1983
1984 /* Let's see if we have more luck this time. */
1985 status = pt_bcache_lookup(&bce, bcache, noff);
1986 if (status < 0)
1987 return status;
1988
1989 /* If we still don't have a valid cache entry, we're done. Most
1990 * likely, @block overflowed and we couldn't proceed past the
1991 * next instruction.
1992 */
1993 if (!pt_bce_is_valid(bce))
1994 return 0;
1995 }
1996
1997 /* We must not have switched execution modes.
1998 *
1999 * This would require an event and we're on the no-event flow.
2000 */
2001 if (pt_bce_exec_mode(bce) != insn.mode)
2002 return -pte_internal;
2003
2004 /* The decision point IP and the displacement from @insn.ip. */
2005 dip = nip + bce.displacement;
2006 disp = (int64_t) (dip - insn.ip);
2007
2008 /* We may have switched sections if the section was split. See
2009 * pt_blk_proceed_no_event_cached() for a more elaborate comment.
2010 *
2011 * We're not adding a block cache entry since this won't apply to the
2012 * original section which may be shared with other decoders.
2013 *
2014 * We will instead take the slow path until the end of the section.
2015 */
2016 if (!pt_blk_is_in_section(msec, dip))
2017 return 0;
2018
2019 /* Let's try to reach @nip's decision point from @insn.ip.
2020 *
2021 * There are two fields that may overflow: @bce.ninsn and
2022 * @bce.displacement.
2023 */
2024 bce.ninsn += 1;
2025 bce.displacement = (int32_t) disp;
2026
2027 /* If none of them overflowed, we're done.
2028 *
2029 * If one or both overflowed, let's try to insert a trampoline, i.e. we
2030 * try to reach @dip via a ptbq_again entry to @nip.
2031 */
2032 if (!bce.ninsn || ((int64_t) bce.displacement != disp))
2033 return pt_blk_add_trampoline(bcache, ioff, noff, insn.mode);
2034
2035 /* We're done. Add the cache entry.
2036 *
2037 * There's a chance that other decoders updated the cache entry in the
2038 * meantime. They should have come to the same conclusion as we did,
2039 * though, and the cache entries should be identical.
2040 *
2041 * Cache updates are atomic so even if the two versions were not
2042 * identical, we wouldn't care because they are both correct.
2043 */
2044 return pt_bcache_add(bcache, ioff, bce);
2045 }
2046
2047 /* Proceed at a potentially truncated instruction.
2048 *
2049 * We were not able to decode the instruction at @decoder->ip in @decoder's
2050 * cached section. This is typically caused by not having enough bytes.
2051 *
2052 * Try to decode the instruction again using the entire image. If this succeeds
2053 we expect to end up with an instruction that was truncated in the section in
2054 which it started. We provide the full instruction in this case and end the block.
2055 *
2056 * Returns zero on success, a negative error code otherwise.
2057 */
pt_blk_proceed_truncated(struct pt_block_decoder * decoder,struct pt_block * block)2058 static int pt_blk_proceed_truncated(struct pt_block_decoder *decoder,
2059 struct pt_block *block)
2060 {
2061 struct pt_insn_ext iext;
2062 struct pt_insn insn;
2063 int errcode;
2064
2065 if (!decoder || !block)
2066 return -pte_internal;
2067
2068 memset(&iext, 0, sizeof(iext));
2069 memset(&insn, 0, sizeof(insn));
2070
2071 insn.mode = decoder->mode;
2072 insn.ip = decoder->ip;
2073
2074 errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2075 if (errcode < 0)
2076 return errcode;
2077
2078 /* We shouldn't use this function if the instruction isn't truncated. */
2079 if (!insn.truncated)
2080 return -pte_internal;
2081
2082 /* Provide the instruction in the block. This ends the block. */
2083 memcpy(block->raw, insn.raw, insn.size);
2084 block->iclass = insn.iclass;
2085 block->size = insn.size;
2086 block->truncated = 1;
2087
2088 /* Log calls' return addresses for return compression. */
2089 errcode = pt_blk_log_call(decoder, &insn, &iext);
2090 if (errcode < 0)
2091 return errcode;
2092
2093 /* Let's see if we can proceed to the next IP without trace.
2094 *
2095 * The truncated instruction ends the block but we still need to get the
2096 * next block's start IP.
2097 */
2098 errcode = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2099 if (errcode < 0) {
2100 if (errcode != -pte_bad_query)
2101 return errcode;
2102
2103 return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2104 }
2105
2106 return 0;
2107 }
2108
2109 /* Proceed to the next decision point using the block cache.
2110 *
2111 * Tracing is enabled and we don't have an event pending. We already set
2112 * @block's isid. All reads are done within @msec as we're not switching
2113 * sections between blocks.
2114 *
2115 * Proceed as far as we get without trace. Stop when we either:
2116 *
2117 * - need trace in order to continue
2118 * - overflow the max number of instructions in a block
2119 *
2120 * We actually proceed one instruction further to get the start IP for the next
2121 * block. This only updates @decoder's internal state, though.
2122 *
2123 * Returns zero on success, a negative error code otherwise.
2124 */
pt_blk_proceed_no_event_cached(struct pt_block_decoder * decoder,struct pt_block * block,struct pt_block_cache * bcache,const struct pt_mapped_section * msec)2125 static int pt_blk_proceed_no_event_cached(struct pt_block_decoder *decoder,
2126 struct pt_block *block,
2127 struct pt_block_cache *bcache,
2128 const struct pt_mapped_section *msec)
2129 {
2130 struct pt_bcache_entry bce;
2131 uint16_t binsn, ninsn;
2132 uint64_t offset, nip;
2133 int status;
2134
2135 if (!decoder || !block)
2136 return -pte_internal;
2137
2138 offset = pt_msec_unmap(msec, decoder->ip);
2139 status = pt_bcache_lookup(&bce, bcache, offset);
2140 if (status < 0)
2141 return status;
2142
2143 /* If we don't find a valid cache entry, fill the cache. */
2144 if (!pt_bce_is_valid(bce))
2145 return pt_blk_proceed_no_event_fill_cache(decoder, block,
2146 bcache, msec,
2147 bcache_fill_steps);
2148
2149 /* If we switched sections, the original section must have been split
2150 * underneath us. A split preserves the block cache of the original
2151 * section.
2152 *
2153 * Crossing sections requires ending the block so we can indicate the
2154 * proper isid for the entire block.
2155 *
2156 * Plus there's the chance that the new section that caused the original
2157 * section to split changed instructions.
2158 *
2159 * This check will also cover changes to a linear sequence of code we
2160 * would otherwise have jumped over as long as the start and end are in
2161 * different sub-sections.
2162 *
2163 * Since we stop on every (backwards) branch (through an artificial stop
2164 * in the case of a near direct backward branch) we will detect all
2165 * section splits.
2166 *
2167 * Switch to the slow path until we reach the end of this section.
2168 */
2169 nip = decoder->ip + bce.displacement;
2170 if (!pt_blk_is_in_section(msec, nip))
2171 return pt_blk_proceed_no_event_uncached(decoder, block);
2172
2173 /* We have a valid cache entry. Let's first check if the way to the
2174 * decision point still fits into @block.
2175 *
2176 * If it doesn't, we end the block without filling it as much as we
2177 * could since this would require us to switch to the slow path.
2178 *
2179 * On the next iteration, we will start with an empty block, which is
2180 * guaranteed to have enough room for at least one block cache entry.
2181 */
2182 binsn = block->ninsn;
2183 ninsn = binsn + (uint16_t) bce.ninsn;
2184 if (ninsn < binsn)
2185 return 0;
2186
2187 /* Jump ahead to the decision point and proceed from there.
2188 *
2189 * We're not switching execution modes so even if @block already has an
2190 * execution mode, it will be the one we're going to set.
2191 */
2192 decoder->ip = nip;
2193
2194 /* We don't know the instruction class so we should be setting it to
2195 * ptic_error. Since we will be able to fill it back in later in most
2196 * cases, we move the clearing to the switch cases that don't.
2197 */
2198 block->end_ip = nip;
2199 block->ninsn = ninsn;
2200 block->mode = pt_bce_exec_mode(bce);
2201
2202
2203 switch (pt_bce_qualifier(bce)) {
2204 case ptbq_again:
2205 /* We're not able to reach the actual decision point due to
2206 * overflows so we inserted a trampoline.
2207 *
2208 * We don't know the instruction and it is not guaranteed that
2209 * we will proceed further (e.g. if @block overflowed). Let's
2210 * clear any previously stored instruction class which has
2211 * become invalid when we updated @block->ninsn.
2212 */
2213 block->iclass = ptic_error;
2214
2215 return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2216 msec);
2217
2218 case ptbq_cond:
2219 /* We're at a conditional branch. */
2220 block->iclass = ptic_cond_jump;
2221
2222 /* Let's first check whether we know the size of the
2223 * instruction. If we do, we might get away without decoding
2224 * the instruction.
2225 *
2226 * If we don't know the size we might as well do the full decode
2227 * and proceed-with-trace flow we do for ptbq_decode.
2228 */
2229 if (bce.isize) {
2230 uint64_t ip;
2231 int taken;
2232
2233 /* If the branch is not taken, we don't need to decode
2234 * the instruction at @decoder->ip.
2235 *
2236 * If it is taken, we have to implement everything here.
2237 * We can't use the normal decode and proceed-with-trace
2238 * flow since we already consumed the TNT bit.
2239 */
2240 status = pt_blk_cond_branch(decoder, &taken);
2241 if (status < 0)
2242 return status;
2243
2244 /* Preserve the query decoder's response which indicates
2245 * upcoming events.
2246 */
2247 decoder->status = status;
2248
2249 ip = decoder->ip;
2250 if (taken) {
2251 struct pt_insn_ext iext;
2252 struct pt_insn insn;
2253
2254 memset(&iext, 0, sizeof(iext));
2255 memset(&insn, 0, sizeof(insn));
2256
2257 insn.mode = pt_bce_exec_mode(bce);
2258 insn.ip = ip;
2259
2260 status = pt_blk_decode_in_section(&insn, &iext,
2261 msec);
2262 if (status < 0)
2263 return status;
2264
2265 ip += iext.variant.branch.displacement;
2266 }
2267
2268 decoder->ip = ip + bce.isize;
2269 break;
2270 }
2271
2272 fallthrough;
2273 case ptbq_decode: {
2274 struct pt_insn_ext iext;
2275 struct pt_insn insn;
2276
2277 /* We need to decode the instruction at @decoder->ip and decide
2278 * what to do based on that.
2279 *
2280 * We already accounted for the instruction so we can't just
2281 * call pt_blk_proceed_one_insn().
2282 */
2283
2284 memset(&iext, 0, sizeof(iext));
2285 memset(&insn, 0, sizeof(insn));
2286
2287 insn.mode = pt_bce_exec_mode(bce);
2288 insn.ip = decoder->ip;
2289
2290 status = pt_blk_decode_in_section(&insn, &iext, msec);
2291 if (status < 0) {
2292 if (status != -pte_bad_insn)
2293 return status;
2294
2295 return pt_blk_proceed_truncated(decoder, block);
2296 }
2297
2298 /* We just decoded @insn so we know the instruction class. */
2299 block->iclass = insn.iclass;
2300
2301 /* Log calls' return addresses for return compression. */
2302 status = pt_blk_log_call(decoder, &insn, &iext);
2303 if (status < 0)
2304 return status;
2305
2306 /* Let's see if we can proceed to the next IP without trace.
2307 *
2308 * Note that we also stop due to displacement overflows or to
2309 * maintain the return-address stack for near direct calls.
2310 */
2311 status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2312 if (status < 0) {
2313 if (status != -pte_bad_query)
2314 return status;
2315
2316 /* We can't, so let's proceed with trace, which
2317 * completes the block.
2318 */
2319 return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2320 }
2321
2322 /* End the block if the user asked us to.
2323 *
2324 * We only need to take care of direct near branches.
2325 * Indirect and far branches require trace and will naturally
2326 * end a block.
2327 */
2328 if ((decoder->flags.variant.block.end_on_call &&
2329 (insn.iclass == ptic_call)) ||
2330 (decoder->flags.variant.block.end_on_jump &&
2331 (insn.iclass == ptic_jump)))
2332 break;
2333
2334 /* If we can proceed without trace and we stay in @msec we may
2335 * proceed further.
2336 *
2337 * We're done if we switch sections, though.
2338 */
2339 if (!pt_blk_is_in_section(msec, decoder->ip))
2340 break;
2341
2342 return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2343 msec);
2344 }
2345
2346 case ptbq_ind_call: {
2347 uint64_t ip;
2348
2349 /* We're at a near indirect call. */
2350 block->iclass = ptic_call;
2351
2352 /* We need to update the return-address stack and query the
2353 * destination IP.
2354 */
2355 ip = decoder->ip;
2356
2357 /* If we already know the size of the instruction, we don't need
2358 * to re-decode it.
2359 */
2360 if (bce.isize)
2361 ip += bce.isize;
2362 else {
2363 struct pt_insn_ext iext;
2364 struct pt_insn insn;
2365
2366 memset(&iext, 0, sizeof(iext));
2367 memset(&insn, 0, sizeof(insn));
2368
2369 insn.mode = pt_bce_exec_mode(bce);
2370 insn.ip = ip;
2371
2372 status = pt_blk_decode_in_section(&insn, &iext, msec);
2373 if (status < 0)
2374 return status;
2375
2376 ip += insn.size;
2377 }
2378
2379 status = pt_retstack_push(&decoder->retstack, ip);
2380 if (status < 0)
2381 return status;
2382
2383 status = pt_blk_indirect_branch(decoder, &decoder->ip);
2384 if (status < 0)
2385 return status;
2386
2387 /* Preserve the query decoder's response which indicates
2388 * upcoming events.
2389 */
2390 decoder->status = status;
2391 break;
2392 }
2393
2394 case ptbq_return: {
2395 int taken;
2396
2397 /* We're at a near return. */
2398 block->iclass = ptic_return;
2399
2400 /* Check for a compressed return. */
2401 status = pt_blk_cond_branch(decoder, &taken);
2402 if (status < 0) {
2403 if (status != -pte_bad_query)
2404 return status;
2405
2406 /* The return is not compressed. We need another query
2407 * to determine the destination IP.
2408 */
2409 status = pt_blk_indirect_branch(decoder, &decoder->ip);
2410 if (status < 0)
2411 return status;
2412
2413 /* Preserve the query decoder's response which indicates
2414 * upcoming events.
2415 */
2416 decoder->status = status;
2417 break;
2418 }
2419
2420 /* Preserve the query decoder's response which indicates
2421 * upcoming events.
2422 */
2423 decoder->status = status;
2424
2425 /* A compressed return is indicated by a taken conditional
2426 * branch.
2427 */
2428 if (!taken)
2429 return -pte_bad_retcomp;
2430
2431 return pt_retstack_pop(&decoder->retstack, &decoder->ip);
2432 }
2433
2434 case ptbq_indirect:
2435 /* We're at an indirect jump or far transfer.
2436 *
2437 * We don't know the exact instruction class and there's no
2438 * reason to decode the instruction for any other purpose.
2439 *
2440 * Indicate that we don't know the instruction class and leave
2441 * it to our caller to decode the instruction if needed.
2442 */
2443 block->iclass = ptic_error;
2444
2445 /* This is neither a near call nor return so we don't need to
2446 * touch the return-address stack.
2447 *
2448 * Just query the destination IP.
2449 */
2450 status = pt_blk_indirect_branch(decoder, &decoder->ip);
2451 if (status < 0)
2452 return status;
2453
2454 /* Preserve the query decoder's response which indicates
2455 * upcoming events.
2456 */
2457 decoder->status = status;
2458 break;
2459 }
2460
2461 return 0;
2462 }
2463
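/* Fill the section cache for the current decoder IP.
 *
 * Look up the section containing @decoder->ip, cache it in @decoder->scache,
 * and request a block cache for it. Provide the mapped section in @pmsec.
 *
 * Returns the image section identifier on success, a negative error code
 * otherwise.
 */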
pt_blk_msec_fill(struct pt_block_decoder * decoder,const struct pt_mapped_section ** pmsec)2464 static int pt_blk_msec_fill(struct pt_block_decoder *decoder,
2465 const struct pt_mapped_section **pmsec)
2466 {
2467 const struct pt_mapped_section *msec;
2468 struct pt_section *section;
2469 int isid, errcode;
2470
2471 if (!decoder || !pmsec)
2472 return -pte_internal;
2473
2474 isid = pt_msec_cache_fill(&decoder->scache, &msec, decoder->image,
2475 &decoder->asid, decoder->ip);
2476 if (isid < 0)
2477 return isid;
2478
2479 section = pt_msec_section(msec);
2480 if (!section)
2481 return -pte_internal;
2482
2483 *pmsec = msec;
2484
2485 errcode = pt_section_request_bcache(section);
2486 if (errcode < 0)
2487 return errcode;
2488
2489 return isid;
2490 }
2491
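/* Find the mapped section containing the current decoder IP.
 *
 * Read the cached section from @decoder->scache and fall back to
 * pt_blk_msec_fill() if @decoder->ip does not lie in the cached section.
 *
 * Returns the image section identifier on success, a negative error code
 * otherwise.
 */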
pt_blk_msec_lookup(struct pt_block_decoder * decoder,const struct pt_mapped_section ** pmsec)2492 static inline int pt_blk_msec_lookup(struct pt_block_decoder *decoder,
2493 const struct pt_mapped_section **pmsec)
2494 {
2495 int isid;
2496
2497 if (!decoder)
2498 return -pte_internal;
2499
2500 isid = pt_msec_cache_read(&decoder->scache, pmsec, decoder->image,
2501 decoder->ip);
2502 if (isid < 0) {
2503 if (isid != -pte_nomap)
2504 return isid;
2505
2506 return pt_blk_msec_fill(decoder, pmsec);
2507 }
2508
2509 return isid;
2510 }
2511
2512 /* Proceed to the next decision point - try using the cache.
2513 *
2514 * Tracing is enabled and we don't have an event pending. Proceed as far as
2515 * we get without trace. Stop when we either:
2516 *
2517 * - need trace in order to continue
2518 * - overflow the max number of instructions in a block
2519 *
2520 * We actually proceed one instruction further to get the start IP for the next
2521 * block. This only updates @decoder's internal state, though.
2522 *
2523 * Returns zero on success, a negative error code otherwise.
2524 */
pt_blk_proceed_no_event(struct pt_block_decoder * decoder,struct pt_block * block)2525 static int pt_blk_proceed_no_event(struct pt_block_decoder *decoder,
2526 struct pt_block *block)
2527 {
2528 const struct pt_mapped_section *msec;
2529 struct pt_block_cache *bcache;
2530 struct pt_section *section;
2531 int isid;
2532
2533 if (!decoder || !block)
2534 return -pte_internal;
2535
2536 isid = pt_blk_msec_lookup(decoder, &msec);
2537 if (isid < 0) {
2538 if (isid != -pte_nomap)
2539 return isid;
2540
2541 /* Even if there is no such section in the image, we may still
2542 * read the memory via the callback function.
2543 */
2544 return pt_blk_proceed_no_event_uncached(decoder, block);
2545 }
2546
2547 /* We do not switch sections inside a block. */
2548 if (isid != block->isid) {
2549 if (!pt_blk_block_is_empty(block))
2550 return 0;
2551
2552 block->isid = isid;
2553 }
2554
2555 section = pt_msec_section(msec);
2556 if (!section)
2557 return -pte_internal;
2558
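	/* Take the slow path if the section does not provide a block cache. */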
2559 bcache = pt_section_bcache(section);
2560 if (!bcache)
2561 return pt_blk_proceed_no_event_uncached(decoder, block);
2562
2563 return pt_blk_proceed_no_event_cached(decoder, block, bcache, msec);
2564 }
2565
2566 /* Proceed to the next event or decision point.
2567 *
2568 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2569 * code otherwise.
2570 */
pt_blk_proceed(struct pt_block_decoder * decoder,struct pt_block * block)2571 static int pt_blk_proceed(struct pt_block_decoder *decoder,
2572 struct pt_block *block)
2573 {
2574 int status;
2575
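	/* Check for a pending event; if there is one, take the event flow. */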
2576 status = pt_blk_fetch_event(decoder);
2577 if (status != 0) {
2578 if (status < 0)
2579 return status;
2580
2581 return pt_blk_proceed_event(decoder, block);
2582 }
2583
2584 /* If tracing is disabled we should either be out of trace or we should
2585 * have taken the event flow above.
2586 */
2587 if (!decoder->enabled) {
2588 if (decoder->status & pts_eos)
2589 return -pte_eos;
2590
2591 return -pte_no_enable;
2592 }
2593
2594 status = pt_blk_proceed_no_event(decoder, block);
2595 if (status < 0)
2596 return status;
2597
2598 return pt_blk_proceed_trailing_event(decoder, block);
2599 }
2600
2601 enum {
2602 /* The maximum number of steps to take when determining whether the
2603 * event location can be reached.
2604 */
2605 bdm64_max_steps = 0x100
2606 };
2607
2608 /* Try to work around erratum BDM64.
2609 *
2610 * If we got a transaction abort immediately following a branch that produced
2611 * trace, the trace for that branch might have been corrupted.
2612 *
2613 * Returns a positive integer if the erratum was handled.
2614 * Returns zero if the erratum does not seem to apply.
2615 * Returns a negative error code otherwise.
2616 */
pt_blk_handle_erratum_bdm64(struct pt_block_decoder * decoder,const struct pt_block * block,const struct pt_event * ev)2617 static int pt_blk_handle_erratum_bdm64(struct pt_block_decoder *decoder,
2618 const struct pt_block *block,
2619 const struct pt_event *ev)
2620 {
2621 struct pt_insn_ext iext;
2622 struct pt_insn insn;
2623 int status;
2624
2625 if (!decoder || !block || !ev)
2626 return -pte_internal;
2627
2628 /* This only affects aborts. */
2629 if (!ev->variant.tsx.aborted)
2630 return 0;
2631
2632 /* This only affects branches that require trace.
2633 *
2634 * If the erratum hits, that branch ended the current block and brought
2635 * us to the trailing event flow.
2636 */
2637 if (pt_blk_block_is_empty(block))
2638 return 0;
2639
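	/* Decode the last instruction of @block to check whether it is a
	 * branch.
	 */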
2640 insn.mode = block->mode;
2641 insn.ip = block->end_ip;
2642
2643 status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2644 if (status < 0)
2645 return 0;
2646
2647 if (!pt_insn_is_branch(&insn, &iext))
2648 return 0;
2649
2650 /* Let's check if we can reach the event location from here.
2651 *
2652 * If we can, let's assume the erratum did not hit. We might still be
2653 * wrong but we're not able to tell.
2654 */
2655 status = pt_insn_range_is_contiguous(decoder->ip, ev->variant.tsx.ip,
2656 decoder->mode, decoder->image,
2657 &decoder->asid, bdm64_max_steps);
2658 if (status > 0)
2659 return status;
2660
2661 /* We can't reach the event location. This could either mean that we
2662 * stopped too early (and status is zero) or that the erratum hit.
2663 *
2664 * We assume the latter and pretend that the previous branch brought us
2665 * to the event location, instead.
2666 */
2667 decoder->ip = ev->variant.tsx.ip;
2668
2669 return 1;
2670 }
2671
2672 /* Check whether a trailing TSX event should be postponed.
2673 *
2674 * This involves handling erratum BDM64.
2675 *
2676 * Returns a positive integer if the event is to be postponed.
2677 * Returns zero if the event should be processed.
2678 * Returns a negative error code otherwise.
2679 */
pt_blk_postpone_trailing_tsx(struct pt_block_decoder * decoder,struct pt_block * block,const struct pt_event * ev)2680 static inline int pt_blk_postpone_trailing_tsx(struct pt_block_decoder *decoder,
2681 struct pt_block *block,
2682 const struct pt_event *ev)
2683 {
2684 int status;
2685
2686 if (!decoder || !ev)
2687 return -pte_internal;
2688
2689 if (ev->ip_suppressed)
2690 return 0;
2691
2692 if (block && decoder->query.config.errata.bdm64) {
2693 status = pt_blk_handle_erratum_bdm64(decoder, block, ev);
2694 if (status < 0)
2695 return 1;
2696 }
2697
2698 if (decoder->ip != ev->variant.tsx.ip)
2699 return 1;
2700
2701 return 0;
2702 }
2703
2704 /* Proceed with events that bind to the current decoder IP.
2705 *
2706 * This function is used in the following scenarios:
2707 *
2708 * - we just synchronized onto the trace stream
2709 * - we ended a block and proceeded to the next IP
2710 * - we processed an event that was indicated by this function
2711 *
2712 * Check if there is an event at the current IP that needs to be indicated to
2713 * the user.
2714 *
2715 * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2716 * code otherwise.
2717 */
pt_blk_proceed_trailing_event(struct pt_block_decoder * decoder,struct pt_block * block)2718 static int pt_blk_proceed_trailing_event(struct pt_block_decoder *decoder,
2719 struct pt_block *block)
2720 {
2721 struct pt_event *ev;
2722 int status;
2723
2724 if (!decoder)
2725 return -pte_internal;
2726
2727 status = pt_blk_fetch_event(decoder);
2728 if (status <= 0) {
2729 if (status < 0)
2730 return status;
2731
2732 status = pt_blk_proceed_postponed_insn(decoder);
2733 if (status < 0)
2734 return status;
2735
2736 return pt_blk_status(decoder, 0);
2737 }
2738
2739 ev = &decoder->event;
2740 switch (ev->type) {
2741 case ptev_disabled:
2742 /* Synchronous disable events are normally indicated on the
2743 * event flow.
2744 */
2745 if (!decoder->process_insn)
2746 break;
2747
2748 /* A sync disable may bind to a CR3 changing instruction. */
2749 if (ev->ip_suppressed &&
2750 pt_insn_changes_cr3(&decoder->insn, &decoder->iext))
2751 return pt_blk_status(decoder, pts_event_pending);
2752
2753 /* Or it binds to the next branch that would require trace.
2754 *
2755 * Try to complete processing the current instruction by
2756 * proceeding past it. If that fails because it would require
2757 * trace, we can apply the disabled event.
2758 */
2759 status = pt_insn_next_ip(&decoder->ip, &decoder->insn,
2760 &decoder->iext);
2761 if (status < 0) {
2762 if (status != -pte_bad_query)
2763 return status;
2764
2765 status = pt_blk_set_disable_resume_ip(decoder,
2766 &decoder->insn);
2767 if (status < 0)
2768 return status;
2769
2770 return pt_blk_status(decoder, pts_event_pending);
2771 }
2772
2773 /* We proceeded past the current instruction. */
2774 status = pt_blk_clear_postponed_insn(decoder);
2775 if (status < 0)
2776 return status;
2777
2778 /* This might have brought us to the disable IP. */
2779 if (!ev->ip_suppressed &&
2780 decoder->ip == ev->variant.disabled.ip)
2781 return pt_blk_status(decoder, pts_event_pending);
2782
2783 break;
2784
2785 case ptev_enabled:
2786 /* This event does not bind to an instruction. */
2787 status = pt_blk_proceed_postponed_insn(decoder);
2788 if (status < 0)
2789 return status;
2790
2791 return pt_blk_status(decoder, pts_event_pending);
2792
2793 case ptev_async_disabled:
2794 /* This event does not bind to an instruction. */
2795 status = pt_blk_proceed_postponed_insn(decoder);
2796 if (status < 0)
2797 return status;
2798
2799 if (decoder->ip != ev->variant.async_disabled.at)
2800 break;
2801
2802 if (decoder->query.config.errata.skd022) {
2803 status = pt_blk_handle_erratum_skd022(decoder, ev);
2804 if (status != 0) {
2805 if (status < 0)
2806 return status;
2807
2808 /* If the erratum applies, the event is modified
2809 * to a synchronous disable event that will be
2810 * processed on the next pt_blk_proceed_event()
2811 * call. We're done.
2812 */
2813 break;
2814 }
2815 }
2816
2817 return pt_blk_status(decoder, pts_event_pending);
2818
2819 case ptev_async_branch:
2820 /* This event does not bind to an instruction. */
2821 status = pt_blk_proceed_postponed_insn(decoder);
2822 if (status < 0)
2823 return status;
2824
2825 if (decoder->ip != ev->variant.async_branch.from)
2826 break;
2827
2828 return pt_blk_status(decoder, pts_event_pending);
2829
2830 case ptev_paging:
2831 /* We apply the event immediately if we're not tracing. */
2832 if (!decoder->enabled)
2833 return pt_blk_status(decoder, pts_event_pending);
2834
2835 /* Synchronous paging events are normally indicated on the event
2836 * flow, unless they bind to the same instruction as a previous
2837 * event.
2838 *
2839 * We bind at most one paging event to an instruction, though.
2840 */
2841 if (!decoder->process_insn || decoder->bound_paging)
2842 break;
2843
2844 /* We're done if we're not binding to the currently postponed
2845 * instruction. We will process the event on the normal event
2846 * flow in the next iteration.
2847 */
2848 if (!pt_insn_binds_to_pip(&decoder->insn, &decoder->iext))
2849 break;
2850
2851 /* We bound a paging event. Make sure we do not bind further
2852 * paging events to this instruction.
2853 */
2854 decoder->bound_paging = 1;
2855
2856 return pt_blk_status(decoder, pts_event_pending);
2857
2858 case ptev_async_paging:
2859 /* This event does not bind to an instruction. */
2860 status = pt_blk_proceed_postponed_insn(decoder);
2861 if (status < 0)
2862 return status;
2863
2864 if (!ev->ip_suppressed &&
2865 decoder->ip != ev->variant.async_paging.ip)
2866 break;
2867
2868 return pt_blk_status(decoder, pts_event_pending);
2869
2870 case ptev_vmcs:
2871 /* We apply the event immediately if we're not tracing. */
2872 if (!decoder->enabled)
2873 return pt_blk_status(decoder, pts_event_pending);
2874
2875 /* Synchronous vmcs events are normally indicated on the event
2876 * flow, unless they bind to the same instruction as a previous
2877 * event.
2878 *
2879 * We bind at most one vmcs event to an instruction, though.
2880 */
2881 if (!decoder->process_insn || decoder->bound_vmcs)
2882 break;
2883
2884 /* We're done if we're not binding to the currently postponed
2885 * instruction. We will process the event on the normal event
2886 * flow in the next iteration.
2887 */
2888 if (!pt_insn_binds_to_vmcs(&decoder->insn, &decoder->iext))
2889 break;
2890
2891 /* We bound a vmcs event. Make sure we do not bind further vmcs
2892 * events to this instruction.
2893 */
2894 decoder->bound_vmcs = 1;
2895
2896 return pt_blk_status(decoder, pts_event_pending);
2897
2898 case ptev_async_vmcs:
2899 /* This event does not bind to an instruction. */
2900 status = pt_blk_proceed_postponed_insn(decoder);
2901 if (status < 0)
2902 return status;
2903
2904 if (!ev->ip_suppressed &&
2905 decoder->ip != ev->variant.async_vmcs.ip)
2906 break;
2907
2908 return pt_blk_status(decoder, pts_event_pending);
2909
2910 case ptev_overflow:
2911 /* This event does not bind to an instruction. */
2912 status = pt_blk_proceed_postponed_insn(decoder);
2913 if (status < 0)
2914 return status;
2915
2916 return pt_blk_status(decoder, pts_event_pending);
2917
2918 case ptev_exec_mode:
2919 /* This event does not bind to an instruction. */
2920 status = pt_blk_proceed_postponed_insn(decoder);
2921 if (status < 0)
2922 return status;
2923
2924 if (!ev->ip_suppressed &&
2925 decoder->ip != ev->variant.exec_mode.ip)
2926 break;
2927
2928 return pt_blk_status(decoder, pts_event_pending);
2929
2930 case ptev_tsx:
2931 /* This event does not bind to an instruction. */
2932 status = pt_blk_proceed_postponed_insn(decoder);
2933 if (status < 0)
2934 return status;
2935
2936 status = pt_blk_postpone_trailing_tsx(decoder, block, ev);
2937 if (status != 0) {
2938 if (status < 0)
2939 return status;
2940
2941 break;
2942 }
2943
2944 return pt_blk_status(decoder, pts_event_pending);
2945
2946 case ptev_stop:
2947 /* This event does not bind to an instruction. */
2948 status = pt_blk_proceed_postponed_insn(decoder);
2949 if (status < 0)
2950 return status;
2951
2952 return pt_blk_status(decoder, pts_event_pending);
2953
2954 case ptev_exstop:
2955 /* This event does not bind to an instruction. */
2956 status = pt_blk_proceed_postponed_insn(decoder);
2957 if (status < 0)
2958 return status;
2959
2960 if (!ev->ip_suppressed && decoder->enabled &&
2961 decoder->ip != ev->variant.exstop.ip)
2962 break;
2963
2964 return pt_blk_status(decoder, pts_event_pending);
2965
2966 case ptev_mwait:
2967 /* This event does not bind to an instruction. */
2968 status = pt_blk_proceed_postponed_insn(decoder);
2969 if (status < 0)
2970 return status;
2971
2972 if (!ev->ip_suppressed && decoder->enabled &&
2973 decoder->ip != ev->variant.mwait.ip)
2974 break;
2975
2976 return pt_blk_status(decoder, pts_event_pending);
2977
2978 case ptev_pwre:
2979 case ptev_pwrx:
2980 /* This event does not bind to an instruction. */
2981 status = pt_blk_proceed_postponed_insn(decoder);
2982 if (status < 0)
2983 return status;
2984
2985 return pt_blk_status(decoder, pts_event_pending);
2986
2987 case ptev_ptwrite:
2988 /* We apply the event immediately if we're not tracing. */
2989 if (!decoder->enabled)
2990 return pt_blk_status(decoder, pts_event_pending);
2991
2992 /* Ptwrite events are normally indicated on the event flow,
2993 * unless they bind to the same instruction as a previous event.
2994 *
2995 * We bind at most one ptwrite event to an instruction, though.
2996 */
2997 if (!decoder->process_insn || decoder->bound_ptwrite)
2998 break;
2999
3000 /* We're done if we're not binding to the currently postponed
3001 * instruction. We will process the event on the normal event
3002 * flow in the next iteration.
3003 */
3004 if (!ev->ip_suppressed ||
3005 !pt_insn_is_ptwrite(&decoder->insn, &decoder->iext))
3006 break;
3007
3008 /* We bound a ptwrite event. Make sure we do not bind further
3009 * ptwrite events to this instruction.
3010 */
3011 decoder->bound_ptwrite = 1;
3012
3013 return pt_blk_status(decoder, pts_event_pending);
3014
3015 case ptev_tick:
3016 case ptev_cbr:
3017 case ptev_mnt:
3018 /* This event does not bind to an instruction. */
3019 status = pt_blk_proceed_postponed_insn(decoder);
3020 if (status < 0)
3021 return status;
3022
3023 return pt_blk_status(decoder, pts_event_pending);
3024 }
3025
3026 /* No further events. Proceed past any postponed instruction. */
3027 status = pt_blk_proceed_postponed_insn(decoder);
3028 if (status < 0)
3029 return status;
3030
3031 return pt_blk_status(decoder, 0);
3032 }
3033
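/* Decode the next block of instructions.
 *
 * Fill in @ublock, using at most @size bytes, and proceed to the start of the
 * next block.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative
 * error code otherwise.
 */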
pt_blk_next(struct pt_block_decoder * decoder,struct pt_block * ublock,size_t size)3034 int pt_blk_next(struct pt_block_decoder *decoder, struct pt_block *ublock,
3035 size_t size)
3036 {
3037 struct pt_block block, *pblock;
3038 int errcode, status;
3039
3040 if (!decoder || !ublock)
3041 return -pte_invalid;
3042
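	/* Decode directly into the user's block if it has the expected size;
	 * otherwise, decode into a local block and copy a prefix to the user.
	 */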
3043 pblock = size == sizeof(block) ? ublock : █
3044
3045 /* Zero-initialize the block in case of error returns. */
3046 memset(pblock, 0, sizeof(*pblock));
3047
3048 /* Fill in a few things from the current decode state.
3049 *
3050 * This reflects the state of the last pt_blk_next() or pt_blk_start()
3051 * call. Note that, unless we stop with tracing disabled, we proceed
3052 * already to the start IP of the next block.
3053 *
3054 * Some of the state may later be overwritten as we process events.
3055 */
3056 pblock->ip = decoder->ip;
3057 pblock->mode = decoder->mode;
3058 if (decoder->speculative)
3059 pblock->speculative = 1;
3060
3061 /* Proceed one block. */
3062 status = pt_blk_proceed(decoder, pblock);
3063
3064 errcode = block_to_user(ublock, size, pblock);
3065 if (errcode < 0)
3066 return errcode;
3067
3068 return status;
3069 }
3070
3071 /* Process an enabled event.
3072 *
3073 * Returns zero on success, a negative error code otherwise.
3074 */
pt_blk_process_enabled(struct pt_block_decoder * decoder,const struct pt_event * ev)3075 static int pt_blk_process_enabled(struct pt_block_decoder *decoder,
3076 const struct pt_event *ev)
3077 {
3078 if (!decoder || !ev)
3079 return -pte_internal;
3080
3081 /* This event can't be a status update. */
3082 if (ev->status_update)
3083 return -pte_bad_context;
3084
3085 /* We must have an IP in order to start decoding. */
3086 if (ev->ip_suppressed)
3087 return -pte_noip;
3088
3089 /* We must currently be disabled. */
3090 if (decoder->enabled)
3091 return -pte_bad_context;
3092
3093 decoder->ip = ev->variant.enabled.ip;
3094 decoder->enabled = 1;
3095 decoder->process_event = 0;
3096
3097 return 0;
3098 }
3099
3100 /* Process a disabled event.
3101 *
3102 * Returns zero on success, a negative error code otherwise.
3103 */
pt_blk_process_disabled(struct pt_block_decoder * decoder,const struct pt_event * ev)3104 static int pt_blk_process_disabled(struct pt_block_decoder *decoder,
3105 const struct pt_event *ev)
3106 {
3107 if (!decoder || !ev)
3108 return -pte_internal;
3109
3110 /* This event can't be a status update. */
3111 if (ev->status_update)
3112 return -pte_bad_context;
3113
3114 /* We must currently be enabled. */
3115 if (!decoder->enabled)
3116 return -pte_bad_context;
3117
3118 /* We preserve @decoder->ip. This is where we expect tracing to resume
3119 * and we'll indicate that on the subsequent enabled event if tracing
3120 * actually does resume from there.
3121 */
3122 decoder->enabled = 0;
3123 decoder->process_event = 0;
3124
3125 return 0;
3126 }
3127
3128 /* Process an asynchronous branch event.
3129 *
3130 * Returns zero on success, a negative error code otherwise.
3131 */
pt_blk_process_async_branch(struct pt_block_decoder * decoder,const struct pt_event * ev)3132 static int pt_blk_process_async_branch(struct pt_block_decoder *decoder,
3133 const struct pt_event *ev)
3134 {
3135 if (!decoder || !ev)
3136 return -pte_internal;
3137
3138 /* This event can't be a status update. */
3139 if (ev->status_update)
3140 return -pte_bad_context;
3141
3142 /* We must currently be enabled. */
3143 if (!decoder->enabled)
3144 return -pte_bad_context;
3145
3146 /* Jump to the branch destination. We will continue from there in the
3147 * next iteration.
3148 */
3149 decoder->ip = ev->variant.async_branch.to;
3150 decoder->process_event = 0;
3151
3152 return 0;
3153 }
3154
3155 /* Process a paging event.
3156 *
3157 * Returns zero on success, a negative error code otherwise.
3158 */
pt_blk_process_paging(struct pt_block_decoder * decoder,const struct pt_event * ev)3159 static int pt_blk_process_paging(struct pt_block_decoder *decoder,
3160 const struct pt_event *ev)
3161 {
3162 uint64_t cr3;
3163 int errcode;
3164
3165 if (!decoder || !ev)
3166 return -pte_internal;
3167
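	/* A CR3 change switches the address space. Invalidate the cached
	 * section and update the asid.
	 */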
3168 cr3 = ev->variant.paging.cr3;
3169 if (decoder->asid.cr3 != cr3) {
3170 errcode = pt_msec_cache_invalidate(&decoder->scache);
3171 if (errcode < 0)
3172 return errcode;
3173
3174 decoder->asid.cr3 = cr3;
3175 }
3176
3177 decoder->process_event = 0;
3178
3179 return 0;
3180 }
3181
3182 /* Process a vmcs event.
3183 *
3184 * Returns zero on success, a negative error code otherwise.
3185 */
pt_blk_process_vmcs(struct pt_block_decoder * decoder,const struct pt_event * ev)3186 static int pt_blk_process_vmcs(struct pt_block_decoder *decoder,
3187 const struct pt_event *ev)
3188 {
3189 uint64_t vmcs;
3190 int errcode;
3191
3192 if (!decoder || !ev)
3193 return -pte_internal;
3194
3195 vmcs = ev->variant.vmcs.base;
3196 if (decoder->asid.vmcs != vmcs) {
3197 errcode = pt_msec_cache_invalidate(&decoder->scache);
3198 if (errcode < 0)
3199 return errcode;
3200
3201 decoder->asid.vmcs = vmcs;
3202 }
3203
3204 decoder->process_event = 0;
3205
3206 return 0;
3207 }
3208
3209 /* Process an overflow event.
3210 *
3211 * Returns zero on success, a negative error code otherwise.
3212 */
pt_blk_process_overflow(struct pt_block_decoder * decoder,const struct pt_event * ev)3213 static int pt_blk_process_overflow(struct pt_block_decoder *decoder,
3214 const struct pt_event *ev)
3215 {
3216 if (!decoder || !ev)
3217 return -pte_internal;
3218
3219 /* This event can't be a status update. */
3220 if (ev->status_update)
3221 return -pte_bad_context;
3222
3223 /* If the IP is suppressed, the overflow resolved while tracing was
3224 * disabled. Otherwise it resolved while tracing was enabled.
3225 */
3226 if (ev->ip_suppressed) {
3227 /* Tracing is disabled. It doesn't make sense to preserve the
3228 * previous IP. This will just be misleading. Even if tracing
3229 * had been disabled before, as well, we might have missed the
3230 * re-enable in the overflow.
3231 */
3232 decoder->enabled = 0;
3233 decoder->ip = 0ull;
3234 } else {
3235 /* Tracing is enabled and we're at the IP at which the overflow
3236 * resolved.
3237 */
3238 decoder->enabled = 1;
3239 decoder->ip = ev->variant.overflow.ip;
3240 }
3241
3242 /* We don't know the TSX state. Let's assume we execute normally.
3243 *
3244 * We also don't know the execution mode. Let's keep what we have
3245 * in case we don't get an update before we have to decode the next
3246 * instruction.
3247 */
3248 decoder->speculative = 0;
3249 decoder->process_event = 0;
3250
3251 return 0;
3252 }
3253
3254 /* Process an exec mode event.
3255 *
3256 * Returns zero on success, a negative error code otherwise.
3257 */
pt_blk_process_exec_mode(struct pt_block_decoder * decoder,const struct pt_event * ev)3258 static int pt_blk_process_exec_mode(struct pt_block_decoder *decoder,
3259 const struct pt_event *ev)
3260 {
3261 enum pt_exec_mode mode;
3262
3263 if (!decoder || !ev)
3264 return -pte_internal;
3265
3266 /* Use status update events to diagnose inconsistencies. */
3267 mode = ev->variant.exec_mode.mode;
3268 if (ev->status_update && decoder->enabled &&
3269 decoder->mode != ptem_unknown && decoder->mode != mode)
3270 return -pte_bad_status_update;
3271
3272 decoder->mode = mode;
3273 decoder->process_event = 0;
3274
3275 return 0;
3276 }
3277
3278 /* Process a tsx event.
3279 *
3280 * Returns zero on success, a negative error code otherwise.
3281 */
pt_blk_process_tsx(struct pt_block_decoder * decoder,const struct pt_event * ev)3282 static int pt_blk_process_tsx(struct pt_block_decoder *decoder,
3283 const struct pt_event *ev)
3284 {
3285 if (!decoder || !ev)
3286 return -pte_internal;
3287
3288 decoder->speculative = ev->variant.tsx.speculative;
3289 decoder->process_event = 0;
3290
3291 return 0;
3292 }
3293
3294 /* Process a stop event.
3295 *
3296 * Returns zero on success, a negative error code otherwise.
3297 */
pt_blk_process_stop(struct pt_block_decoder * decoder,const struct pt_event * ev)3298 static int pt_blk_process_stop(struct pt_block_decoder *decoder,
3299 const struct pt_event *ev)
3300 {
3301 if (!decoder || !ev)
3302 return -pte_internal;
3303
3304 /* This event can't be a status update. */
3305 if (ev->status_update)
3306 return -pte_bad_context;
3307
3308 /* Tracing is always disabled before it is stopped. */
3309 if (decoder->enabled)
3310 return -pte_bad_context;
3311
3312 decoder->process_event = 0;
3313
3314 return 0;
3315 }
3316
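/* Report the pending event to the user.
 *
 * Copy at most @size bytes of the current event into @uevent and update the
 * decoder state to reflect the processed event.
 *
 * Returns a non-negative pt_status_flag bit-vector on success, a negative
 * error code otherwise.
 */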
pt_blk_event(struct pt_block_decoder * decoder,struct pt_event * uevent,size_t size)3317 int pt_blk_event(struct pt_block_decoder *decoder, struct pt_event *uevent,
3318 size_t size)
3319 {
3320 struct pt_event *ev;
3321 int status;
3322
3323 if (!decoder || !uevent)
3324 return -pte_invalid;
3325
3326 /* We must currently process an event. */
3327 if (!decoder->process_event)
3328 return -pte_bad_query;
3329
3330 ev = &decoder->event;
3331 switch (ev->type) {
3332 case ptev_enabled:
3333 /* Indicate that tracing resumes from the IP at which tracing
3334 * had been disabled before (with some special treatment for
3335 * calls).
3336 */
3337 if (ev->variant.enabled.ip == decoder->ip)
3338 ev->variant.enabled.resumed = 1;
3339
3340 status = pt_blk_process_enabled(decoder, ev);
3341 if (status < 0)
3342 return status;
3343
3344 break;
3345
3346 case ptev_async_disabled:
3347 if (decoder->ip != ev->variant.async_disabled.at)
3348 return -pte_bad_query;
3349
3350 fallthrough;
3351 case ptev_disabled:
3352
3353 status = pt_blk_process_disabled(decoder, ev);
3354 if (status < 0)
3355 return status;
3356
3357 break;
3358
3359 case ptev_async_branch:
3360 if (decoder->ip != ev->variant.async_branch.from)
3361 return -pte_bad_query;
3362
3363 status = pt_blk_process_async_branch(decoder, ev);
3364 if (status < 0)
3365 return status;
3366
3367 break;
3368
3369 case ptev_async_paging:
3370 if (!ev->ip_suppressed &&
3371 decoder->ip != ev->variant.async_paging.ip)
3372 return -pte_bad_query;
3373
3374 fallthrough;
3375 case ptev_paging:
3376 status = pt_blk_process_paging(decoder, ev);
3377 if (status < 0)
3378 return status;
3379
3380 break;
3381
3382 case ptev_async_vmcs:
3383 if (!ev->ip_suppressed &&
3384 decoder->ip != ev->variant.async_vmcs.ip)
3385 return -pte_bad_query;
3386
3387 fallthrough;
3388 case ptev_vmcs:
3389 status = pt_blk_process_vmcs(decoder, ev);
3390 if (status < 0)
3391 return status;
3392
3393 break;
3394
3395 case ptev_overflow:
3396 status = pt_blk_process_overflow(decoder, ev);
3397 if (status < 0)
3398 return status;
3399
3400 break;
3401
3402 case ptev_exec_mode:
3403 if (!ev->ip_suppressed &&
3404 decoder->ip != ev->variant.exec_mode.ip)
3405 return -pte_bad_query;
3406
3407 status = pt_blk_process_exec_mode(decoder, ev);
3408 if (status < 0)
3409 return status;
3410
3411 break;
3412
3413 case ptev_tsx:
3414 if (!ev->ip_suppressed && decoder->ip != ev->variant.tsx.ip)
3415 return -pte_bad_query;
3416
3417 status = pt_blk_process_tsx(decoder, ev);
3418 if (status < 0)
3419 return status;
3420
3421 break;
3422
3423 case ptev_stop:
3424 status = pt_blk_process_stop(decoder, ev);
3425 if (status < 0)
3426 return status;
3427
3428 break;
3429
3430 case ptev_exstop:
3431 if (!ev->ip_suppressed && decoder->enabled &&
3432 decoder->ip != ev->variant.exstop.ip)
3433 return -pte_bad_query;
3434
3435 decoder->process_event = 0;
3436 break;
3437
3438 case ptev_mwait:
3439 if (!ev->ip_suppressed && decoder->enabled &&
3440 decoder->ip != ev->variant.mwait.ip)
3441 return -pte_bad_query;
3442
3443 decoder->process_event = 0;
3444 break;
3445
3446 case ptev_pwre:
3447 case ptev_pwrx:
3448 case ptev_ptwrite:
3449 case ptev_tick:
3450 case ptev_cbr:
3451 case ptev_mnt:
3452 decoder->process_event = 0;
3453 break;
3454 }
3455
3456 /* Copy the event to the user. Make sure we're not writing beyond the
3457 * memory provided by the user.
3458 *
3459 * We might truncate details of an event but only for those events the
3460 * user can't know about, anyway.
3461 */
3462 if (sizeof(*ev) < size)
3463 size = sizeof(*ev);
3464
3465 memcpy(uevent, ev, size);
3466
3467 /* Indicate further events. */
3468 return pt_blk_proceed_trailing_event(decoder, NULL);
3469 }
3470