/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

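/*
 * Grow the stack array in fixed increments of STACK_GROWTH entries.  On
 * realloc() failure the existing allocation is left untouched.
 */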
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return 0;
}

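/*
 * Allocate (or extend) the thread's stack array and return the stack to use.
 * For the idle task there is one stack per cpu, so the array is sized to a
 * power of two large enough to be indexed by cpu; otherwise a single-element
 * array is enough.
 */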
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}

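/* Look up the idle task's per-cpu stack; NULL if it has not been set up. */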
static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

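/*
 * Return the stack for this thread: the per-cpu stack for the idle task,
 * otherwise the thread's single stack.  May be NULL.
 */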
static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

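/*
 * Push a return address.  If the stack cannot be grown, the existing entries
 * are discarded and the new entry is still recorded in the old allocation.
 */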
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used.  Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path.  When that happens the return address will be
	 * further down the stack.  If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

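/* Pop consecutive 'trace end' entries from the top of the stack. */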
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

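/*
 * Report the call/return pair formed by stack entry @idx and a return seen at
 * @timestamp to the call/return processor.  @no_return marks entries for
 * which a matching return was never actually seen.
 */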
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	return crp->process(&cr, crp->data);
}

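/*
 * Empty one stack.  When a call/return processor is attached, each remaining
 * entry is reported as a call with no return, using the last seen timestamp.
 */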
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

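/*
 * Feed one branch event into the simple return-address stack used by
 * thread_stack__sample(), e.g. from the Intel PT or BTS decoders.  This is a
 * no-op once thread_stack__process() owns the stack (ts->crp is set).
 */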
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, do not expect a call made
		 * when the trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

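/*
 * Synthesize a callchain: the sampled @ip first, then the saved return
 * addresses from the top of the stack downwards, inserting
 * PERF_CONTEXT_USER/KERNEL markers whenever the context changes.
 */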
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

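/* Push an entry that also records its call path, timestamp and db reference. */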
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;

	return 0;
}

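/*
 * Pop the entry matching @ret_addr and report it.  If the match is further
 * down the stack, the entries above it are reported first as calls with no
 * seen return.  Returns 1 if no match is found, in which case the caller
 * treats the sample as a return without a call.
 */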
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

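/*
 * Start an empty stack by pushing the current location (the sample ip, or the
 * branch target if the ip is zero) as a call that was not actually seen.
 */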
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

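/*
 * Handle a 'return' with no matching 'call' on the stack: pop kernel entries
 * when returning to user space, then push and immediately pop a synthetic
 * entry so that the call/return is still reported.
 */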
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, fsym, ip, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, addr, tm, ref, tsym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

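/*
 * Process one branch sample for call/return analysis, building call paths via
 * the call/return processor (used e.g. by the database export scripts).
 * Completed call/return pairs are reported through crp->process().
 */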
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}