/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
};

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	ts->arr_sz = 1;

	if (thread_stack__init(ts, thread, crp)) {
		free(ts);
		return NULL;
	}

	thread->ts = ts;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread)
{
	return thread ? thread->ts : NULL;
}
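
/*
 * thread_stack__push() and thread_stack__pop() below maintain the simple
 * stack used by thread_stack__event() when no call/return processor is
 * registered: only the return address and the 'trace_end' marker are
 * recorded per entry.
 */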
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf
	 * context switch in the kernel which doesn't stop and start tracing
	 * in exactly the same code path. When that happens the return
	 * address will be further down the stack. If the return address is
	 * not found at all, we assume the opposite (i.e. this is a return
	 * for a call that wasn't seen for some reason) and leave the stack
	 * alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}
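
/*
 * thread_stack__event - update the stack from a branch sample when only
 * callchain synthesis is needed (i.e. thread_stack__process() is not in
 * use). Calls push a return address, returns pop back to it, and a change
 * of trace number flushes the stack because the trace is discontinuous.
 */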
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing
	 * than to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the
		 * stack. Possibly, tracing began after returning to the
		 * current address, so try to pop that. Also, do not expect a
		 * call made when the trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}
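
/*
 * callchain_context() selects the PERF_CONTEXT_* marker for an address, and
 * thread_stack__sample() uses it to synthesize an ip_callchain from the
 * sampled ip plus the return addresses currently on the stack, inserting a
 * new context marker whenever the user/kernel context changes.
 */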
static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}
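
/*
 * thread_stack__bottom - the stack is empty, so fabricate a bottom-of-stack
 * entry from the current sample (preferring the 'from' ip, falling back to
 * the 'to' address) so that subsequent calls and returns have a parent call
 * path.
 */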
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true, false);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true, false);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}
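
/*
 * thread_stack__process - feed one branch sample to the call/return
 * processor path: keep the call path stack up to date and emit a
 * struct call_return (via crp->process()) for every matched or assumed
 * return. This supersedes thread_stack__event() for the same thread.
 */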
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread);
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread)
{
	struct thread_stack *ts = thread__stack(thread);

	if (!ts)
		return 0;
	return ts->cnt;
}
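
/*
 * Example usage (illustrative sketch only): how a caller might drive the
 * call/return path above. The callback name 'print_call_return' and the
 * surrounding sample loop are hypothetical; the thread, comm, sample,
 * addr_location and ref values are assumed to come from the normal perf
 * sample-processing code.
 *
 *	static int print_call_return(struct call_return *cr, void *data)
 *	{
 *		(void)data;
 *		pr_debug("call %" PRIu64 " return %" PRIu64 " branches %" PRIu64 "\n",
 *			 cr->call_time, cr->return_time, cr->branch_count);
 *		return 0;
 *	}
 *
 *	struct call_return_processor *crp;
 *
 *	crp = call_return_processor__new(print_call_return, NULL);
 *	if (!crp)
 *		return -ENOMEM;
 *
 *	// for each branch sample on 'thread':
 *	err = thread_stack__process(thread, comm, sample, from_al, to_al,
 *				    ref, crp);
 *
 *	// when the trace (or session) ends:
 *	err = thread_stack__flush(thread);
 *	thread_stack__free(thread);
 *	call_return_processor__free(crp);
 */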