// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine) {
		struct machine *machine = thread->mg->machine;
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

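/*
 * Allocate a thread stack, or return the existing one. For the idle task (see
 * thread_stack__per_cpu()) there is one stack per cpu: the stacks are kept in
 * an array indexed by cpu, which is grown to roundup_pow_of_two(cpu + 1)
 * entries whenever a larger cpu number is seen.
 */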
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

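/*
 * Report one completed call/return pair (stack entry 'idx') to the registered
 * call/return processor.
 */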
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its
	 * information is not yet complete: its 'return' has not yet been
	 * processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

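/*
 * Update the stack for one branch event. This is the simple mode used when
 * there is no call/return processor (cf. thread_stack__process()): push the
 * return address on a 'call' and pop it again on the matching 'return'.
 */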
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, a call made when the
		 * trace ended is not expected to return, so pop that too.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

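/*
 * Synthesize a callchain of at most 'sz' entries from the current thread
 * stack, starting with 'ip'. A PERF_CONTEXT_USER or PERF_CONTEXT_KERNEL
 * marker is inserted whenever consecutive addresses fall on different sides
 * of 'kernel_start'.
 */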
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}

static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

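/*
 * Put a 'bottom' entry on an empty stack, based on the current sample. The
 * entry is marked 'no_call' since the 'call' is assumed to have happened
 * before the trace started.
 */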
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

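/*
 * Tracing has resumed: if the entry on the top of the stack was pushed by a
 * 'trace end' event, report it as returned now.
 */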
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is an x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

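/*
 * Process one branch sample using the call/return processor 'crp' (e.g. for
 * db-export): maintain call paths for the thread and report completed
 * call/return pairs via crp->process().
 */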
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol, but not to the start of the
		 * symbol, may be the start of an x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp.
		 * Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}