12025cf9eSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 200447ccdSAdrian Hunter /* 300447ccdSAdrian Hunter * thread-stack.c: Synthesize a thread's stack using call / return events 400447ccdSAdrian Hunter * Copyright (c) 2014, Intel Corporation. 500447ccdSAdrian Hunter */ 600447ccdSAdrian Hunter 792a9e4f7SAdrian Hunter #include <linux/rbtree.h> 892a9e4f7SAdrian Hunter #include <linux/list.h> 9256d92bcSAdrian Hunter #include <linux/log2.h> 107f7c536fSArnaldo Carvalho de Melo #include <linux/zalloc.h> 11a43783aeSArnaldo Carvalho de Melo #include <errno.h> 12215a0d30SArnaldo Carvalho de Melo #include <stdlib.h> 138520a98dSArnaldo Carvalho de Melo #include <string.h> 1400447ccdSAdrian Hunter #include "thread.h" 1500447ccdSAdrian Hunter #include "event.h" 1692a9e4f7SAdrian Hunter #include "machine.h" 173c0cd952SAdrian Hunter #include "env.h" 1800447ccdSAdrian Hunter #include "debug.h" 1992a9e4f7SAdrian Hunter #include "symbol.h" 2092a9e4f7SAdrian Hunter #include "comm.h" 21451db126SChris Phlipot #include "call-path.h" 2200447ccdSAdrian Hunter #include "thread-stack.h" 2300447ccdSAdrian Hunter 2492a9e4f7SAdrian Hunter #define STACK_GROWTH 2048 2592a9e4f7SAdrian Hunter 263c0cd952SAdrian Hunter /* 273c0cd952SAdrian Hunter * State of retpoline detection. 283c0cd952SAdrian Hunter * 293c0cd952SAdrian Hunter * RETPOLINE_NONE: no retpoline detection 303c0cd952SAdrian Hunter * X86_RETPOLINE_POSSIBLE: x86 retpoline possible 313c0cd952SAdrian Hunter * X86_RETPOLINE_DETECTED: x86 retpoline detected 323c0cd952SAdrian Hunter */ 333c0cd952SAdrian Hunter enum retpoline_state_t { 343c0cd952SAdrian Hunter RETPOLINE_NONE, 353c0cd952SAdrian Hunter X86_RETPOLINE_POSSIBLE, 363c0cd952SAdrian Hunter X86_RETPOLINE_DETECTED, 373c0cd952SAdrian Hunter }; 383c0cd952SAdrian Hunter 3992a9e4f7SAdrian Hunter /** 4092a9e4f7SAdrian Hunter * struct thread_stack_entry - thread stack entry. 
4192a9e4f7SAdrian Hunter * @ret_addr: return address 4292a9e4f7SAdrian Hunter * @timestamp: timestamp (if known) 4392a9e4f7SAdrian Hunter * @ref: external reference (e.g. db_id of sample) 4492a9e4f7SAdrian Hunter * @branch_count: the branch count when the entry was created 45003ccdc7SAdrian Hunter * @insn_count: the instruction count when the entry was created 46003ccdc7SAdrian Hunter * @cyc_count: the cycle count when the entry was created 47f435887eSAdrian Hunter * @db_id: id used for db-export 4892a9e4f7SAdrian Hunter * @cp: call path 4992a9e4f7SAdrian Hunter * @no_call: a 'call' was not seen 504d60e5e3SAdrian Hunter * @trace_end: a 'call' but trace ended 51f08046cbSAdrian Hunter * @non_call: a branch but not a 'call' to the start of a different symbol 5292a9e4f7SAdrian Hunter */ 5392a9e4f7SAdrian Hunter struct thread_stack_entry { 5492a9e4f7SAdrian Hunter u64 ret_addr; 5592a9e4f7SAdrian Hunter u64 timestamp; 5692a9e4f7SAdrian Hunter u64 ref; 5792a9e4f7SAdrian Hunter u64 branch_count; 58003ccdc7SAdrian Hunter u64 insn_count; 59003ccdc7SAdrian Hunter u64 cyc_count; 60f435887eSAdrian Hunter u64 db_id; 6192a9e4f7SAdrian Hunter struct call_path *cp; 6292a9e4f7SAdrian Hunter bool no_call; 634d60e5e3SAdrian Hunter bool trace_end; 64f08046cbSAdrian Hunter bool non_call; 6592a9e4f7SAdrian Hunter }; 6692a9e4f7SAdrian Hunter 6792a9e4f7SAdrian Hunter /** 6892a9e4f7SAdrian Hunter * struct thread_stack - thread stack constructed from 'call' and 'return' 6992a9e4f7SAdrian Hunter * branch samples.
7092a9e4f7SAdrian Hunter * @stack: array that holds the stack 7192a9e4f7SAdrian Hunter * @cnt: number of entries in the stack 7292a9e4f7SAdrian Hunter * @sz: current maximum stack size 7392a9e4f7SAdrian Hunter * @trace_nr: current trace number 7492a9e4f7SAdrian Hunter * @branch_count: running branch count 75003ccdc7SAdrian Hunter * @insn_count: running instruction count 76003ccdc7SAdrian Hunter * @cyc_count: running cycle count 7792a9e4f7SAdrian Hunter * @kernel_start: kernel start address 7892a9e4f7SAdrian Hunter * @last_time: last timestamp 7992a9e4f7SAdrian Hunter * @crp: call/return processor 8092a9e4f7SAdrian Hunter * @comm: current comm 81f6060ac6SAdrian Hunter * @arr_sz: size of array if this is the first element of an array 823c0cd952SAdrian Hunter * @rstate: used to detect retpolines 8392a9e4f7SAdrian Hunter */ 8492a9e4f7SAdrian Hunter struct thread_stack { 8592a9e4f7SAdrian Hunter struct thread_stack_entry *stack; 8692a9e4f7SAdrian Hunter size_t cnt; 8792a9e4f7SAdrian Hunter size_t sz; 8892a9e4f7SAdrian Hunter u64 trace_nr; 8992a9e4f7SAdrian Hunter u64 branch_count; 90003ccdc7SAdrian Hunter u64 insn_count; 91003ccdc7SAdrian Hunter u64 cyc_count; 9292a9e4f7SAdrian Hunter u64 kernel_start; 9392a9e4f7SAdrian Hunter u64 last_time; 9492a9e4f7SAdrian Hunter struct call_return_processor *crp; 9592a9e4f7SAdrian Hunter struct comm *comm; 96f6060ac6SAdrian Hunter unsigned int arr_sz; 973c0cd952SAdrian Hunter enum retpoline_state_t rstate; 9892a9e4f7SAdrian Hunter }; 9992a9e4f7SAdrian Hunter 100256d92bcSAdrian Hunter /* 101256d92bcSAdrian Hunter * Assume pid == tid == 0 identifies the idle task as defined by 102256d92bcSAdrian Hunter * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, 103256d92bcSAdrian Hunter * and therefore requires a stack for each cpu.
104256d92bcSAdrian Hunter */ 105256d92bcSAdrian Hunter static inline bool thread_stack__per_cpu(struct thread *thread) 106256d92bcSAdrian Hunter { 107256d92bcSAdrian Hunter return !(thread->tid || thread->pid_); 108256d92bcSAdrian Hunter } 109256d92bcSAdrian Hunter 11000447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts) 11100447ccdSAdrian Hunter { 11200447ccdSAdrian Hunter struct thread_stack_entry *new_stack; 11300447ccdSAdrian Hunter size_t sz, new_sz; 11400447ccdSAdrian Hunter 11500447ccdSAdrian Hunter new_sz = ts->sz + STACK_GROWTH; 11600447ccdSAdrian Hunter sz = new_sz * sizeof(struct thread_stack_entry); 11700447ccdSAdrian Hunter 11800447ccdSAdrian Hunter new_stack = realloc(ts->stack, sz); 11900447ccdSAdrian Hunter if (!new_stack) 12000447ccdSAdrian Hunter return -ENOMEM; 12100447ccdSAdrian Hunter 12200447ccdSAdrian Hunter ts->stack = new_stack; 12300447ccdSAdrian Hunter ts->sz = new_sz; 12400447ccdSAdrian Hunter 12500447ccdSAdrian Hunter return 0; 12600447ccdSAdrian Hunter } 12700447ccdSAdrian Hunter 1282e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread, 1292e9e8688SAdrian Hunter struct call_return_processor *crp) 1302e9e8688SAdrian Hunter { 1312e9e8688SAdrian Hunter int err; 1322e9e8688SAdrian Hunter 1332e9e8688SAdrian Hunter err = thread_stack__grow(ts); 1342e9e8688SAdrian Hunter if (err) 1352e9e8688SAdrian Hunter return err; 1362e9e8688SAdrian Hunter 137fe87797dSArnaldo Carvalho de Melo if (thread->maps && thread->maps->machine) { 138fe87797dSArnaldo Carvalho de Melo struct machine *machine = thread->maps->machine; 1393c0cd952SAdrian Hunter const char *arch = perf_env__arch(machine->env); 1403c0cd952SAdrian Hunter 1413c0cd952SAdrian Hunter ts->kernel_start = machine__kernel_start(machine); 1423c0cd952SAdrian Hunter if (!strcmp(arch, "x86")) 1433c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_POSSIBLE; 1443c0cd952SAdrian Hunter } else { 1452e9e8688SAdrian Hunter ts->kernel_start = 
1ULL << 63; 1463c0cd952SAdrian Hunter } 1472e9e8688SAdrian Hunter ts->crp = crp; 1482e9e8688SAdrian Hunter 1492e9e8688SAdrian Hunter return 0; 1502e9e8688SAdrian Hunter } 1512e9e8688SAdrian Hunter 152256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, 15392a9e4f7SAdrian Hunter struct call_return_processor *crp) 15400447ccdSAdrian Hunter { 155139f42f3SAdrian Hunter struct thread_stack *ts = thread->ts, *new_ts; 156139f42f3SAdrian Hunter unsigned int old_sz = ts ? ts->arr_sz : 0; 157139f42f3SAdrian Hunter unsigned int new_sz = 1; 15800447ccdSAdrian Hunter 159256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0) 160256d92bcSAdrian Hunter new_sz = roundup_pow_of_two(cpu + 1); 161256d92bcSAdrian Hunter 162139f42f3SAdrian Hunter if (!ts || new_sz > old_sz) { 163139f42f3SAdrian Hunter new_ts = calloc(new_sz, sizeof(*ts)); 164139f42f3SAdrian Hunter if (!new_ts) 16500447ccdSAdrian Hunter return NULL; 166139f42f3SAdrian Hunter if (ts) 167139f42f3SAdrian Hunter memcpy(new_ts, ts, old_sz * sizeof(*ts)); 168139f42f3SAdrian Hunter new_ts->arr_sz = new_sz; 169139f42f3SAdrian Hunter zfree(&thread->ts); 170139f42f3SAdrian Hunter thread->ts = new_ts; 171139f42f3SAdrian Hunter ts = new_ts; 17200447ccdSAdrian Hunter } 17300447ccdSAdrian Hunter 174256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0 && 175256d92bcSAdrian Hunter (unsigned int)cpu < ts->arr_sz) 176256d92bcSAdrian Hunter ts += cpu; 177256d92bcSAdrian Hunter 178139f42f3SAdrian Hunter if (!ts->stack && 179139f42f3SAdrian Hunter thread_stack__init(ts, thread, crp)) 180139f42f3SAdrian Hunter return NULL; 181bd8e68acSAdrian Hunter 18200447ccdSAdrian Hunter return ts; 18300447ccdSAdrian Hunter } 18400447ccdSAdrian Hunter 185256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) 186bd8e68acSAdrian Hunter { 187256d92bcSAdrian Hunter struct thread_stack *ts = thread->ts; 188256d92bcSAdrian Hunter 
189256d92bcSAdrian Hunter if (cpu < 0) 190256d92bcSAdrian Hunter cpu = 0; 191256d92bcSAdrian Hunter 192256d92bcSAdrian Hunter if (!ts || (unsigned int)cpu >= ts->arr_sz) 193256d92bcSAdrian Hunter return NULL; 194256d92bcSAdrian Hunter 195256d92bcSAdrian Hunter ts += cpu; 196256d92bcSAdrian Hunter 197256d92bcSAdrian Hunter if (!ts->stack) 198256d92bcSAdrian Hunter return NULL; 199256d92bcSAdrian Hunter 200256d92bcSAdrian Hunter return ts; 201256d92bcSAdrian Hunter } 202256d92bcSAdrian Hunter 203256d92bcSAdrian Hunter static inline struct thread_stack *thread__stack(struct thread *thread, 204256d92bcSAdrian Hunter int cpu) 205256d92bcSAdrian Hunter { 206256d92bcSAdrian Hunter if (!thread) 207256d92bcSAdrian Hunter return NULL; 208256d92bcSAdrian Hunter 209256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread)) 210256d92bcSAdrian Hunter return thread__cpu_stack(thread, cpu); 211256d92bcSAdrian Hunter 212256d92bcSAdrian Hunter return thread->ts; 213bd8e68acSAdrian Hunter } 214bd8e68acSAdrian Hunter 2154d60e5e3SAdrian Hunter static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, 2164d60e5e3SAdrian Hunter bool trace_end) 21700447ccdSAdrian Hunter { 21800447ccdSAdrian Hunter int err = 0; 21900447ccdSAdrian Hunter 22000447ccdSAdrian Hunter if (ts->cnt == ts->sz) { 22100447ccdSAdrian Hunter err = thread_stack__grow(ts); 22200447ccdSAdrian Hunter if (err) { 22300447ccdSAdrian Hunter pr_warning("Out of memory: discarding thread stack\n"); 22400447ccdSAdrian Hunter ts->cnt = 0; 22500447ccdSAdrian Hunter } 22600447ccdSAdrian Hunter } 22700447ccdSAdrian Hunter 2284d60e5e3SAdrian Hunter ts->stack[ts->cnt].trace_end = trace_end; 22900447ccdSAdrian Hunter ts->stack[ts->cnt++].ret_addr = ret_addr; 23000447ccdSAdrian Hunter 23100447ccdSAdrian Hunter return err; 23200447ccdSAdrian Hunter } 23300447ccdSAdrian Hunter 23400447ccdSAdrian Hunter static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) 23500447ccdSAdrian Hunter { 23600447ccdSAdrian Hunter size_t 
i; 23700447ccdSAdrian Hunter 23800447ccdSAdrian Hunter /* 23900447ccdSAdrian Hunter * In some cases there may be functions which are not seen to return. 24000447ccdSAdrian Hunter * For example when setjmp / longjmp has been used. Or the perf context 24100447ccdSAdrian Hunter * switch in the kernel which doesn't stop and start tracing in exactly 24200447ccdSAdrian Hunter * the same code path. When that happens the return address will be 24300447ccdSAdrian Hunter * further down the stack. If the return address is not found at all, 24400447ccdSAdrian Hunter * we assume the opposite (i.e. this is a return for a call that wasn't 24500447ccdSAdrian Hunter * seen for some reason) and leave the stack alone. 24600447ccdSAdrian Hunter */ 24700447ccdSAdrian Hunter for (i = ts->cnt; i; ) { 24800447ccdSAdrian Hunter if (ts->stack[--i].ret_addr == ret_addr) { 24900447ccdSAdrian Hunter ts->cnt = i; 25000447ccdSAdrian Hunter return; 25100447ccdSAdrian Hunter } 25200447ccdSAdrian Hunter } 25300447ccdSAdrian Hunter } 25400447ccdSAdrian Hunter 2554d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts) 2564d60e5e3SAdrian Hunter { 2574d60e5e3SAdrian Hunter size_t i; 2584d60e5e3SAdrian Hunter 2594d60e5e3SAdrian Hunter for (i = ts->cnt; i; ) { 2604d60e5e3SAdrian Hunter if (ts->stack[--i].trace_end) 2614d60e5e3SAdrian Hunter ts->cnt = i; 2624d60e5e3SAdrian Hunter else 2634d60e5e3SAdrian Hunter return; 2644d60e5e3SAdrian Hunter } 2654d60e5e3SAdrian Hunter } 2664d60e5e3SAdrian Hunter 26792a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts) 26892a9e4f7SAdrian Hunter { 26992a9e4f7SAdrian Hunter if (!ts->cnt) 27092a9e4f7SAdrian Hunter return false; 27192a9e4f7SAdrian Hunter 27292a9e4f7SAdrian Hunter return ts->stack[ts->cnt - 1].cp->in_kernel; 27392a9e4f7SAdrian Hunter } 27492a9e4f7SAdrian Hunter 27592a9e4f7SAdrian Hunter static int thread_stack__call_return(struct thread *thread, 27692a9e4f7SAdrian Hunter struct thread_stack *ts, 
size_t idx, 27792a9e4f7SAdrian Hunter u64 timestamp, u64 ref, bool no_return) 27892a9e4f7SAdrian Hunter { 27992a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 28092a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 28192a9e4f7SAdrian Hunter struct call_return cr = { 28292a9e4f7SAdrian Hunter .thread = thread, 28392a9e4f7SAdrian Hunter .comm = ts->comm, 28492a9e4f7SAdrian Hunter .db_id = 0, 28592a9e4f7SAdrian Hunter }; 286f435887eSAdrian Hunter u64 *parent_db_id; 28792a9e4f7SAdrian Hunter 28892a9e4f7SAdrian Hunter tse = &ts->stack[idx]; 28992a9e4f7SAdrian Hunter cr.cp = tse->cp; 29092a9e4f7SAdrian Hunter cr.call_time = tse->timestamp; 29192a9e4f7SAdrian Hunter cr.return_time = timestamp; 29292a9e4f7SAdrian Hunter cr.branch_count = ts->branch_count - tse->branch_count; 293003ccdc7SAdrian Hunter cr.insn_count = ts->insn_count - tse->insn_count; 294003ccdc7SAdrian Hunter cr.cyc_count = ts->cyc_count - tse->cyc_count; 295f435887eSAdrian Hunter cr.db_id = tse->db_id; 29692a9e4f7SAdrian Hunter cr.call_ref = tse->ref; 29792a9e4f7SAdrian Hunter cr.return_ref = ref; 29892a9e4f7SAdrian Hunter if (tse->no_call) 29992a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_CALL; 30092a9e4f7SAdrian Hunter if (no_return) 30192a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_RETURN; 302f08046cbSAdrian Hunter if (tse->non_call) 303f08046cbSAdrian Hunter cr.flags |= CALL_RETURN_NON_CALL; 30492a9e4f7SAdrian Hunter 305f435887eSAdrian Hunter /* 306f435887eSAdrian Hunter * The parent db_id must be assigned before exporting the child. Note 307f435887eSAdrian Hunter * it is not possible to export the parent first because its information 308f435887eSAdrian Hunter * is not yet complete because its 'return' has not yet been processed. 309f435887eSAdrian Hunter */ 310f435887eSAdrian Hunter parent_db_id = idx ? 
&(tse - 1)->db_id : NULL; 311f435887eSAdrian Hunter 312f435887eSAdrian Hunter return crp->process(&cr, parent_db_id, crp->data); 31392a9e4f7SAdrian Hunter } 31492a9e4f7SAdrian Hunter 315a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) 31692a9e4f7SAdrian Hunter { 31792a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 31892a9e4f7SAdrian Hunter int err; 31992a9e4f7SAdrian Hunter 32092a9e4f7SAdrian Hunter if (!crp) { 32192a9e4f7SAdrian Hunter ts->cnt = 0; 32292a9e4f7SAdrian Hunter return 0; 32392a9e4f7SAdrian Hunter } 32492a9e4f7SAdrian Hunter 32592a9e4f7SAdrian Hunter while (ts->cnt) { 32692a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 32792a9e4f7SAdrian Hunter ts->last_time, 0, true); 32892a9e4f7SAdrian Hunter if (err) { 32992a9e4f7SAdrian Hunter pr_err("Error flushing thread stack!\n"); 33092a9e4f7SAdrian Hunter ts->cnt = 0; 33192a9e4f7SAdrian Hunter return err; 33292a9e4f7SAdrian Hunter } 33392a9e4f7SAdrian Hunter } 33492a9e4f7SAdrian Hunter 33592a9e4f7SAdrian Hunter return 0; 33692a9e4f7SAdrian Hunter } 33792a9e4f7SAdrian Hunter 338a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread) 339a5499b37SAdrian Hunter { 340bd8e68acSAdrian Hunter struct thread_stack *ts = thread->ts; 341f6060ac6SAdrian Hunter unsigned int pos; 342f6060ac6SAdrian Hunter int err = 0; 343bd8e68acSAdrian Hunter 344f6060ac6SAdrian Hunter if (ts) { 345f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) { 346f6060ac6SAdrian Hunter int ret = __thread_stack__flush(thread, ts + pos); 347a5499b37SAdrian Hunter 348f6060ac6SAdrian Hunter if (ret) 349f6060ac6SAdrian Hunter err = ret; 350f6060ac6SAdrian Hunter } 351f6060ac6SAdrian Hunter } 352f6060ac6SAdrian Hunter 353f6060ac6SAdrian Hunter return err; 354a5499b37SAdrian Hunter } 355a5499b37SAdrian Hunter 356256d92bcSAdrian Hunter int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, 35700447ccdSAdrian 
Hunter u64 to_ip, u16 insn_len, u64 trace_nr) 35800447ccdSAdrian Hunter { 359256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 360bd8e68acSAdrian Hunter 36100447ccdSAdrian Hunter if (!thread) 36200447ccdSAdrian Hunter return -EINVAL; 36300447ccdSAdrian Hunter 364bd8e68acSAdrian Hunter if (!ts) { 365256d92bcSAdrian Hunter ts = thread_stack__new(thread, cpu, NULL); 366bd8e68acSAdrian Hunter if (!ts) { 36700447ccdSAdrian Hunter pr_warning("Out of memory: no thread stack\n"); 36800447ccdSAdrian Hunter return -ENOMEM; 36900447ccdSAdrian Hunter } 370bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 37100447ccdSAdrian Hunter } 37200447ccdSAdrian Hunter 37300447ccdSAdrian Hunter /* 37400447ccdSAdrian Hunter * When the trace is discontinuous, the trace_nr changes. In that case 37500447ccdSAdrian Hunter * the stack might be completely invalid. Better to report nothing than 37692a9e4f7SAdrian Hunter * to report something misleading, so flush the stack. 37700447ccdSAdrian Hunter */ 378bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 379bd8e68acSAdrian Hunter if (ts->trace_nr) 380bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 381bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 38200447ccdSAdrian Hunter } 38300447ccdSAdrian Hunter 38492a9e4f7SAdrian Hunter /* Stop here if thread_stack__process() is in use */ 385bd8e68acSAdrian Hunter if (ts->crp) 38692a9e4f7SAdrian Hunter return 0; 38792a9e4f7SAdrian Hunter 38800447ccdSAdrian Hunter if (flags & PERF_IP_FLAG_CALL) { 38900447ccdSAdrian Hunter u64 ret_addr; 39000447ccdSAdrian Hunter 39100447ccdSAdrian Hunter if (!to_ip) 39200447ccdSAdrian Hunter return 0; 39300447ccdSAdrian Hunter ret_addr = from_ip + insn_len; 39400447ccdSAdrian Hunter if (ret_addr == to_ip) 39500447ccdSAdrian Hunter return 0; /* Zero-length calls are excluded */ 396bd8e68acSAdrian Hunter return thread_stack__push(ts, ret_addr, 3974d60e5e3SAdrian Hunter flags & PERF_IP_FLAG_TRACE_END); 3984d60e5e3SAdrian Hunter } else if 
(flags & PERF_IP_FLAG_TRACE_BEGIN) { 3994d60e5e3SAdrian Hunter /* 4004d60e5e3SAdrian Hunter * If the caller did not change the trace number (which would 4014d60e5e3SAdrian Hunter * have flushed the stack) then try to make sense of the stack. 4024d60e5e3SAdrian Hunter * Possibly, tracing began after returning to the current 4034d60e5e3SAdrian Hunter * address, so try to pop that. Also, do not expect a call made 4044d60e5e3SAdrian Hunter * when the trace ended, to return, so pop that. 4054d60e5e3SAdrian Hunter */ 406bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 407bd8e68acSAdrian Hunter thread_stack__pop_trace_end(ts); 4084d60e5e3SAdrian Hunter } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { 409bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 41000447ccdSAdrian Hunter } 41100447ccdSAdrian Hunter 41200447ccdSAdrian Hunter return 0; 41300447ccdSAdrian Hunter } 41400447ccdSAdrian Hunter 415256d92bcSAdrian Hunter void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) 41692a9e4f7SAdrian Hunter { 417256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 418bd8e68acSAdrian Hunter 419bd8e68acSAdrian Hunter if (!ts) 42092a9e4f7SAdrian Hunter return; 42192a9e4f7SAdrian Hunter 422bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 423bd8e68acSAdrian Hunter if (ts->trace_nr) 424bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 425bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 42692a9e4f7SAdrian Hunter } 42792a9e4f7SAdrian Hunter } 42892a9e4f7SAdrian Hunter 429f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) 430f6060ac6SAdrian Hunter { 431f6060ac6SAdrian Hunter __thread_stack__flush(thread, ts); 432f6060ac6SAdrian Hunter zfree(&ts->stack); 433f6060ac6SAdrian Hunter } 434f6060ac6SAdrian Hunter 435f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) 436f6060ac6SAdrian Hunter { 437f6060ac6SAdrian Hunter 
unsigned int arr_sz = ts->arr_sz; 438f6060ac6SAdrian Hunter 439f6060ac6SAdrian Hunter __thread_stack__free(thread, ts); 440f6060ac6SAdrian Hunter memset(ts, 0, sizeof(*ts)); 441f6060ac6SAdrian Hunter ts->arr_sz = arr_sz; 442f6060ac6SAdrian Hunter } 443f6060ac6SAdrian Hunter 44400447ccdSAdrian Hunter void thread_stack__free(struct thread *thread) 44500447ccdSAdrian Hunter { 446bd8e68acSAdrian Hunter struct thread_stack *ts = thread->ts; 447f6060ac6SAdrian Hunter unsigned int pos; 448bd8e68acSAdrian Hunter 449bd8e68acSAdrian Hunter if (ts) { 450f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) 451f6060ac6SAdrian Hunter __thread_stack__free(thread, ts + pos); 45200447ccdSAdrian Hunter zfree(&thread->ts); 45300447ccdSAdrian Hunter } 45400447ccdSAdrian Hunter } 45500447ccdSAdrian Hunter 45624248306SAdrian Hunter static inline u64 callchain_context(u64 ip, u64 kernel_start) 45700447ccdSAdrian Hunter { 45824248306SAdrian Hunter return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; 45924248306SAdrian Hunter } 46000447ccdSAdrian Hunter 461256d92bcSAdrian Hunter void thread_stack__sample(struct thread *thread, int cpu, 462256d92bcSAdrian Hunter struct ip_callchain *chain, 46324248306SAdrian Hunter size_t sz, u64 ip, u64 kernel_start) 46424248306SAdrian Hunter { 465256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 46624248306SAdrian Hunter u64 context = callchain_context(ip, kernel_start); 46724248306SAdrian Hunter u64 last_context; 46824248306SAdrian Hunter size_t i, j; 46900447ccdSAdrian Hunter 47024248306SAdrian Hunter if (sz < 2) { 47124248306SAdrian Hunter chain->nr = 0; 47224248306SAdrian Hunter return; 47324248306SAdrian Hunter } 47400447ccdSAdrian Hunter 47524248306SAdrian Hunter chain->ips[0] = context; 47624248306SAdrian Hunter chain->ips[1] = ip; 47724248306SAdrian Hunter 478bd8e68acSAdrian Hunter if (!ts) { 47924248306SAdrian Hunter chain->nr = 2; 48024248306SAdrian Hunter return; 48124248306SAdrian Hunter } 
48224248306SAdrian Hunter 48324248306SAdrian Hunter last_context = context; 48424248306SAdrian Hunter 485bd8e68acSAdrian Hunter for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { 486bd8e68acSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 48724248306SAdrian Hunter context = callchain_context(ip, kernel_start); 48824248306SAdrian Hunter if (context != last_context) { 48924248306SAdrian Hunter if (i >= sz - 1) 49024248306SAdrian Hunter break; 49124248306SAdrian Hunter chain->ips[i++] = context; 49224248306SAdrian Hunter last_context = context; 49324248306SAdrian Hunter } 49424248306SAdrian Hunter chain->ips[i] = ip; 49524248306SAdrian Hunter } 49624248306SAdrian Hunter 49724248306SAdrian Hunter chain->nr = i; 49800447ccdSAdrian Hunter } 49992a9e4f7SAdrian Hunter 5004fef41bfSAdrian Hunter /* 5014fef41bfSAdrian Hunter * Hardware sample records, created some time after the event occurred, need to 5024fef41bfSAdrian Hunter * have subsequent addresses removed from the call chain. 5034fef41bfSAdrian Hunter */ 5044fef41bfSAdrian Hunter void thread_stack__sample_late(struct thread *thread, int cpu, 5054fef41bfSAdrian Hunter struct ip_callchain *chain, size_t sz, 5064fef41bfSAdrian Hunter u64 sample_ip, u64 kernel_start) 5074fef41bfSAdrian Hunter { 5084fef41bfSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 5094fef41bfSAdrian Hunter u64 sample_context = callchain_context(sample_ip, kernel_start); 5104fef41bfSAdrian Hunter u64 last_context, context, ip; 5114fef41bfSAdrian Hunter size_t nr = 0, j; 5124fef41bfSAdrian Hunter 5134fef41bfSAdrian Hunter if (sz < 2) { 5144fef41bfSAdrian Hunter chain->nr = 0; 5154fef41bfSAdrian Hunter return; 5164fef41bfSAdrian Hunter } 5174fef41bfSAdrian Hunter 5184fef41bfSAdrian Hunter if (!ts) 5194fef41bfSAdrian Hunter goto out; 5204fef41bfSAdrian Hunter 5214fef41bfSAdrian Hunter /* 5224fef41bfSAdrian Hunter * When tracing kernel space, kernel addresses occur at the top of the 5234fef41bfSAdrian Hunter * call chain after 
the event occurred but before tracing stopped. 5244fef41bfSAdrian Hunter * Skip them. 5254fef41bfSAdrian Hunter */ 5264fef41bfSAdrian Hunter for (j = 1; j <= ts->cnt; j++) { 5274fef41bfSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 5284fef41bfSAdrian Hunter context = callchain_context(ip, kernel_start); 5294fef41bfSAdrian Hunter if (context == PERF_CONTEXT_USER || 5304fef41bfSAdrian Hunter (context == sample_context && ip == sample_ip)) 5314fef41bfSAdrian Hunter break; 5324fef41bfSAdrian Hunter } 5334fef41bfSAdrian Hunter 5344fef41bfSAdrian Hunter last_context = sample_ip; /* Use sample_ip as an invalid context */ 5354fef41bfSAdrian Hunter 5364fef41bfSAdrian Hunter for (; nr < sz && j <= ts->cnt; nr++, j++) { 5374fef41bfSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 5384fef41bfSAdrian Hunter context = callchain_context(ip, kernel_start); 5394fef41bfSAdrian Hunter if (context != last_context) { 5404fef41bfSAdrian Hunter if (nr >= sz - 1) 5414fef41bfSAdrian Hunter break; 5424fef41bfSAdrian Hunter chain->ips[nr++] = context; 5434fef41bfSAdrian Hunter last_context = context; 5444fef41bfSAdrian Hunter } 5454fef41bfSAdrian Hunter chain->ips[nr] = ip; 5464fef41bfSAdrian Hunter } 5474fef41bfSAdrian Hunter out: 5484fef41bfSAdrian Hunter if (nr) { 5494fef41bfSAdrian Hunter chain->nr = nr; 5504fef41bfSAdrian Hunter } else { 5514fef41bfSAdrian Hunter chain->ips[0] = sample_context; 5524fef41bfSAdrian Hunter chain->ips[1] = sample_ip; 5534fef41bfSAdrian Hunter chain->nr = 2; 5544fef41bfSAdrian Hunter } 5554fef41bfSAdrian Hunter } 5564fef41bfSAdrian Hunter 55792a9e4f7SAdrian Hunter struct call_return_processor * 558f435887eSAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), 55992a9e4f7SAdrian Hunter void *data) 56092a9e4f7SAdrian Hunter { 56192a9e4f7SAdrian Hunter struct call_return_processor *crp; 56292a9e4f7SAdrian Hunter 56392a9e4f7SAdrian Hunter crp = zalloc(sizeof(struct call_return_processor)); 
56492a9e4f7SAdrian Hunter if (!crp) 56592a9e4f7SAdrian Hunter return NULL; 56692a9e4f7SAdrian Hunter crp->cpr = call_path_root__new(); 56792a9e4f7SAdrian Hunter if (!crp->cpr) 56892a9e4f7SAdrian Hunter goto out_free; 56992a9e4f7SAdrian Hunter crp->process = process; 57092a9e4f7SAdrian Hunter crp->data = data; 57192a9e4f7SAdrian Hunter return crp; 57292a9e4f7SAdrian Hunter 57392a9e4f7SAdrian Hunter out_free: 57492a9e4f7SAdrian Hunter free(crp); 57592a9e4f7SAdrian Hunter return NULL; 57692a9e4f7SAdrian Hunter } 57792a9e4f7SAdrian Hunter 57892a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp) 57992a9e4f7SAdrian Hunter { 58092a9e4f7SAdrian Hunter if (crp) { 58192a9e4f7SAdrian Hunter call_path_root__free(crp->cpr); 58292a9e4f7SAdrian Hunter free(crp); 58392a9e4f7SAdrian Hunter } 58492a9e4f7SAdrian Hunter } 58592a9e4f7SAdrian Hunter 58692a9e4f7SAdrian Hunter static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, 58792a9e4f7SAdrian Hunter u64 timestamp, u64 ref, struct call_path *cp, 5882dcde4e1SAdrian Hunter bool no_call, bool trace_end) 58992a9e4f7SAdrian Hunter { 59092a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 59192a9e4f7SAdrian Hunter int err; 59292a9e4f7SAdrian Hunter 593e7a3a055SAdrian Hunter if (!cp) 594e7a3a055SAdrian Hunter return -ENOMEM; 595e7a3a055SAdrian Hunter 59692a9e4f7SAdrian Hunter if (ts->cnt == ts->sz) { 59792a9e4f7SAdrian Hunter err = thread_stack__grow(ts); 59892a9e4f7SAdrian Hunter if (err) 59992a9e4f7SAdrian Hunter return err; 60092a9e4f7SAdrian Hunter } 60192a9e4f7SAdrian Hunter 60292a9e4f7SAdrian Hunter tse = &ts->stack[ts->cnt++]; 60392a9e4f7SAdrian Hunter tse->ret_addr = ret_addr; 60492a9e4f7SAdrian Hunter tse->timestamp = timestamp; 60592a9e4f7SAdrian Hunter tse->ref = ref; 60692a9e4f7SAdrian Hunter tse->branch_count = ts->branch_count; 607003ccdc7SAdrian Hunter tse->insn_count = ts->insn_count; 608003ccdc7SAdrian Hunter tse->cyc_count = ts->cyc_count; 60992a9e4f7SAdrian Hunter 
tse->cp = cp; 61092a9e4f7SAdrian Hunter tse->no_call = no_call; 6112dcde4e1SAdrian Hunter tse->trace_end = trace_end; 612f08046cbSAdrian Hunter tse->non_call = false; 613f435887eSAdrian Hunter tse->db_id = 0; 61492a9e4f7SAdrian Hunter 61592a9e4f7SAdrian Hunter return 0; 61692a9e4f7SAdrian Hunter } 61792a9e4f7SAdrian Hunter 61892a9e4f7SAdrian Hunter static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, 61992a9e4f7SAdrian Hunter u64 ret_addr, u64 timestamp, u64 ref, 62092a9e4f7SAdrian Hunter struct symbol *sym) 62192a9e4f7SAdrian Hunter { 62292a9e4f7SAdrian Hunter int err; 62392a9e4f7SAdrian Hunter 62492a9e4f7SAdrian Hunter if (!ts->cnt) 62592a9e4f7SAdrian Hunter return 1; 62692a9e4f7SAdrian Hunter 62792a9e4f7SAdrian Hunter if (ts->cnt == 1) { 62892a9e4f7SAdrian Hunter struct thread_stack_entry *tse = &ts->stack[0]; 62992a9e4f7SAdrian Hunter 63092a9e4f7SAdrian Hunter if (tse->cp->sym == sym) 63192a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 63292a9e4f7SAdrian Hunter timestamp, ref, false); 63392a9e4f7SAdrian Hunter } 63492a9e4f7SAdrian Hunter 635f08046cbSAdrian Hunter if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && 636f08046cbSAdrian Hunter !ts->stack[ts->cnt - 1].non_call) { 63792a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 63892a9e4f7SAdrian Hunter timestamp, ref, false); 63992a9e4f7SAdrian Hunter } else { 64092a9e4f7SAdrian Hunter size_t i = ts->cnt - 1; 64192a9e4f7SAdrian Hunter 64292a9e4f7SAdrian Hunter while (i--) { 643f08046cbSAdrian Hunter if (ts->stack[i].ret_addr != ret_addr || 644f08046cbSAdrian Hunter ts->stack[i].non_call) 64592a9e4f7SAdrian Hunter continue; 64692a9e4f7SAdrian Hunter i += 1; 64792a9e4f7SAdrian Hunter while (ts->cnt > i) { 64892a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, 64992a9e4f7SAdrian Hunter --ts->cnt, 65092a9e4f7SAdrian Hunter timestamp, ref, 65192a9e4f7SAdrian Hunter true); 65292a9e4f7SAdrian Hunter if (err) 
				return err;
			}
			/*
			 * NOTE(review): this is the tail of a function whose
			 * beginning is outside this chunk (it matches the
			 * return-address search in thread_stack__pop_cp —
			 * TODO confirm against the full file).
			 */
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

/*
 * Called when the stack is empty: seed it with the current location so that
 * subsequent calls/returns have a parent call path.  Prefers the 'from'
 * address/symbol; falls back to the 'to' side; does nothing if both are zero.
 */
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	/* Seed entry hangs directly off the call path root */
	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	/* no_call == true: we never saw the 'call' that created this frame */
	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

/*
 * Pop every kernel-space entry off the stack, flushing each popped frame to
 * the call/return processor.  Used when a return to userspace is detected.
 */
static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;
	int err;

	/* Return to userspace, so pop all kernel addresses */
	while (thread_stack__in_kernel(ts)) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Handle a 'return' whose address did not match any entry on the stack
 * (i.e. the corresponding 'call' was never seen).  Reconciles the stack by
 * either assuming the call predates the trace, treating the return as a jump
 * (e.g. retpoline), or synthesizing a push/pop pair.
 */
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	/* ip in kernel, target below kernel_start: kernel -> user return */
	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	/* Immediately pop 'from' again: it was only pushed to be recorded */
	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

/*
 * Tracing resumed: if the top of the stack is a synthetic 'trace end' entry,
 * pop it so the stack reflects only real frames.
 */
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Tracing stopped: push a synthetic 'trace end' entry (NULL symbol) so that
 * the gap is visible in the call graph.  The return address is the address of
 * the instruction following the current one.
 */
static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

/*
 * Return true if @name is an x86 retpoline thunk: either a name starting
 * with "__x86_indirect_thunk_" or exactly "__indirect_thunk_start".
 */
static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 *
 * Only called when the 'return' address did not match the top-of-stack and
 * ts->cnt > 2 (caller checks), so the ts->cnt - 2 accesses below are safe.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is a x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

/*
 * Process one branch sample for call/return tracking, maintaining the
 * per-cpu thread stack and feeding completed call/return pairs to @crp.
 * Dispatches on sample->flags: CALL pushes, RETURN pops (with retpoline and
 * kernel-return special cases), TRACE_BEGIN/TRACE_END manage synthetic
 * entries, and a plain BRANCH to the start of a different symbol is recorded
 * as a non-call entry.  Returns 0 or a negative error code.
 */
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	/*
	 * Retpoline detection is only valid for one sample: latch the current
	 * state and drop DETECTED back to POSSIBLE for the next sample.
	 */
	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol but not the start of the symbol,
		 * may be the start of a x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			/* Only syscall/interrupt returns may lack an address */
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Pop kernel stack */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			/* Positive return: no matching call was on the stack */
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

/*
 * Return the current depth of the thread's stack for @cpu, or 0 if the
 * thread has no stack for that cpu.
 */
size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}