100447ccdSAdrian Hunter /* 200447ccdSAdrian Hunter * thread-stack.c: Synthesize a thread's stack using call / return events 300447ccdSAdrian Hunter * Copyright (c) 2014, Intel Corporation. 400447ccdSAdrian Hunter * 500447ccdSAdrian Hunter * This program is free software; you can redistribute it and/or modify it 600447ccdSAdrian Hunter * under the terms and conditions of the GNU General Public License, 700447ccdSAdrian Hunter * version 2, as published by the Free Software Foundation. 800447ccdSAdrian Hunter * 900447ccdSAdrian Hunter * This program is distributed in the hope it will be useful, but WITHOUT 1000447ccdSAdrian Hunter * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1100447ccdSAdrian Hunter * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 1200447ccdSAdrian Hunter * more details. 1300447ccdSAdrian Hunter * 1400447ccdSAdrian Hunter */ 1500447ccdSAdrian Hunter 1692a9e4f7SAdrian Hunter #include <linux/rbtree.h> 1792a9e4f7SAdrian Hunter #include <linux/list.h> 18256d92bcSAdrian Hunter #include <linux/log2.h> 19a43783aeSArnaldo Carvalho de Melo #include <errno.h> 2000447ccdSAdrian Hunter #include "thread.h" 2100447ccdSAdrian Hunter #include "event.h" 2292a9e4f7SAdrian Hunter #include "machine.h" 233c0cd952SAdrian Hunter #include "env.h" 2400447ccdSAdrian Hunter #include "util.h" 2500447ccdSAdrian Hunter #include "debug.h" 2692a9e4f7SAdrian Hunter #include "symbol.h" 2792a9e4f7SAdrian Hunter #include "comm.h" 28451db126SChris Phlipot #include "call-path.h" 2900447ccdSAdrian Hunter #include "thread-stack.h" 3000447ccdSAdrian Hunter 3192a9e4f7SAdrian Hunter #define STACK_GROWTH 2048 3292a9e4f7SAdrian Hunter 333c0cd952SAdrian Hunter /* 343c0cd952SAdrian Hunter * State of retpoline detection. 353c0cd952SAdrian Hunter * 363c0cd952SAdrian Hunter * RETPOLINE_NONE: no retpoline detection 373c0cd952SAdrian Hunter * X86_RETPOLINE_POSSIBLE: x86 retpoline possible 383c0cd952SAdrian Hunter * X86_RETPOLINE_DETECTED: x86 retpoline detected 393c0cd952SAdrian Hunter */ 403c0cd952SAdrian Hunter enum retpoline_state_t { 413c0cd952SAdrian Hunter RETPOLINE_NONE, 423c0cd952SAdrian Hunter X86_RETPOLINE_POSSIBLE, 433c0cd952SAdrian Hunter X86_RETPOLINE_DETECTED, 443c0cd952SAdrian Hunter }; 453c0cd952SAdrian Hunter 4692a9e4f7SAdrian Hunter /** 4792a9e4f7SAdrian Hunter * struct thread_stack_entry - thread stack entry. 4892a9e4f7SAdrian Hunter * @ret_addr: return address 4992a9e4f7SAdrian Hunter * @timestamp: timestamp (if known) 5092a9e4f7SAdrian Hunter * @ref: external reference (e.g. db_id of sample) 5192a9e4f7SAdrian Hunter * @branch_count: the branch count when the entry was created 52003ccdc7SAdrian Hunter * @insn_count: the instruction count when the entry was created 53003ccdc7SAdrian Hunter * @cyc_count the cycle count when the entry was created 54f435887eSAdrian Hunter * @db_id: id used for db-export 5592a9e4f7SAdrian Hunter * @cp: call path 5692a9e4f7SAdrian Hunter * @no_call: a 'call' was not seen 574d60e5e3SAdrian Hunter * @trace_end: a 'call' but trace ended 58f08046cbSAdrian Hunter * @non_call: a branch but not a 'call' to the start of a different symbol 5992a9e4f7SAdrian Hunter */ 6092a9e4f7SAdrian Hunter struct thread_stack_entry { 6192a9e4f7SAdrian Hunter u64 ret_addr; 6292a9e4f7SAdrian Hunter u64 timestamp; 6392a9e4f7SAdrian Hunter u64 ref; 6492a9e4f7SAdrian Hunter u64 branch_count; 65003ccdc7SAdrian Hunter u64 insn_count; 66003ccdc7SAdrian Hunter u64 cyc_count; 67f435887eSAdrian Hunter u64 db_id; 6892a9e4f7SAdrian Hunter struct call_path *cp; 6992a9e4f7SAdrian Hunter bool no_call; 704d60e5e3SAdrian Hunter bool trace_end; 71f08046cbSAdrian Hunter bool non_call; 7292a9e4f7SAdrian Hunter }; 7392a9e4f7SAdrian Hunter 7492a9e4f7SAdrian Hunter /** 7592a9e4f7SAdrian Hunter * struct thread_stack - thread stack constructed from 'call' and 'return' 7692a9e4f7SAdrian Hunter * branch samples. 7792a9e4f7SAdrian Hunter * @stack: array that holds the stack 7892a9e4f7SAdrian Hunter * @cnt: number of entries in the stack 7992a9e4f7SAdrian Hunter * @sz: current maximum stack size 8092a9e4f7SAdrian Hunter * @trace_nr: current trace number 8192a9e4f7SAdrian Hunter * @branch_count: running branch count 82003ccdc7SAdrian Hunter * @insn_count: running instruction count 83003ccdc7SAdrian Hunter * @cyc_count running cycle count 8492a9e4f7SAdrian Hunter * @kernel_start: kernel start address 8592a9e4f7SAdrian Hunter * @last_time: last timestamp 8692a9e4f7SAdrian Hunter * @crp: call/return processor 8792a9e4f7SAdrian Hunter * @comm: current comm 88f6060ac6SAdrian Hunter * @arr_sz: size of array if this is the first element of an array 893c0cd952SAdrian Hunter * @rstate: used to detect retpolines 9092a9e4f7SAdrian Hunter */ 9100447ccdSAdrian Hunter struct thread_stack { 9200447ccdSAdrian Hunter struct thread_stack_entry *stack; 9300447ccdSAdrian Hunter size_t cnt; 9400447ccdSAdrian Hunter size_t sz; 9500447ccdSAdrian Hunter u64 trace_nr; 9692a9e4f7SAdrian Hunter u64 branch_count; 97003ccdc7SAdrian Hunter u64 insn_count; 98003ccdc7SAdrian Hunter u64 cyc_count; 9992a9e4f7SAdrian Hunter u64 kernel_start; 10092a9e4f7SAdrian Hunter u64 last_time; 10192a9e4f7SAdrian Hunter struct call_return_processor *crp; 10292a9e4f7SAdrian Hunter struct comm *comm; 103f6060ac6SAdrian Hunter unsigned int arr_sz; 1043c0cd952SAdrian Hunter enum retpoline_state_t rstate; 10500447ccdSAdrian Hunter }; 10600447ccdSAdrian Hunter 107256d92bcSAdrian Hunter /* 108256d92bcSAdrian Hunter * Assume pid == tid == 0 identifies the idle task as defined by 109256d92bcSAdrian Hunter * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, 110256d92bcSAdrian Hunter * and therefore requires a stack for each cpu. 111256d92bcSAdrian Hunter */ 112256d92bcSAdrian Hunter static inline bool thread_stack__per_cpu(struct thread *thread) 113256d92bcSAdrian Hunter { 114256d92bcSAdrian Hunter return !(thread->tid || thread->pid_); 115256d92bcSAdrian Hunter } 116256d92bcSAdrian Hunter 11700447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts) 11800447ccdSAdrian Hunter { 11900447ccdSAdrian Hunter struct thread_stack_entry *new_stack; 12000447ccdSAdrian Hunter size_t sz, new_sz; 12100447ccdSAdrian Hunter 12200447ccdSAdrian Hunter new_sz = ts->sz + STACK_GROWTH; 12300447ccdSAdrian Hunter sz = new_sz * sizeof(struct thread_stack_entry); 12400447ccdSAdrian Hunter 12500447ccdSAdrian Hunter new_stack = realloc(ts->stack, sz); 12600447ccdSAdrian Hunter if (!new_stack) 12700447ccdSAdrian Hunter return -ENOMEM; 12800447ccdSAdrian Hunter 12900447ccdSAdrian Hunter ts->stack = new_stack; 13000447ccdSAdrian Hunter ts->sz = new_sz; 13100447ccdSAdrian Hunter 13200447ccdSAdrian Hunter return 0; 13300447ccdSAdrian Hunter } 13400447ccdSAdrian Hunter 1352e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread, 1362e9e8688SAdrian Hunter struct call_return_processor *crp) 1372e9e8688SAdrian Hunter { 1382e9e8688SAdrian Hunter int err; 1392e9e8688SAdrian Hunter 1402e9e8688SAdrian Hunter err = thread_stack__grow(ts); 1412e9e8688SAdrian Hunter if (err) 1422e9e8688SAdrian Hunter return err; 1432e9e8688SAdrian Hunter 1443c0cd952SAdrian Hunter if (thread->mg && thread->mg->machine) { 1453c0cd952SAdrian Hunter struct machine *machine = thread->mg->machine; 1463c0cd952SAdrian Hunter const char *arch = perf_env__arch(machine->env); 1473c0cd952SAdrian Hunter 1483c0cd952SAdrian Hunter ts->kernel_start = machine__kernel_start(machine); 1493c0cd952SAdrian Hunter if (!strcmp(arch, "x86")) 1503c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_POSSIBLE; 1513c0cd952SAdrian Hunter } else { 1522e9e8688SAdrian Hunter ts->kernel_start = 1ULL << 63; 1533c0cd952SAdrian Hunter } 1542e9e8688SAdrian Hunter ts->crp = crp; 1552e9e8688SAdrian Hunter 1562e9e8688SAdrian Hunter return 0; 1572e9e8688SAdrian Hunter } 1582e9e8688SAdrian Hunter 159256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, 16092a9e4f7SAdrian Hunter struct call_return_processor *crp) 16100447ccdSAdrian Hunter { 162139f42f3SAdrian Hunter struct thread_stack *ts = thread->ts, *new_ts; 163139f42f3SAdrian Hunter unsigned int old_sz = ts ? ts->arr_sz : 0; 164139f42f3SAdrian Hunter unsigned int new_sz = 1; 16500447ccdSAdrian Hunter 166256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0) 167256d92bcSAdrian Hunter new_sz = roundup_pow_of_two(cpu + 1); 168256d92bcSAdrian Hunter 169139f42f3SAdrian Hunter if (!ts || new_sz > old_sz) { 170139f42f3SAdrian Hunter new_ts = calloc(new_sz, sizeof(*ts)); 171139f42f3SAdrian Hunter if (!new_ts) 17200447ccdSAdrian Hunter return NULL; 173139f42f3SAdrian Hunter if (ts) 174139f42f3SAdrian Hunter memcpy(new_ts, ts, old_sz * sizeof(*ts)); 175139f42f3SAdrian Hunter new_ts->arr_sz = new_sz; 176139f42f3SAdrian Hunter zfree(&thread->ts); 177139f42f3SAdrian Hunter thread->ts = new_ts; 178139f42f3SAdrian Hunter ts = new_ts; 17900447ccdSAdrian Hunter } 18000447ccdSAdrian Hunter 181256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0 && 182256d92bcSAdrian Hunter (unsigned int)cpu < ts->arr_sz) 183256d92bcSAdrian Hunter ts += cpu; 184256d92bcSAdrian Hunter 185139f42f3SAdrian Hunter if (!ts->stack && 186139f42f3SAdrian Hunter thread_stack__init(ts, thread, crp)) 187139f42f3SAdrian Hunter return NULL; 188bd8e68acSAdrian Hunter 18900447ccdSAdrian Hunter return ts; 19000447ccdSAdrian Hunter } 19100447ccdSAdrian Hunter 192256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) 193bd8e68acSAdrian Hunter { 194256d92bcSAdrian Hunter struct thread_stack *ts = thread->ts; 195256d92bcSAdrian Hunter 196256d92bcSAdrian Hunter if (cpu < 0) 197256d92bcSAdrian Hunter cpu = 0; 198256d92bcSAdrian Hunter 199256d92bcSAdrian Hunter if (!ts || (unsigned int)cpu >= ts->arr_sz) 200256d92bcSAdrian Hunter return NULL; 201256d92bcSAdrian Hunter 202256d92bcSAdrian Hunter ts += cpu; 203256d92bcSAdrian Hunter 204256d92bcSAdrian Hunter if (!ts->stack) 205256d92bcSAdrian Hunter return NULL; 206256d92bcSAdrian Hunter 207256d92bcSAdrian Hunter return ts; 208256d92bcSAdrian Hunter } 209256d92bcSAdrian Hunter 210256d92bcSAdrian Hunter static inline struct thread_stack *thread__stack(struct thread *thread, 211256d92bcSAdrian Hunter int cpu) 212256d92bcSAdrian Hunter { 213256d92bcSAdrian Hunter if (!thread) 214256d92bcSAdrian Hunter return NULL; 215256d92bcSAdrian Hunter 216256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread)) 217256d92bcSAdrian Hunter return thread__cpu_stack(thread, cpu); 218256d92bcSAdrian Hunter 219256d92bcSAdrian Hunter return thread->ts; 220bd8e68acSAdrian Hunter } 221bd8e68acSAdrian Hunter 2224d60e5e3SAdrian Hunter static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, 2234d60e5e3SAdrian Hunter bool trace_end) 22400447ccdSAdrian Hunter { 22500447ccdSAdrian Hunter int err = 0; 22600447ccdSAdrian Hunter 22700447ccdSAdrian Hunter if (ts->cnt == ts->sz) { 22800447ccdSAdrian Hunter err = thread_stack__grow(ts); 22900447ccdSAdrian Hunter if (err) { 23000447ccdSAdrian Hunter pr_warning("Out of memory: discarding thread stack\n"); 23100447ccdSAdrian Hunter ts->cnt = 0; 23200447ccdSAdrian Hunter } 23300447ccdSAdrian Hunter } 23400447ccdSAdrian Hunter 2354d60e5e3SAdrian Hunter ts->stack[ts->cnt].trace_end = trace_end; 23600447ccdSAdrian Hunter ts->stack[ts->cnt++].ret_addr = ret_addr; 23700447ccdSAdrian Hunter 23800447ccdSAdrian Hunter return err; 23900447ccdSAdrian Hunter } 24000447ccdSAdrian Hunter 24100447ccdSAdrian Hunter static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) 24200447ccdSAdrian Hunter { 24300447ccdSAdrian Hunter size_t i; 24400447ccdSAdrian Hunter 24500447ccdSAdrian Hunter /* 24600447ccdSAdrian Hunter * In some cases there may be functions which are not seen to return. 24700447ccdSAdrian Hunter * For example when setjmp / longjmp has been used. Or the perf context 24800447ccdSAdrian Hunter * switch in the kernel which doesn't stop and start tracing in exactly 24900447ccdSAdrian Hunter * the same code path. When that happens the return address will be 25000447ccdSAdrian Hunter * further down the stack. If the return address is not found at all, 25100447ccdSAdrian Hunter * we assume the opposite (i.e. this is a return for a call that wasn't 25200447ccdSAdrian Hunter * seen for some reason) and leave the stack alone. 25300447ccdSAdrian Hunter */ 25400447ccdSAdrian Hunter for (i = ts->cnt; i; ) { 25500447ccdSAdrian Hunter if (ts->stack[--i].ret_addr == ret_addr) { 25600447ccdSAdrian Hunter ts->cnt = i; 25700447ccdSAdrian Hunter return; 25800447ccdSAdrian Hunter } 25900447ccdSAdrian Hunter } 26000447ccdSAdrian Hunter } 26100447ccdSAdrian Hunter 2624d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts) 2634d60e5e3SAdrian Hunter { 2644d60e5e3SAdrian Hunter size_t i; 2654d60e5e3SAdrian Hunter 2664d60e5e3SAdrian Hunter for (i = ts->cnt; i; ) { 2674d60e5e3SAdrian Hunter if (ts->stack[--i].trace_end) 2684d60e5e3SAdrian Hunter ts->cnt = i; 2694d60e5e3SAdrian Hunter else 2704d60e5e3SAdrian Hunter return; 2714d60e5e3SAdrian Hunter } 2724d60e5e3SAdrian Hunter } 2734d60e5e3SAdrian Hunter 27492a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts) 27592a9e4f7SAdrian Hunter { 27692a9e4f7SAdrian Hunter if (!ts->cnt) 27792a9e4f7SAdrian Hunter return false; 27892a9e4f7SAdrian Hunter 27992a9e4f7SAdrian Hunter return ts->stack[ts->cnt - 1].cp->in_kernel; 28092a9e4f7SAdrian Hunter } 28192a9e4f7SAdrian Hunter 28292a9e4f7SAdrian Hunter static int thread_stack__call_return(struct thread *thread, 28392a9e4f7SAdrian Hunter struct thread_stack *ts, size_t idx, 28492a9e4f7SAdrian Hunter u64 timestamp, u64 ref, bool no_return) 28592a9e4f7SAdrian Hunter { 28692a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 28792a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 28892a9e4f7SAdrian Hunter struct call_return cr = { 28992a9e4f7SAdrian Hunter .thread = thread, 29092a9e4f7SAdrian Hunter .comm = ts->comm, 29192a9e4f7SAdrian Hunter .db_id = 0, 29292a9e4f7SAdrian Hunter }; 293f435887eSAdrian Hunter u64 *parent_db_id; 29492a9e4f7SAdrian Hunter 29592a9e4f7SAdrian Hunter tse = &ts->stack[idx]; 29692a9e4f7SAdrian Hunter cr.cp = tse->cp; 29792a9e4f7SAdrian Hunter cr.call_time = tse->timestamp; 29892a9e4f7SAdrian Hunter cr.return_time = timestamp; 29992a9e4f7SAdrian Hunter cr.branch_count = ts->branch_count - tse->branch_count; 300003ccdc7SAdrian Hunter cr.insn_count = ts->insn_count - tse->insn_count; 301003ccdc7SAdrian Hunter cr.cyc_count = ts->cyc_count - tse->cyc_count; 302f435887eSAdrian Hunter cr.db_id = tse->db_id; 30392a9e4f7SAdrian Hunter cr.call_ref = tse->ref; 30492a9e4f7SAdrian Hunter cr.return_ref = ref; 30592a9e4f7SAdrian Hunter if (tse->no_call) 30692a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_CALL; 30792a9e4f7SAdrian Hunter if (no_return) 30892a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_RETURN; 309f08046cbSAdrian Hunter if (tse->non_call) 310f08046cbSAdrian Hunter cr.flags |= CALL_RETURN_NON_CALL; 31192a9e4f7SAdrian Hunter 312f435887eSAdrian Hunter /* 313f435887eSAdrian Hunter * The parent db_id must be assigned before exporting the child. Note 314f435887eSAdrian Hunter * it is not possible to export the parent first because its information 315f435887eSAdrian Hunter * is not yet complete because its 'return' has not yet been processed. 316f435887eSAdrian Hunter */ 317f435887eSAdrian Hunter parent_db_id = idx ? &(tse - 1)->db_id : NULL; 318f435887eSAdrian Hunter 319f435887eSAdrian Hunter return crp->process(&cr, parent_db_id, crp->data); 32092a9e4f7SAdrian Hunter } 32192a9e4f7SAdrian Hunter 322a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) 32392a9e4f7SAdrian Hunter { 32492a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 32592a9e4f7SAdrian Hunter int err; 32692a9e4f7SAdrian Hunter 32792a9e4f7SAdrian Hunter if (!crp) { 32892a9e4f7SAdrian Hunter ts->cnt = 0; 32992a9e4f7SAdrian Hunter return 0; 33092a9e4f7SAdrian Hunter } 33192a9e4f7SAdrian Hunter 33292a9e4f7SAdrian Hunter while (ts->cnt) { 33392a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 33492a9e4f7SAdrian Hunter ts->last_time, 0, true); 33592a9e4f7SAdrian Hunter if (err) { 33692a9e4f7SAdrian Hunter pr_err("Error flushing thread stack!\n"); 33792a9e4f7SAdrian Hunter ts->cnt = 0; 33892a9e4f7SAdrian Hunter return err; 33992a9e4f7SAdrian Hunter } 34092a9e4f7SAdrian Hunter } 34192a9e4f7SAdrian Hunter 34292a9e4f7SAdrian Hunter return 0; 34392a9e4f7SAdrian Hunter } 34492a9e4f7SAdrian Hunter 345a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread) 346a5499b37SAdrian Hunter { 347bd8e68acSAdrian Hunter struct thread_stack *ts = thread->ts; 348f6060ac6SAdrian Hunter unsigned int pos; 349f6060ac6SAdrian Hunter int err = 0; 350bd8e68acSAdrian Hunter 351f6060ac6SAdrian Hunter if (ts) { 352f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) { 353f6060ac6SAdrian Hunter int ret = __thread_stack__flush(thread, ts + pos); 354a5499b37SAdrian Hunter 355f6060ac6SAdrian Hunter if (ret) 356f6060ac6SAdrian Hunter err = ret; 357f6060ac6SAdrian Hunter } 358f6060ac6SAdrian Hunter } 359f6060ac6SAdrian Hunter 360f6060ac6SAdrian Hunter return err; 361a5499b37SAdrian Hunter } 362a5499b37SAdrian Hunter 363256d92bcSAdrian Hunter int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, 36400447ccdSAdrian Hunter u64 to_ip, u16 insn_len, u64 trace_nr) 36500447ccdSAdrian Hunter { 366256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 367bd8e68acSAdrian Hunter 36800447ccdSAdrian Hunter if (!thread) 36900447ccdSAdrian Hunter return -EINVAL; 37000447ccdSAdrian Hunter 371bd8e68acSAdrian Hunter if (!ts) { 372256d92bcSAdrian Hunter ts = thread_stack__new(thread, cpu, NULL); 373bd8e68acSAdrian Hunter if (!ts) { 37400447ccdSAdrian Hunter pr_warning("Out of memory: no thread stack\n"); 37500447ccdSAdrian Hunter return -ENOMEM; 37600447ccdSAdrian Hunter } 377bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 37800447ccdSAdrian Hunter } 37900447ccdSAdrian Hunter 38000447ccdSAdrian Hunter /* 38100447ccdSAdrian Hunter * When the trace is discontinuous, the trace_nr changes. In that case 38200447ccdSAdrian Hunter * the stack might be completely invalid. Better to report nothing than 38392a9e4f7SAdrian Hunter * to report something misleading, so flush the stack. 38400447ccdSAdrian Hunter */ 385bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 386bd8e68acSAdrian Hunter if (ts->trace_nr) 387bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 388bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 38900447ccdSAdrian Hunter } 39000447ccdSAdrian Hunter 39192a9e4f7SAdrian Hunter /* Stop here if thread_stack__process() is in use */ 392bd8e68acSAdrian Hunter if (ts->crp) 39392a9e4f7SAdrian Hunter return 0; 39492a9e4f7SAdrian Hunter 39500447ccdSAdrian Hunter if (flags & PERF_IP_FLAG_CALL) { 39600447ccdSAdrian Hunter u64 ret_addr; 39700447ccdSAdrian Hunter 39800447ccdSAdrian Hunter if (!to_ip) 39900447ccdSAdrian Hunter return 0; 40000447ccdSAdrian Hunter ret_addr = from_ip + insn_len; 40100447ccdSAdrian Hunter if (ret_addr == to_ip) 40200447ccdSAdrian Hunter return 0; /* Zero-length calls are excluded */ 403bd8e68acSAdrian Hunter return thread_stack__push(ts, ret_addr, 4044d60e5e3SAdrian Hunter flags & PERF_IP_FLAG_TRACE_END); 4054d60e5e3SAdrian Hunter } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { 4064d60e5e3SAdrian Hunter /* 4074d60e5e3SAdrian Hunter * If the caller did not change the trace number (which would 4084d60e5e3SAdrian Hunter * have flushed the stack) then try to make sense of the stack. 4094d60e5e3SAdrian Hunter * Possibly, tracing began after returning to the current 4104d60e5e3SAdrian Hunter * address, so try to pop that. Also, do not expect a call made 4114d60e5e3SAdrian Hunter * when the trace ended, to return, so pop that. 4124d60e5e3SAdrian Hunter */ 413bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 414bd8e68acSAdrian Hunter thread_stack__pop_trace_end(ts); 4154d60e5e3SAdrian Hunter } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { 416bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 41700447ccdSAdrian Hunter } 41800447ccdSAdrian Hunter 41900447ccdSAdrian Hunter return 0; 42000447ccdSAdrian Hunter } 42100447ccdSAdrian Hunter 422256d92bcSAdrian Hunter void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) 42392a9e4f7SAdrian Hunter { 424256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 425bd8e68acSAdrian Hunter 426bd8e68acSAdrian Hunter if (!ts) 42792a9e4f7SAdrian Hunter return; 42892a9e4f7SAdrian Hunter 429bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 430bd8e68acSAdrian Hunter if (ts->trace_nr) 431bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 432bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 43392a9e4f7SAdrian Hunter } 43492a9e4f7SAdrian Hunter } 43592a9e4f7SAdrian Hunter 436f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) 437f6060ac6SAdrian Hunter { 438f6060ac6SAdrian Hunter __thread_stack__flush(thread, ts); 439f6060ac6SAdrian Hunter zfree(&ts->stack); 440f6060ac6SAdrian Hunter } 441f6060ac6SAdrian Hunter 442f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) 443f6060ac6SAdrian Hunter { 444f6060ac6SAdrian Hunter unsigned int arr_sz = ts->arr_sz; 445f6060ac6SAdrian Hunter 446f6060ac6SAdrian Hunter __thread_stack__free(thread, ts); 447f6060ac6SAdrian Hunter memset(ts, 0, sizeof(*ts)); 448f6060ac6SAdrian Hunter ts->arr_sz = arr_sz; 449f6060ac6SAdrian Hunter } 450f6060ac6SAdrian Hunter 45100447ccdSAdrian Hunter void thread_stack__free(struct thread *thread) 45200447ccdSAdrian Hunter { 453bd8e68acSAdrian Hunter struct thread_stack *ts = thread->ts; 454f6060ac6SAdrian Hunter unsigned int pos; 455bd8e68acSAdrian Hunter 456bd8e68acSAdrian Hunter if (ts) { 457f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) 458f6060ac6SAdrian Hunter __thread_stack__free(thread, ts + pos); 45900447ccdSAdrian Hunter zfree(&thread->ts); 46000447ccdSAdrian Hunter } 46100447ccdSAdrian Hunter } 46200447ccdSAdrian Hunter 46324248306SAdrian Hunter static inline u64 callchain_context(u64 ip, u64 kernel_start) 46400447ccdSAdrian Hunter { 46524248306SAdrian Hunter return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; 46624248306SAdrian Hunter } 46700447ccdSAdrian Hunter 468256d92bcSAdrian Hunter void thread_stack__sample(struct thread *thread, int cpu, 469256d92bcSAdrian Hunter struct ip_callchain *chain, 47024248306SAdrian Hunter size_t sz, u64 ip, u64 kernel_start) 47124248306SAdrian Hunter { 472256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 47324248306SAdrian Hunter u64 context = callchain_context(ip, kernel_start); 47424248306SAdrian Hunter u64 last_context; 47524248306SAdrian Hunter size_t i, j; 47600447ccdSAdrian Hunter 47724248306SAdrian Hunter if (sz < 2) { 47824248306SAdrian Hunter chain->nr = 0; 47924248306SAdrian Hunter return; 48024248306SAdrian Hunter } 48100447ccdSAdrian Hunter 48224248306SAdrian Hunter chain->ips[0] = context; 48324248306SAdrian Hunter chain->ips[1] = ip; 48424248306SAdrian Hunter 485bd8e68acSAdrian Hunter if (!ts) { 48624248306SAdrian Hunter chain->nr = 2; 48724248306SAdrian Hunter return; 48824248306SAdrian Hunter } 48924248306SAdrian Hunter 49024248306SAdrian Hunter last_context = context; 49124248306SAdrian Hunter 492bd8e68acSAdrian Hunter for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { 493bd8e68acSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 49424248306SAdrian Hunter context = callchain_context(ip, kernel_start); 49524248306SAdrian Hunter if (context != last_context) { 49624248306SAdrian Hunter if (i >= sz - 1) 49724248306SAdrian Hunter break; 49824248306SAdrian Hunter chain->ips[i++] = context; 49924248306SAdrian Hunter last_context = context; 50024248306SAdrian Hunter } 50124248306SAdrian Hunter chain->ips[i] = ip; 50224248306SAdrian Hunter } 50324248306SAdrian Hunter 50424248306SAdrian Hunter chain->nr = i; 50500447ccdSAdrian Hunter } 50692a9e4f7SAdrian Hunter 50792a9e4f7SAdrian Hunter struct call_return_processor * 508f435887eSAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), 50992a9e4f7SAdrian Hunter void *data) 51092a9e4f7SAdrian Hunter { 51192a9e4f7SAdrian Hunter struct call_return_processor *crp; 51292a9e4f7SAdrian Hunter 51392a9e4f7SAdrian Hunter crp = zalloc(sizeof(struct call_return_processor)); 51492a9e4f7SAdrian Hunter if (!crp) 51592a9e4f7SAdrian Hunter return NULL; 51692a9e4f7SAdrian Hunter crp->cpr = call_path_root__new(); 51792a9e4f7SAdrian Hunter if (!crp->cpr) 51892a9e4f7SAdrian Hunter goto out_free; 51992a9e4f7SAdrian Hunter crp->process = process; 52092a9e4f7SAdrian Hunter crp->data = data; 52192a9e4f7SAdrian Hunter return crp; 52292a9e4f7SAdrian Hunter 52392a9e4f7SAdrian Hunter out_free: 52492a9e4f7SAdrian Hunter free(crp); 52592a9e4f7SAdrian Hunter return NULL; 52692a9e4f7SAdrian Hunter } 52792a9e4f7SAdrian Hunter 52892a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp) 52992a9e4f7SAdrian Hunter { 53092a9e4f7SAdrian Hunter if (crp) { 53192a9e4f7SAdrian Hunter call_path_root__free(crp->cpr); 53292a9e4f7SAdrian Hunter free(crp); 53392a9e4f7SAdrian Hunter } 53492a9e4f7SAdrian Hunter } 53592a9e4f7SAdrian Hunter 53692a9e4f7SAdrian Hunter static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, 53792a9e4f7SAdrian Hunter u64 timestamp, u64 ref, struct call_path *cp, 5382dcde4e1SAdrian Hunter bool no_call, bool trace_end) 53992a9e4f7SAdrian Hunter { 54092a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 54192a9e4f7SAdrian Hunter int err; 54292a9e4f7SAdrian Hunter 543e7a3a055SAdrian Hunter if (!cp) 544e7a3a055SAdrian Hunter return -ENOMEM; 545e7a3a055SAdrian Hunter 54692a9e4f7SAdrian Hunter if (ts->cnt == ts->sz) { 54792a9e4f7SAdrian Hunter err = thread_stack__grow(ts); 54892a9e4f7SAdrian Hunter if (err) 54992a9e4f7SAdrian Hunter return err; 55092a9e4f7SAdrian Hunter } 55192a9e4f7SAdrian Hunter 55292a9e4f7SAdrian Hunter tse = &ts->stack[ts->cnt++]; 55392a9e4f7SAdrian Hunter tse->ret_addr = ret_addr; 55492a9e4f7SAdrian Hunter tse->timestamp = timestamp; 55592a9e4f7SAdrian Hunter tse->ref = ref; 55692a9e4f7SAdrian Hunter tse->branch_count = ts->branch_count; 557003ccdc7SAdrian Hunter tse->insn_count = ts->insn_count; 558003ccdc7SAdrian Hunter tse->cyc_count = ts->cyc_count; 55992a9e4f7SAdrian Hunter tse->cp = cp; 56092a9e4f7SAdrian Hunter tse->no_call = no_call; 5612dcde4e1SAdrian Hunter tse->trace_end = trace_end; 562f08046cbSAdrian Hunter tse->non_call = false; 563f435887eSAdrian Hunter tse->db_id = 0; 56492a9e4f7SAdrian Hunter 56592a9e4f7SAdrian Hunter return 0; 56692a9e4f7SAdrian Hunter } 56792a9e4f7SAdrian Hunter 56892a9e4f7SAdrian Hunter static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, 56992a9e4f7SAdrian Hunter u64 ret_addr, u64 timestamp, u64 ref, 57092a9e4f7SAdrian Hunter struct symbol *sym) 57192a9e4f7SAdrian Hunter { 57292a9e4f7SAdrian Hunter int err; 57392a9e4f7SAdrian Hunter 57492a9e4f7SAdrian Hunter if (!ts->cnt) 57592a9e4f7SAdrian Hunter return 1; 57692a9e4f7SAdrian Hunter 57792a9e4f7SAdrian Hunter if (ts->cnt == 1) { 57892a9e4f7SAdrian Hunter struct thread_stack_entry *tse = &ts->stack[0]; 57992a9e4f7SAdrian Hunter 58092a9e4f7SAdrian Hunter if (tse->cp->sym == sym) 58192a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 58292a9e4f7SAdrian Hunter timestamp, ref, false); 58392a9e4f7SAdrian Hunter } 58492a9e4f7SAdrian Hunter 585f08046cbSAdrian Hunter if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && 586f08046cbSAdrian Hunter !ts->stack[ts->cnt - 1].non_call) { 58792a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 58892a9e4f7SAdrian Hunter timestamp, ref, false); 58992a9e4f7SAdrian Hunter } else { 59092a9e4f7SAdrian Hunter size_t i = ts->cnt - 1; 59192a9e4f7SAdrian Hunter 59292a9e4f7SAdrian Hunter while (i--) { 593f08046cbSAdrian Hunter if (ts->stack[i].ret_addr != ret_addr || 594f08046cbSAdrian Hunter ts->stack[i].non_call) 59592a9e4f7SAdrian Hunter continue; 59692a9e4f7SAdrian Hunter i += 1; 59792a9e4f7SAdrian Hunter while (ts->cnt > i) { 59892a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, 59992a9e4f7SAdrian Hunter --ts->cnt, 60092a9e4f7SAdrian Hunter timestamp, ref, 60192a9e4f7SAdrian Hunter true); 60292a9e4f7SAdrian Hunter if (err) 60392a9e4f7SAdrian Hunter return err; 60492a9e4f7SAdrian Hunter } 60592a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 60692a9e4f7SAdrian Hunter timestamp, ref, false); 60792a9e4f7SAdrian Hunter } 60892a9e4f7SAdrian Hunter } 60992a9e4f7SAdrian Hunter 61092a9e4f7SAdrian Hunter return 1; 61192a9e4f7SAdrian Hunter } 61292a9e4f7SAdrian Hunter 613e0b89511SAdrian Hunter static int thread_stack__bottom(struct thread_stack *ts, 61492a9e4f7SAdrian Hunter struct perf_sample *sample, 61592a9e4f7SAdrian Hunter struct addr_location *from_al, 61692a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref) 61792a9e4f7SAdrian Hunter { 61892a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 61992a9e4f7SAdrian Hunter struct call_path *cp; 62092a9e4f7SAdrian Hunter struct symbol *sym; 62192a9e4f7SAdrian Hunter u64 ip; 62292a9e4f7SAdrian Hunter 62392a9e4f7SAdrian Hunter if (sample->ip) { 62492a9e4f7SAdrian Hunter ip = sample->ip; 62592a9e4f7SAdrian Hunter sym = from_al->sym; 62692a9e4f7SAdrian Hunter } else if (sample->addr) { 62792a9e4f7SAdrian Hunter ip = sample->addr; 62892a9e4f7SAdrian Hunter sym = to_al->sym; 62992a9e4f7SAdrian Hunter } else { 63092a9e4f7SAdrian Hunter return 0; 63192a9e4f7SAdrian Hunter } 63292a9e4f7SAdrian Hunter 63392a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, 63492a9e4f7SAdrian Hunter ts->kernel_start); 63592a9e4f7SAdrian Hunter 636e0b89511SAdrian Hunter return thread_stack__push_cp(ts, ip, sample->time, ref, cp, 6372dcde4e1SAdrian Hunter true, false); 63892a9e4f7SAdrian Hunter } 63992a9e4f7SAdrian Hunter 64092a9e4f7SAdrian Hunter static int thread_stack__no_call_return(struct thread *thread, 64192a9e4f7SAdrian Hunter struct thread_stack *ts, 64292a9e4f7SAdrian Hunter struct perf_sample *sample, 64392a9e4f7SAdrian Hunter struct addr_location *from_al, 64492a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref) 64592a9e4f7SAdrian Hunter { 64692a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 64790c2cda7SAdrian Hunter struct call_path *root = &cpr->call_path; 64890c2cda7SAdrian Hunter struct symbol *fsym = from_al->sym; 64990c2cda7SAdrian Hunter struct symbol *tsym = to_al->sym; 65092a9e4f7SAdrian Hunter struct call_path *cp, *parent; 65192a9e4f7SAdrian Hunter u64 ks = ts->kernel_start; 65290c2cda7SAdrian Hunter u64 addr = sample->addr; 65390c2cda7SAdrian Hunter u64 tm = sample->time; 65490c2cda7SAdrian Hunter u64 ip = sample->ip; 65592a9e4f7SAdrian Hunter int err; 65692a9e4f7SAdrian Hunter 65790c2cda7SAdrian Hunter if (ip >= ks && addr < ks) { 65892a9e4f7SAdrian Hunter /* Return to userspace, so pop all kernel addresses */ 65992a9e4f7SAdrian Hunter while (thread_stack__in_kernel(ts)) { 66092a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 66190c2cda7SAdrian Hunter tm, ref, true); 66292a9e4f7SAdrian Hunter if (err) 66392a9e4f7SAdrian Hunter return err; 66492a9e4f7SAdrian Hunter } 66592a9e4f7SAdrian Hunter 66692a9e4f7SAdrian Hunter /* If the stack is empty, push the userspace address */ 66792a9e4f7SAdrian Hunter if (!ts->cnt) { 66890c2cda7SAdrian Hunter cp = call_path__findnew(cpr, root, tsym, addr, ks); 66990c2cda7SAdrian Hunter return thread_stack__push_cp(ts, 0, tm, ref, cp, true, 67090c2cda7SAdrian Hunter false); 67192a9e4f7SAdrian Hunter } 67290c2cda7SAdrian Hunter } else if (thread_stack__in_kernel(ts) && ip < ks) { 67392a9e4f7SAdrian Hunter /* Return to userspace, so pop all kernel addresses */ 67492a9e4f7SAdrian Hunter while (thread_stack__in_kernel(ts)) { 67592a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 67690c2cda7SAdrian Hunter tm, ref, true); 67792a9e4f7SAdrian Hunter if (err) 67892a9e4f7SAdrian Hunter return err; 67992a9e4f7SAdrian Hunter } 68092a9e4f7SAdrian Hunter } 68192a9e4f7SAdrian Hunter 68292a9e4f7SAdrian Hunter if (ts->cnt) 68392a9e4f7SAdrian Hunter parent = ts->stack[ts->cnt - 1].cp; 68492a9e4f7SAdrian Hunter else 68590c2cda7SAdrian Hunter parent = root; 68692a9e4f7SAdrian Hunter 6871f35cd65SAdrian Hunter if (parent->sym == from_al->sym) { 6881f35cd65SAdrian Hunter /* 6891f35cd65SAdrian Hunter * At the bottom of the stack, assume the missing 'call' was 6901f35cd65SAdrian Hunter * before the trace started. So, pop the current symbol and push 6911f35cd65SAdrian Hunter * the 'to' symbol. 6921f35cd65SAdrian Hunter */ 6931f35cd65SAdrian Hunter if (ts->cnt == 1) { 6941f35cd65SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 6951f35cd65SAdrian Hunter tm, ref, false); 6961f35cd65SAdrian Hunter if (err) 6971f35cd65SAdrian Hunter return err; 6981f35cd65SAdrian Hunter } 6991f35cd65SAdrian Hunter 7001f35cd65SAdrian Hunter if (!ts->cnt) { 7011f35cd65SAdrian Hunter cp = call_path__findnew(cpr, root, tsym, addr, ks); 7021f35cd65SAdrian Hunter 7031f35cd65SAdrian Hunter return thread_stack__push_cp(ts, addr, tm, ref, cp, 7041f35cd65SAdrian Hunter true, false); 7051f35cd65SAdrian Hunter } 7061f35cd65SAdrian Hunter 7071f35cd65SAdrian Hunter /* 7081f35cd65SAdrian Hunter * Otherwise assume the 'return' is being used as a jump (e.g. 7091f35cd65SAdrian Hunter * retpoline) and just push the 'to' symbol. 7101f35cd65SAdrian Hunter */ 7111f35cd65SAdrian Hunter cp = call_path__findnew(cpr, parent, tsym, addr, ks); 7121f35cd65SAdrian Hunter 7131f35cd65SAdrian Hunter err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); 7141f35cd65SAdrian Hunter if (!err) 7151f35cd65SAdrian Hunter ts->stack[ts->cnt - 1].non_call = true; 7161f35cd65SAdrian Hunter 7171f35cd65SAdrian Hunter return err; 7181f35cd65SAdrian Hunter } 7191f35cd65SAdrian Hunter 7201f35cd65SAdrian Hunter /* 7211f35cd65SAdrian Hunter * Assume 'parent' has not yet returned, so push 'to', and then push and 7221f35cd65SAdrian Hunter * pop 'from'. 7231f35cd65SAdrian Hunter */ 7241f35cd65SAdrian Hunter 7251f35cd65SAdrian Hunter cp = call_path__findnew(cpr, parent, tsym, addr, ks); 72692a9e4f7SAdrian Hunter 72790c2cda7SAdrian Hunter err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); 72892a9e4f7SAdrian Hunter if (err) 72992a9e4f7SAdrian Hunter return err; 73092a9e4f7SAdrian Hunter 7311f35cd65SAdrian Hunter cp = call_path__findnew(cpr, cp, fsym, ip, ks); 7321f35cd65SAdrian Hunter 7331f35cd65SAdrian Hunter err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); 7341f35cd65SAdrian Hunter if (err) 7351f35cd65SAdrian Hunter return err; 7361f35cd65SAdrian Hunter 7371f35cd65SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); 73892a9e4f7SAdrian Hunter } 73992a9e4f7SAdrian Hunter 74092a9e4f7SAdrian Hunter static int thread_stack__trace_begin(struct thread *thread, 74192a9e4f7SAdrian Hunter struct thread_stack *ts, u64 timestamp, 74292a9e4f7SAdrian Hunter u64 ref) 74392a9e4f7SAdrian Hunter { 74492a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 74592a9e4f7SAdrian Hunter int err; 74692a9e4f7SAdrian Hunter 74792a9e4f7SAdrian Hunter if (!ts->cnt) 74892a9e4f7SAdrian Hunter return 0; 74992a9e4f7SAdrian Hunter 75092a9e4f7SAdrian Hunter /* Pop trace end */ 75192a9e4f7SAdrian Hunter tse = &ts->stack[ts->cnt - 1]; 7522dcde4e1SAdrian Hunter if (tse->trace_end) { 75392a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 75492a9e4f7SAdrian Hunter timestamp, ref, false); 75592a9e4f7SAdrian Hunter if (err) 75692a9e4f7SAdrian Hunter return err; 75792a9e4f7SAdrian Hunter } 75892a9e4f7SAdrian Hunter 75992a9e4f7SAdrian Hunter return 0; 76092a9e4f7SAdrian Hunter } 76192a9e4f7SAdrian Hunter 76292a9e4f7SAdrian Hunter static int thread_stack__trace_end(struct thread_stack *ts, 76392a9e4f7SAdrian Hunter struct perf_sample *sample, u64 ref) 76492a9e4f7SAdrian Hunter { 76592a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 76692a9e4f7SAdrian Hunter struct call_path *cp; 76792a9e4f7SAdrian Hunter u64 ret_addr; 76892a9e4f7SAdrian Hunter 76992a9e4f7SAdrian Hunter /* No point having 'trace end' on the bottom of the stack */ 77092a9e4f7SAdrian Hunter if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) 77192a9e4f7SAdrian Hunter return 0; 77292a9e4f7SAdrian Hunter 77392a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, 77492a9e4f7SAdrian Hunter ts->kernel_start); 77592a9e4f7SAdrian Hunter 77692a9e4f7SAdrian Hunter ret_addr = sample->ip + sample->insn_len; 77792a9e4f7SAdrian Hunter 77892a9e4f7SAdrian Hunter return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, 7792dcde4e1SAdrian Hunter false, true); 78092a9e4f7SAdrian Hunter } 78192a9e4f7SAdrian Hunter 7823c0cd952SAdrian Hunter static bool is_x86_retpoline(const char *name) 7833c0cd952SAdrian Hunter { 7843c0cd952SAdrian Hunter const char *p = strstr(name, "__x86_indirect_thunk_"); 7853c0cd952SAdrian Hunter 7863c0cd952SAdrian Hunter return p == name || !strcmp(name, "__indirect_thunk_start"); 7873c0cd952SAdrian Hunter } 7883c0cd952SAdrian Hunter 7893c0cd952SAdrian Hunter /* 7903c0cd952SAdrian Hunter * x86 retpoline functions pollute the call graph. This function removes them. 7913c0cd952SAdrian Hunter * This does not handle function return thunks, nor is there any improvement 7923c0cd952SAdrian Hunter * for the handling of inline thunks or extern thunks. 7933c0cd952SAdrian Hunter */ 7943c0cd952SAdrian Hunter static int thread_stack__x86_retpoline(struct thread_stack *ts, 7953c0cd952SAdrian Hunter struct perf_sample *sample, 7963c0cd952SAdrian Hunter struct addr_location *to_al) 7973c0cd952SAdrian Hunter { 7983c0cd952SAdrian Hunter struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; 7993c0cd952SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 8003c0cd952SAdrian Hunter struct symbol *sym = tse->cp->sym; 8013c0cd952SAdrian Hunter struct symbol *tsym = to_al->sym; 8023c0cd952SAdrian Hunter struct call_path *cp; 8033c0cd952SAdrian Hunter 8043c0cd952SAdrian Hunter if (sym && is_x86_retpoline(sym->name)) { 8053c0cd952SAdrian Hunter /* 8063c0cd952SAdrian Hunter * This is a x86 retpoline fn. It pollutes the call graph by 8073c0cd952SAdrian Hunter * showing up everywhere there is an indirect branch, but does 8083c0cd952SAdrian Hunter * not itself mean anything. Here the top-of-stack is removed, 8093c0cd952SAdrian Hunter * by decrementing the stack count, and then further down, the 8103c0cd952SAdrian Hunter * resulting top-of-stack is replaced with the actual target. 8113c0cd952SAdrian Hunter * The result is that the retpoline functions will no longer 8123c0cd952SAdrian Hunter * appear in the call graph. Note this only affects the call 8133c0cd952SAdrian Hunter * graph, since all the original branches are left unchanged. 8143c0cd952SAdrian Hunter */ 8153c0cd952SAdrian Hunter ts->cnt -= 1; 8163c0cd952SAdrian Hunter sym = ts->stack[ts->cnt - 2].cp->sym; 8173c0cd952SAdrian Hunter if (sym && sym == tsym && to_al->addr != tsym->start) { 8183c0cd952SAdrian Hunter /* 8193c0cd952SAdrian Hunter * Target is back to the middle of the symbol we came 8203c0cd952SAdrian Hunter * from so assume it is an indirect jmp and forget it 8213c0cd952SAdrian Hunter * altogether. 8223c0cd952SAdrian Hunter */ 8233c0cd952SAdrian Hunter ts->cnt -= 1; 8243c0cd952SAdrian Hunter return 0; 8253c0cd952SAdrian Hunter } 8263c0cd952SAdrian Hunter } else if (sym && sym == tsym) { 8273c0cd952SAdrian Hunter /* 8283c0cd952SAdrian Hunter * Target is back to the symbol we came from so assume it is an 8293c0cd952SAdrian Hunter * indirect jmp and forget it altogether. 8303c0cd952SAdrian Hunter */ 8313c0cd952SAdrian Hunter ts->cnt -= 1; 8323c0cd952SAdrian Hunter return 0; 8333c0cd952SAdrian Hunter } 8343c0cd952SAdrian Hunter 8353c0cd952SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, 8363c0cd952SAdrian Hunter sample->addr, ts->kernel_start); 8373c0cd952SAdrian Hunter if (!cp) 8383c0cd952SAdrian Hunter return -ENOMEM; 8393c0cd952SAdrian Hunter 8403c0cd952SAdrian Hunter /* Replace the top-of-stack with the actual target */ 8413c0cd952SAdrian Hunter ts->stack[ts->cnt - 1].cp = cp; 8423c0cd952SAdrian Hunter 8433c0cd952SAdrian Hunter return 0; 8443c0cd952SAdrian Hunter } 8453c0cd952SAdrian Hunter 84692a9e4f7SAdrian Hunter int thread_stack__process(struct thread *thread, struct comm *comm, 84792a9e4f7SAdrian Hunter struct perf_sample *sample, 84892a9e4f7SAdrian Hunter struct addr_location *from_al, 84992a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref, 85092a9e4f7SAdrian Hunter struct call_return_processor *crp) 85192a9e4f7SAdrian Hunter { 852256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, sample->cpu); 8533c0cd952SAdrian Hunter enum retpoline_state_t rstate; 85492a9e4f7SAdrian Hunter int err = 0; 85592a9e4f7SAdrian Hunter 85603b32cb2SAdrian Hunter if (ts && !ts->crp) { 85792a9e4f7SAdrian Hunter /* Supersede thread_stack__event() */ 858f6060ac6SAdrian Hunter thread_stack__reset(thread, ts); 85903b32cb2SAdrian Hunter ts = NULL; 86092a9e4f7SAdrian Hunter } 86103b32cb2SAdrian Hunter 86203b32cb2SAdrian Hunter if (!ts) { 863256d92bcSAdrian Hunter ts = thread_stack__new(thread, sample->cpu, crp); 864bd8e68acSAdrian Hunter if (!ts) 86592a9e4f7SAdrian Hunter return -ENOMEM; 86692a9e4f7SAdrian Hunter ts->comm = comm; 86792a9e4f7SAdrian Hunter } 86892a9e4f7SAdrian Hunter 8693c0cd952SAdrian Hunter rstate = ts->rstate; 8703c0cd952SAdrian Hunter if (rstate == X86_RETPOLINE_DETECTED) 8713c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_POSSIBLE; 8723c0cd952SAdrian Hunter 87392a9e4f7SAdrian Hunter /* Flush stack on exec */ 87492a9e4f7SAdrian Hunter if (ts->comm != comm && thread->pid_ == thread->tid) { 875a5499b37SAdrian Hunter err = __thread_stack__flush(thread, ts); 87692a9e4f7SAdrian Hunter if (err) 87792a9e4f7SAdrian Hunter return err; 87892a9e4f7SAdrian Hunter ts->comm = comm; 87992a9e4f7SAdrian Hunter } 88092a9e4f7SAdrian Hunter 88192a9e4f7SAdrian Hunter /* If the stack is empty, put the current symbol on the stack */ 88292a9e4f7SAdrian Hunter if (!ts->cnt) { 883e0b89511SAdrian Hunter err = thread_stack__bottom(ts, sample, from_al, to_al, ref); 88492a9e4f7SAdrian Hunter if (err) 88592a9e4f7SAdrian Hunter return err; 88692a9e4f7SAdrian Hunter } 88792a9e4f7SAdrian Hunter 88892a9e4f7SAdrian Hunter ts->branch_count += 1; 889003ccdc7SAdrian Hunter ts->insn_count += sample->insn_cnt; 890003ccdc7SAdrian Hunter ts->cyc_count += sample->cyc_cnt; 89192a9e4f7SAdrian Hunter ts->last_time = sample->time; 89292a9e4f7SAdrian Hunter 89392a9e4f7SAdrian Hunter if (sample->flags & PERF_IP_FLAG_CALL) { 8942dcde4e1SAdrian Hunter bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; 89592a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 89692a9e4f7SAdrian Hunter struct call_path *cp; 89792a9e4f7SAdrian Hunter u64 ret_addr; 89892a9e4f7SAdrian Hunter 89992a9e4f7SAdrian Hunter if (!sample->ip || !sample->addr) 90092a9e4f7SAdrian Hunter return 0; 90192a9e4f7SAdrian Hunter 90292a9e4f7SAdrian Hunter ret_addr = sample->ip + sample->insn_len; 90392a9e4f7SAdrian Hunter if (ret_addr == sample->addr) 90492a9e4f7SAdrian Hunter return 0; /* Zero-length calls are excluded */ 90592a9e4f7SAdrian Hunter 90692a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 90792a9e4f7SAdrian Hunter to_al->sym, sample->addr, 90892a9e4f7SAdrian Hunter ts->kernel_start); 90992a9e4f7SAdrian Hunter err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, 9102dcde4e1SAdrian Hunter cp, false, trace_end); 9113c0cd952SAdrian Hunter 9123c0cd952SAdrian Hunter /* 9133c0cd952SAdrian Hunter * A call to the same symbol but not the start of the symbol, 9143c0cd952SAdrian Hunter * may be the start of a x86 retpoline. 9153c0cd952SAdrian Hunter */ 9163c0cd952SAdrian Hunter if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && 9173c0cd952SAdrian Hunter from_al->sym == to_al->sym && 9183c0cd952SAdrian Hunter to_al->addr != to_al->sym->start) 9193c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_DETECTED; 9203c0cd952SAdrian Hunter 92192a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_RETURN) { 92292a9e4f7SAdrian Hunter if (!sample->ip || !sample->addr) 92392a9e4f7SAdrian Hunter return 0; 92492a9e4f7SAdrian Hunter 9253c0cd952SAdrian Hunter /* x86 retpoline 'return' doesn't match the stack */ 9263c0cd952SAdrian Hunter if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && 9273c0cd952SAdrian Hunter ts->stack[ts->cnt - 1].ret_addr != sample->addr) 9283c0cd952SAdrian Hunter return thread_stack__x86_retpoline(ts, sample, to_al); 9293c0cd952SAdrian Hunter 93092a9e4f7SAdrian Hunter err = thread_stack__pop_cp(thread, ts, sample->addr, 93192a9e4f7SAdrian Hunter sample->time, ref, from_al->sym); 93292a9e4f7SAdrian Hunter if (err) { 93392a9e4f7SAdrian Hunter if (err < 0) 93492a9e4f7SAdrian Hunter return err; 93592a9e4f7SAdrian Hunter err = thread_stack__no_call_return(thread, ts, sample, 93692a9e4f7SAdrian Hunter from_al, to_al, ref); 93792a9e4f7SAdrian Hunter } 93892a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { 93992a9e4f7SAdrian Hunter err = thread_stack__trace_begin(thread, ts, sample->time, ref); 94092a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { 94192a9e4f7SAdrian Hunter err = thread_stack__trace_end(ts, sample, ref); 942f08046cbSAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_BRANCH && 943f08046cbSAdrian Hunter from_al->sym != to_al->sym && to_al->sym && 944f08046cbSAdrian Hunter to_al->addr == to_al->sym->start) { 945f08046cbSAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 946f08046cbSAdrian Hunter struct call_path *cp; 947f08046cbSAdrian Hunter 948f08046cbSAdrian Hunter /* 949f08046cbSAdrian Hunter * The compiler might optimize a call/ret combination by making 950f08046cbSAdrian Hunter * it a jmp. Make that visible by recording on the stack a 951f08046cbSAdrian Hunter * branch to the start of a different symbol. Note, that means 952f08046cbSAdrian Hunter * when a ret pops the stack, all jmps must be popped off first. 953f08046cbSAdrian Hunter */ 954f08046cbSAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 955f08046cbSAdrian Hunter to_al->sym, sample->addr, 956f08046cbSAdrian Hunter ts->kernel_start); 957f08046cbSAdrian Hunter err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, 958f08046cbSAdrian Hunter false); 959f08046cbSAdrian Hunter if (!err) 960f08046cbSAdrian Hunter ts->stack[ts->cnt - 1].non_call = true; 96192a9e4f7SAdrian Hunter } 96292a9e4f7SAdrian Hunter 96392a9e4f7SAdrian Hunter return err; 96492a9e4f7SAdrian Hunter } 965e216708dSAdrian Hunter 966256d92bcSAdrian Hunter size_t thread_stack__depth(struct thread *thread, int cpu) 967e216708dSAdrian Hunter { 968256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 969bd8e68acSAdrian Hunter 970bd8e68acSAdrian Hunter if (!ts) 971e216708dSAdrian Hunter return 0; 972bd8e68acSAdrian Hunter return ts->cnt; 973e216708dSAdrian Hunter } 974