12025cf9eSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 200447ccdSAdrian Hunter /* 300447ccdSAdrian Hunter * thread-stack.c: Synthesize a thread's stack using call / return events 400447ccdSAdrian Hunter * Copyright (c) 2014, Intel Corporation. 500447ccdSAdrian Hunter */ 600447ccdSAdrian Hunter 792a9e4f7SAdrian Hunter #include <linux/rbtree.h> 892a9e4f7SAdrian Hunter #include <linux/list.h> 9256d92bcSAdrian Hunter #include <linux/log2.h> 107f7c536fSArnaldo Carvalho de Melo #include <linux/zalloc.h> 11a43783aeSArnaldo Carvalho de Melo #include <errno.h> 12215a0d30SArnaldo Carvalho de Melo #include <stdlib.h> 138520a98dSArnaldo Carvalho de Melo #include <string.h> 1400447ccdSAdrian Hunter #include "thread.h" 1500447ccdSAdrian Hunter #include "event.h" 1692a9e4f7SAdrian Hunter #include "machine.h" 173c0cd952SAdrian Hunter #include "env.h" 1800447ccdSAdrian Hunter #include "debug.h" 1992a9e4f7SAdrian Hunter #include "symbol.h" 2092a9e4f7SAdrian Hunter #include "comm.h" 21451db126SChris Phlipot #include "call-path.h" 2200447ccdSAdrian Hunter #include "thread-stack.h" 2300447ccdSAdrian Hunter 2492a9e4f7SAdrian Hunter #define STACK_GROWTH 2048 2592a9e4f7SAdrian Hunter 263c0cd952SAdrian Hunter /* 273c0cd952SAdrian Hunter * State of retpoline detection. 283c0cd952SAdrian Hunter * 293c0cd952SAdrian Hunter * RETPOLINE_NONE: no retpoline detection 303c0cd952SAdrian Hunter * X86_RETPOLINE_POSSIBLE: x86 retpoline possible 313c0cd952SAdrian Hunter * X86_RETPOLINE_DETECTED: x86 retpoline detected 323c0cd952SAdrian Hunter */ 333c0cd952SAdrian Hunter enum retpoline_state_t { 343c0cd952SAdrian Hunter RETPOLINE_NONE, 353c0cd952SAdrian Hunter X86_RETPOLINE_POSSIBLE, 363c0cd952SAdrian Hunter X86_RETPOLINE_DETECTED, 373c0cd952SAdrian Hunter }; 383c0cd952SAdrian Hunter 3992a9e4f7SAdrian Hunter /** 4092a9e4f7SAdrian Hunter * struct thread_stack_entry - thread stack entry. 4192a9e4f7SAdrian Hunter * @ret_addr: return address 4292a9e4f7SAdrian Hunter * @timestamp: timestamp (if known) 4392a9e4f7SAdrian Hunter * @ref: external reference (e.g. db_id of sample) 4492a9e4f7SAdrian Hunter * @branch_count: the branch count when the entry was created 45003ccdc7SAdrian Hunter * @insn_count: the instruction count when the entry was created 46003ccdc7SAdrian Hunter * @cyc_count the cycle count when the entry was created 47f435887eSAdrian Hunter * @db_id: id used for db-export 4892a9e4f7SAdrian Hunter * @cp: call path 4992a9e4f7SAdrian Hunter * @no_call: a 'call' was not seen 504d60e5e3SAdrian Hunter * @trace_end: a 'call' but trace ended 51f08046cbSAdrian Hunter * @non_call: a branch but not a 'call' to the start of a different symbol 5292a9e4f7SAdrian Hunter */ 5392a9e4f7SAdrian Hunter struct thread_stack_entry { 5492a9e4f7SAdrian Hunter u64 ret_addr; 5592a9e4f7SAdrian Hunter u64 timestamp; 5692a9e4f7SAdrian Hunter u64 ref; 5792a9e4f7SAdrian Hunter u64 branch_count; 58003ccdc7SAdrian Hunter u64 insn_count; 59003ccdc7SAdrian Hunter u64 cyc_count; 60f435887eSAdrian Hunter u64 db_id; 6192a9e4f7SAdrian Hunter struct call_path *cp; 6292a9e4f7SAdrian Hunter bool no_call; 634d60e5e3SAdrian Hunter bool trace_end; 64f08046cbSAdrian Hunter bool non_call; 6592a9e4f7SAdrian Hunter }; 6692a9e4f7SAdrian Hunter 6792a9e4f7SAdrian Hunter /** 6892a9e4f7SAdrian Hunter * struct thread_stack - thread stack constructed from 'call' and 'return' 6992a9e4f7SAdrian Hunter * branch samples. 7092a9e4f7SAdrian Hunter * @stack: array that holds the stack 7192a9e4f7SAdrian Hunter * @cnt: number of entries in the stack 7292a9e4f7SAdrian Hunter * @sz: current maximum stack size 7392a9e4f7SAdrian Hunter * @trace_nr: current trace number 7492a9e4f7SAdrian Hunter * @branch_count: running branch count 75003ccdc7SAdrian Hunter * @insn_count: running instruction count 76003ccdc7SAdrian Hunter * @cyc_count running cycle count 7792a9e4f7SAdrian Hunter * @kernel_start: kernel start address 7892a9e4f7SAdrian Hunter * @last_time: last timestamp 7992a9e4f7SAdrian Hunter * @crp: call/return processor 8092a9e4f7SAdrian Hunter * @comm: current comm 81f6060ac6SAdrian Hunter * @arr_sz: size of array if this is the first element of an array 823c0cd952SAdrian Hunter * @rstate: used to detect retpolines 8386d67180SAdrian Hunter * @br_stack_rb: branch stack (ring buffer) 8486d67180SAdrian Hunter * @br_stack_sz: maximum branch stack size 8586d67180SAdrian Hunter * @br_stack_pos: current position in @br_stack_rb 8686d67180SAdrian Hunter * @mispred_all: mark all branches as mispredicted 8792a9e4f7SAdrian Hunter */ 8800447ccdSAdrian Hunter struct thread_stack { 8900447ccdSAdrian Hunter struct thread_stack_entry *stack; 9000447ccdSAdrian Hunter size_t cnt; 9100447ccdSAdrian Hunter size_t sz; 9200447ccdSAdrian Hunter u64 trace_nr; 9392a9e4f7SAdrian Hunter u64 branch_count; 94003ccdc7SAdrian Hunter u64 insn_count; 95003ccdc7SAdrian Hunter u64 cyc_count; 9692a9e4f7SAdrian Hunter u64 kernel_start; 9792a9e4f7SAdrian Hunter u64 last_time; 9892a9e4f7SAdrian Hunter struct call_return_processor *crp; 9992a9e4f7SAdrian Hunter struct comm *comm; 100f6060ac6SAdrian Hunter unsigned int arr_sz; 1013c0cd952SAdrian Hunter enum retpoline_state_t rstate; 10286d67180SAdrian Hunter struct branch_stack *br_stack_rb; 10386d67180SAdrian Hunter unsigned int br_stack_sz; 10486d67180SAdrian Hunter unsigned int br_stack_pos; 10586d67180SAdrian Hunter bool mispred_all; 10600447ccdSAdrian Hunter }; 10700447ccdSAdrian Hunter 108256d92bcSAdrian Hunter /* 109256d92bcSAdrian Hunter * Assume pid == tid == 0 identifies the idle task as defined by 110256d92bcSAdrian Hunter * perf_session__register_idle_thread(). The idle task is really 1 task per cpu, 111256d92bcSAdrian Hunter * and therefore requires a stack for each cpu. 112256d92bcSAdrian Hunter */ 113256d92bcSAdrian Hunter static inline bool thread_stack__per_cpu(struct thread *thread) 114256d92bcSAdrian Hunter { 115*ee84a303SIan Rogers return !(thread__tid(thread) || thread__pid(thread)); 116256d92bcSAdrian Hunter } 117256d92bcSAdrian Hunter 11800447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts) 11900447ccdSAdrian Hunter { 12000447ccdSAdrian Hunter struct thread_stack_entry *new_stack; 12100447ccdSAdrian Hunter size_t sz, new_sz; 12200447ccdSAdrian Hunter 12300447ccdSAdrian Hunter new_sz = ts->sz + STACK_GROWTH; 12400447ccdSAdrian Hunter sz = new_sz * sizeof(struct thread_stack_entry); 12500447ccdSAdrian Hunter 12600447ccdSAdrian Hunter new_stack = realloc(ts->stack, sz); 12700447ccdSAdrian Hunter if (!new_stack) 12800447ccdSAdrian Hunter return -ENOMEM; 12900447ccdSAdrian Hunter 13000447ccdSAdrian Hunter ts->stack = new_stack; 13100447ccdSAdrian Hunter ts->sz = new_sz; 13200447ccdSAdrian Hunter 13300447ccdSAdrian Hunter return 0; 13400447ccdSAdrian Hunter } 13500447ccdSAdrian Hunter 1362e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread, 13786d67180SAdrian Hunter struct call_return_processor *crp, 13886d67180SAdrian Hunter bool callstack, unsigned int br_stack_sz) 1392e9e8688SAdrian Hunter { 1402e9e8688SAdrian Hunter int err; 1412e9e8688SAdrian Hunter 14286d67180SAdrian Hunter if (callstack) { 1432e9e8688SAdrian Hunter err = thread_stack__grow(ts); 1442e9e8688SAdrian Hunter if (err) 1452e9e8688SAdrian Hunter return err; 14686d67180SAdrian Hunter } 14786d67180SAdrian Hunter 14886d67180SAdrian Hunter if (br_stack_sz) { 14986d67180SAdrian Hunter size_t sz = sizeof(struct branch_stack); 15086d67180SAdrian Hunter 15186d67180SAdrian Hunter sz += br_stack_sz * sizeof(struct branch_entry); 15286d67180SAdrian Hunter ts->br_stack_rb = zalloc(sz); 15386d67180SAdrian Hunter if (!ts->br_stack_rb) 15486d67180SAdrian Hunter return -ENOMEM; 15586d67180SAdrian Hunter ts->br_stack_sz = br_stack_sz; 15686d67180SAdrian Hunter } 1572e9e8688SAdrian Hunter 158*ee84a303SIan Rogers if (thread__maps(thread) && maps__machine(thread__maps(thread))) { 159*ee84a303SIan Rogers struct machine *machine = maps__machine(thread__maps(thread)); 1603c0cd952SAdrian Hunter const char *arch = perf_env__arch(machine->env); 1613c0cd952SAdrian Hunter 1623c0cd952SAdrian Hunter ts->kernel_start = machine__kernel_start(machine); 1633c0cd952SAdrian Hunter if (!strcmp(arch, "x86")) 1643c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_POSSIBLE; 1653c0cd952SAdrian Hunter } else { 1662e9e8688SAdrian Hunter ts->kernel_start = 1ULL << 63; 1673c0cd952SAdrian Hunter } 1682e9e8688SAdrian Hunter ts->crp = crp; 1692e9e8688SAdrian Hunter 1702e9e8688SAdrian Hunter return 0; 1712e9e8688SAdrian Hunter } 1722e9e8688SAdrian Hunter 173256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, 17486d67180SAdrian Hunter struct call_return_processor *crp, 17586d67180SAdrian Hunter bool callstack, 17686d67180SAdrian Hunter unsigned int br_stack_sz) 17700447ccdSAdrian Hunter { 178*ee84a303SIan Rogers struct thread_stack *ts = thread__ts(thread), *new_ts; 179139f42f3SAdrian Hunter unsigned int old_sz = ts ? ts->arr_sz : 0; 180139f42f3SAdrian Hunter unsigned int new_sz = 1; 18100447ccdSAdrian Hunter 182256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0) 183256d92bcSAdrian Hunter new_sz = roundup_pow_of_two(cpu + 1); 184256d92bcSAdrian Hunter 185139f42f3SAdrian Hunter if (!ts || new_sz > old_sz) { 186139f42f3SAdrian Hunter new_ts = calloc(new_sz, sizeof(*ts)); 187139f42f3SAdrian Hunter if (!new_ts) 18800447ccdSAdrian Hunter return NULL; 189139f42f3SAdrian Hunter if (ts) 190139f42f3SAdrian Hunter memcpy(new_ts, ts, old_sz * sizeof(*ts)); 191139f42f3SAdrian Hunter new_ts->arr_sz = new_sz; 192*ee84a303SIan Rogers free(thread__ts(thread)); 193*ee84a303SIan Rogers thread__set_ts(thread, new_ts); 194139f42f3SAdrian Hunter ts = new_ts; 19500447ccdSAdrian Hunter } 19600447ccdSAdrian Hunter 197256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread) && cpu > 0 && 198256d92bcSAdrian Hunter (unsigned int)cpu < ts->arr_sz) 199256d92bcSAdrian Hunter ts += cpu; 200256d92bcSAdrian Hunter 201139f42f3SAdrian Hunter if (!ts->stack && 20286d67180SAdrian Hunter thread_stack__init(ts, thread, crp, callstack, br_stack_sz)) 203139f42f3SAdrian Hunter return NULL; 204bd8e68acSAdrian Hunter 20500447ccdSAdrian Hunter return ts; 20600447ccdSAdrian Hunter } 20700447ccdSAdrian Hunter 208256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu) 209bd8e68acSAdrian Hunter { 210*ee84a303SIan Rogers struct thread_stack *ts = thread__ts(thread); 211256d92bcSAdrian Hunter 212256d92bcSAdrian Hunter if (cpu < 0) 213256d92bcSAdrian Hunter cpu = 0; 214256d92bcSAdrian Hunter 215256d92bcSAdrian Hunter if (!ts || (unsigned int)cpu >= ts->arr_sz) 216256d92bcSAdrian Hunter return NULL; 217256d92bcSAdrian Hunter 218256d92bcSAdrian Hunter ts += cpu; 219256d92bcSAdrian Hunter 220256d92bcSAdrian Hunter if (!ts->stack) 221256d92bcSAdrian Hunter return NULL; 222256d92bcSAdrian Hunter 223256d92bcSAdrian Hunter return ts; 224256d92bcSAdrian Hunter } 225256d92bcSAdrian Hunter 226256d92bcSAdrian Hunter static inline struct thread_stack *thread__stack(struct thread *thread, 227256d92bcSAdrian Hunter int cpu) 228256d92bcSAdrian Hunter { 229256d92bcSAdrian Hunter if (!thread) 230256d92bcSAdrian Hunter return NULL; 231256d92bcSAdrian Hunter 232256d92bcSAdrian Hunter if (thread_stack__per_cpu(thread)) 233256d92bcSAdrian Hunter return thread__cpu_stack(thread, cpu); 234256d92bcSAdrian Hunter 235*ee84a303SIan Rogers return thread__ts(thread); 236bd8e68acSAdrian Hunter } 237bd8e68acSAdrian Hunter 2384d60e5e3SAdrian Hunter static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, 2394d60e5e3SAdrian Hunter bool trace_end) 24000447ccdSAdrian Hunter { 24100447ccdSAdrian Hunter int err = 0; 24200447ccdSAdrian Hunter 24300447ccdSAdrian Hunter if (ts->cnt == ts->sz) { 24400447ccdSAdrian Hunter err = thread_stack__grow(ts); 24500447ccdSAdrian Hunter if (err) { 24600447ccdSAdrian Hunter pr_warning("Out of memory: discarding thread stack\n"); 24700447ccdSAdrian Hunter ts->cnt = 0; 24800447ccdSAdrian Hunter } 24900447ccdSAdrian Hunter } 25000447ccdSAdrian Hunter 2514d60e5e3SAdrian Hunter ts->stack[ts->cnt].trace_end = trace_end; 25200447ccdSAdrian Hunter ts->stack[ts->cnt++].ret_addr = ret_addr; 25300447ccdSAdrian Hunter 25400447ccdSAdrian Hunter return err; 25500447ccdSAdrian Hunter } 25600447ccdSAdrian Hunter 25700447ccdSAdrian Hunter static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) 25800447ccdSAdrian Hunter { 25900447ccdSAdrian Hunter size_t i; 26000447ccdSAdrian Hunter 26100447ccdSAdrian Hunter /* 26200447ccdSAdrian Hunter * In some cases there may be functions which are not seen to return. 26300447ccdSAdrian Hunter * For example when setjmp / longjmp has been used. Or the perf context 26400447ccdSAdrian Hunter * switch in the kernel which doesn't stop and start tracing in exactly 26500447ccdSAdrian Hunter * the same code path. When that happens the return address will be 26600447ccdSAdrian Hunter * further down the stack. If the return address is not found at all, 26700447ccdSAdrian Hunter * we assume the opposite (i.e. this is a return for a call that wasn't 26800447ccdSAdrian Hunter * seen for some reason) and leave the stack alone. 26900447ccdSAdrian Hunter */ 27000447ccdSAdrian Hunter for (i = ts->cnt; i; ) { 27100447ccdSAdrian Hunter if (ts->stack[--i].ret_addr == ret_addr) { 27200447ccdSAdrian Hunter ts->cnt = i; 27300447ccdSAdrian Hunter return; 27400447ccdSAdrian Hunter } 27500447ccdSAdrian Hunter } 27600447ccdSAdrian Hunter } 27700447ccdSAdrian Hunter 2784d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts) 2794d60e5e3SAdrian Hunter { 2804d60e5e3SAdrian Hunter size_t i; 2814d60e5e3SAdrian Hunter 2824d60e5e3SAdrian Hunter for (i = ts->cnt; i; ) { 2834d60e5e3SAdrian Hunter if (ts->stack[--i].trace_end) 2844d60e5e3SAdrian Hunter ts->cnt = i; 2854d60e5e3SAdrian Hunter else 2864d60e5e3SAdrian Hunter return; 2874d60e5e3SAdrian Hunter } 2884d60e5e3SAdrian Hunter } 2894d60e5e3SAdrian Hunter 29092a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts) 29192a9e4f7SAdrian Hunter { 29292a9e4f7SAdrian Hunter if (!ts->cnt) 29392a9e4f7SAdrian Hunter return false; 29492a9e4f7SAdrian Hunter 29592a9e4f7SAdrian Hunter return ts->stack[ts->cnt - 1].cp->in_kernel; 29692a9e4f7SAdrian Hunter } 29792a9e4f7SAdrian Hunter 29892a9e4f7SAdrian Hunter static int thread_stack__call_return(struct thread *thread, 29992a9e4f7SAdrian Hunter struct thread_stack *ts, size_t idx, 30092a9e4f7SAdrian Hunter u64 timestamp, u64 ref, bool no_return) 30192a9e4f7SAdrian Hunter { 30292a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 30392a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 30492a9e4f7SAdrian Hunter struct call_return cr = { 30592a9e4f7SAdrian Hunter .thread = thread, 30692a9e4f7SAdrian Hunter .comm = ts->comm, 30792a9e4f7SAdrian Hunter .db_id = 0, 30892a9e4f7SAdrian Hunter }; 309f435887eSAdrian Hunter u64 *parent_db_id; 31092a9e4f7SAdrian Hunter 31192a9e4f7SAdrian Hunter tse = &ts->stack[idx]; 31292a9e4f7SAdrian Hunter cr.cp = tse->cp; 31392a9e4f7SAdrian Hunter cr.call_time = tse->timestamp; 31492a9e4f7SAdrian Hunter cr.return_time = timestamp; 31592a9e4f7SAdrian Hunter cr.branch_count = ts->branch_count - tse->branch_count; 316003ccdc7SAdrian Hunter cr.insn_count = ts->insn_count - tse->insn_count; 317003ccdc7SAdrian Hunter cr.cyc_count = ts->cyc_count - tse->cyc_count; 318f435887eSAdrian Hunter cr.db_id = tse->db_id; 31992a9e4f7SAdrian Hunter cr.call_ref = tse->ref; 32092a9e4f7SAdrian Hunter cr.return_ref = ref; 32192a9e4f7SAdrian Hunter if (tse->no_call) 32292a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_CALL; 32392a9e4f7SAdrian Hunter if (no_return) 32492a9e4f7SAdrian Hunter cr.flags |= CALL_RETURN_NO_RETURN; 325f08046cbSAdrian Hunter if (tse->non_call) 326f08046cbSAdrian Hunter cr.flags |= CALL_RETURN_NON_CALL; 32792a9e4f7SAdrian Hunter 328f435887eSAdrian Hunter /* 329f435887eSAdrian Hunter * The parent db_id must be assigned before exporting the child. Note 330f435887eSAdrian Hunter * it is not possible to export the parent first because its information 331f435887eSAdrian Hunter * is not yet complete because its 'return' has not yet been processed. 332f435887eSAdrian Hunter */ 333f435887eSAdrian Hunter parent_db_id = idx ? &(tse - 1)->db_id : NULL; 334f435887eSAdrian Hunter 335f435887eSAdrian Hunter return crp->process(&cr, parent_db_id, crp->data); 33692a9e4f7SAdrian Hunter } 33792a9e4f7SAdrian Hunter 338a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) 33992a9e4f7SAdrian Hunter { 34092a9e4f7SAdrian Hunter struct call_return_processor *crp = ts->crp; 34192a9e4f7SAdrian Hunter int err; 34292a9e4f7SAdrian Hunter 34392a9e4f7SAdrian Hunter if (!crp) { 34492a9e4f7SAdrian Hunter ts->cnt = 0; 34586d67180SAdrian Hunter ts->br_stack_pos = 0; 34686d67180SAdrian Hunter if (ts->br_stack_rb) 34786d67180SAdrian Hunter ts->br_stack_rb->nr = 0; 34892a9e4f7SAdrian Hunter return 0; 34992a9e4f7SAdrian Hunter } 35092a9e4f7SAdrian Hunter 35192a9e4f7SAdrian Hunter while (ts->cnt) { 35292a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 35392a9e4f7SAdrian Hunter ts->last_time, 0, true); 35492a9e4f7SAdrian Hunter if (err) { 35592a9e4f7SAdrian Hunter pr_err("Error flushing thread stack!\n"); 35692a9e4f7SAdrian Hunter ts->cnt = 0; 35792a9e4f7SAdrian Hunter return err; 35892a9e4f7SAdrian Hunter } 35992a9e4f7SAdrian Hunter } 36092a9e4f7SAdrian Hunter 36192a9e4f7SAdrian Hunter return 0; 36292a9e4f7SAdrian Hunter } 36392a9e4f7SAdrian Hunter 364a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread) 365a5499b37SAdrian Hunter { 366*ee84a303SIan Rogers struct thread_stack *ts = thread__ts(thread); 367f6060ac6SAdrian Hunter unsigned int pos; 368f6060ac6SAdrian Hunter int err = 0; 369bd8e68acSAdrian Hunter 370f6060ac6SAdrian Hunter if (ts) { 371f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) { 372f6060ac6SAdrian Hunter int ret = __thread_stack__flush(thread, ts + pos); 373a5499b37SAdrian Hunter 374f6060ac6SAdrian Hunter if (ret) 375f6060ac6SAdrian Hunter err = ret; 376f6060ac6SAdrian Hunter } 377f6060ac6SAdrian Hunter } 378f6060ac6SAdrian Hunter 379f6060ac6SAdrian Hunter return err; 380a5499b37SAdrian Hunter } 381a5499b37SAdrian Hunter 38286d67180SAdrian Hunter static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags, 38386d67180SAdrian Hunter u64 from_ip, u64 to_ip) 38486d67180SAdrian Hunter { 38586d67180SAdrian Hunter struct branch_stack *bs = ts->br_stack_rb; 38686d67180SAdrian Hunter struct branch_entry *be; 38786d67180SAdrian Hunter 38886d67180SAdrian Hunter if (!ts->br_stack_pos) 38986d67180SAdrian Hunter ts->br_stack_pos = ts->br_stack_sz; 39086d67180SAdrian Hunter 39186d67180SAdrian Hunter ts->br_stack_pos -= 1; 39286d67180SAdrian Hunter 39386d67180SAdrian Hunter be = &bs->entries[ts->br_stack_pos]; 39486d67180SAdrian Hunter be->from = from_ip; 39586d67180SAdrian Hunter be->to = to_ip; 39686d67180SAdrian Hunter be->flags.value = 0; 39786d67180SAdrian Hunter be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); 39886d67180SAdrian Hunter be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); 39986d67180SAdrian Hunter /* No support for mispredict */ 40086d67180SAdrian Hunter be->flags.mispred = ts->mispred_all; 40186d67180SAdrian Hunter 40286d67180SAdrian Hunter if (bs->nr < ts->br_stack_sz) 40386d67180SAdrian Hunter bs->nr += 1; 40486d67180SAdrian Hunter } 40586d67180SAdrian Hunter 406256d92bcSAdrian Hunter int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, 40786d67180SAdrian Hunter u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, 40886d67180SAdrian Hunter unsigned int br_stack_sz, bool mispred_all) 40900447ccdSAdrian Hunter { 410256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 411bd8e68acSAdrian Hunter 41200447ccdSAdrian Hunter if (!thread) 41300447ccdSAdrian Hunter return -EINVAL; 41400447ccdSAdrian Hunter 415bd8e68acSAdrian Hunter if (!ts) { 41686d67180SAdrian Hunter ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz); 417bd8e68acSAdrian Hunter if (!ts) { 41800447ccdSAdrian Hunter pr_warning("Out of memory: no thread stack\n"); 41900447ccdSAdrian Hunter return -ENOMEM; 42000447ccdSAdrian Hunter } 421bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 42286d67180SAdrian Hunter ts->mispred_all = mispred_all; 42300447ccdSAdrian Hunter } 42400447ccdSAdrian Hunter 42500447ccdSAdrian Hunter /* 42600447ccdSAdrian Hunter * When the trace is discontinuous, the trace_nr changes. In that case 42700447ccdSAdrian Hunter * the stack might be completely invalid. Better to report nothing than 42892a9e4f7SAdrian Hunter * to report something misleading, so flush the stack. 42900447ccdSAdrian Hunter */ 430bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 431bd8e68acSAdrian Hunter if (ts->trace_nr) 432bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 433bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 43400447ccdSAdrian Hunter } 43500447ccdSAdrian Hunter 43686d67180SAdrian Hunter if (br_stack_sz) 43786d67180SAdrian Hunter thread_stack__update_br_stack(ts, flags, from_ip, to_ip); 43886d67180SAdrian Hunter 43986d67180SAdrian Hunter /* 44086d67180SAdrian Hunter * Stop here if thread_stack__process() is in use, or not recording call 44186d67180SAdrian Hunter * stack. 44286d67180SAdrian Hunter */ 44386d67180SAdrian Hunter if (ts->crp || !callstack) 44492a9e4f7SAdrian Hunter return 0; 44592a9e4f7SAdrian Hunter 44600447ccdSAdrian Hunter if (flags & PERF_IP_FLAG_CALL) { 44700447ccdSAdrian Hunter u64 ret_addr; 44800447ccdSAdrian Hunter 44900447ccdSAdrian Hunter if (!to_ip) 45000447ccdSAdrian Hunter return 0; 45100447ccdSAdrian Hunter ret_addr = from_ip + insn_len; 45200447ccdSAdrian Hunter if (ret_addr == to_ip) 45300447ccdSAdrian Hunter return 0; /* Zero-length calls are excluded */ 454bd8e68acSAdrian Hunter return thread_stack__push(ts, ret_addr, 4554d60e5e3SAdrian Hunter flags & PERF_IP_FLAG_TRACE_END); 4564d60e5e3SAdrian Hunter } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { 4574d60e5e3SAdrian Hunter /* 4584d60e5e3SAdrian Hunter * If the caller did not change the trace number (which would 4594d60e5e3SAdrian Hunter * have flushed the stack) then try to make sense of the stack. 4604d60e5e3SAdrian Hunter * Possibly, tracing began after returning to the current 4614d60e5e3SAdrian Hunter * address, so try to pop that. Also, do not expect a call made 4624d60e5e3SAdrian Hunter * when the trace ended, to return, so pop that. 4634d60e5e3SAdrian Hunter */ 464bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 465bd8e68acSAdrian Hunter thread_stack__pop_trace_end(ts); 4664d60e5e3SAdrian Hunter } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { 467bd8e68acSAdrian Hunter thread_stack__pop(ts, to_ip); 46800447ccdSAdrian Hunter } 46900447ccdSAdrian Hunter 47000447ccdSAdrian Hunter return 0; 47100447ccdSAdrian Hunter } 47200447ccdSAdrian Hunter 473256d92bcSAdrian Hunter void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr) 47492a9e4f7SAdrian Hunter { 475256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 476bd8e68acSAdrian Hunter 477bd8e68acSAdrian Hunter if (!ts) 47892a9e4f7SAdrian Hunter return; 47992a9e4f7SAdrian Hunter 480bd8e68acSAdrian Hunter if (trace_nr != ts->trace_nr) { 481bd8e68acSAdrian Hunter if (ts->trace_nr) 482bd8e68acSAdrian Hunter __thread_stack__flush(thread, ts); 483bd8e68acSAdrian Hunter ts->trace_nr = trace_nr; 48492a9e4f7SAdrian Hunter } 48592a9e4f7SAdrian Hunter } 48692a9e4f7SAdrian Hunter 487f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) 488f6060ac6SAdrian Hunter { 489f6060ac6SAdrian Hunter __thread_stack__flush(thread, ts); 490f6060ac6SAdrian Hunter zfree(&ts->stack); 49186d67180SAdrian Hunter zfree(&ts->br_stack_rb); 492f6060ac6SAdrian Hunter } 493f6060ac6SAdrian Hunter 494f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) 495f6060ac6SAdrian Hunter { 496f6060ac6SAdrian Hunter unsigned int arr_sz = ts->arr_sz; 497f6060ac6SAdrian Hunter 498f6060ac6SAdrian Hunter __thread_stack__free(thread, ts); 499f6060ac6SAdrian Hunter memset(ts, 0, sizeof(*ts)); 500f6060ac6SAdrian Hunter ts->arr_sz = arr_sz; 501f6060ac6SAdrian Hunter } 502f6060ac6SAdrian Hunter 50300447ccdSAdrian Hunter void thread_stack__free(struct thread *thread) 50400447ccdSAdrian Hunter { 505*ee84a303SIan Rogers struct thread_stack *ts = thread__ts(thread); 506f6060ac6SAdrian Hunter unsigned int pos; 507bd8e68acSAdrian Hunter 508bd8e68acSAdrian Hunter if (ts) { 509f6060ac6SAdrian Hunter for (pos = 0; pos < ts->arr_sz; pos++) 510f6060ac6SAdrian Hunter __thread_stack__free(thread, ts + pos); 511*ee84a303SIan Rogers free(thread__ts(thread)); 512*ee84a303SIan Rogers thread__set_ts(thread, NULL); 51300447ccdSAdrian Hunter } 51400447ccdSAdrian Hunter } 51500447ccdSAdrian Hunter 51624248306SAdrian Hunter static inline u64 callchain_context(u64 ip, u64 kernel_start) 51700447ccdSAdrian Hunter { 51824248306SAdrian Hunter return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; 51924248306SAdrian Hunter } 52000447ccdSAdrian Hunter 521256d92bcSAdrian Hunter void thread_stack__sample(struct thread *thread, int cpu, 522256d92bcSAdrian Hunter struct ip_callchain *chain, 52324248306SAdrian Hunter size_t sz, u64 ip, u64 kernel_start) 52424248306SAdrian Hunter { 525256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 52624248306SAdrian Hunter u64 context = callchain_context(ip, kernel_start); 52724248306SAdrian Hunter u64 last_context; 52824248306SAdrian Hunter size_t i, j; 52900447ccdSAdrian Hunter 53024248306SAdrian Hunter if (sz < 2) { 53124248306SAdrian Hunter chain->nr = 0; 53224248306SAdrian Hunter return; 53324248306SAdrian Hunter } 53400447ccdSAdrian Hunter 53524248306SAdrian Hunter chain->ips[0] = context; 53624248306SAdrian Hunter chain->ips[1] = ip; 53724248306SAdrian Hunter 538bd8e68acSAdrian Hunter if (!ts) { 53924248306SAdrian Hunter chain->nr = 2; 54024248306SAdrian Hunter return; 54124248306SAdrian Hunter } 54224248306SAdrian Hunter 54324248306SAdrian Hunter last_context = context; 54424248306SAdrian Hunter 545bd8e68acSAdrian Hunter for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) { 546bd8e68acSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 54724248306SAdrian Hunter context = callchain_context(ip, kernel_start); 54824248306SAdrian Hunter if (context != last_context) { 54924248306SAdrian Hunter if (i >= sz - 1) 55024248306SAdrian Hunter break; 55124248306SAdrian Hunter chain->ips[i++] = context; 55224248306SAdrian Hunter last_context = context; 55324248306SAdrian Hunter } 55424248306SAdrian Hunter chain->ips[i] = ip; 55524248306SAdrian Hunter } 55624248306SAdrian Hunter 55724248306SAdrian Hunter chain->nr = i; 55800447ccdSAdrian Hunter } 55992a9e4f7SAdrian Hunter 5604fef41bfSAdrian Hunter /* 5614fef41bfSAdrian Hunter * Hardware sample records, created some time after the event occurred, need to 5624fef41bfSAdrian Hunter * have subsequent addresses removed from the call chain. 5634fef41bfSAdrian Hunter */ 5644fef41bfSAdrian Hunter void thread_stack__sample_late(struct thread *thread, int cpu, 5654fef41bfSAdrian Hunter struct ip_callchain *chain, size_t sz, 5664fef41bfSAdrian Hunter u64 sample_ip, u64 kernel_start) 5674fef41bfSAdrian Hunter { 5684fef41bfSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 5694fef41bfSAdrian Hunter u64 sample_context = callchain_context(sample_ip, kernel_start); 5704fef41bfSAdrian Hunter u64 last_context, context, ip; 5714fef41bfSAdrian Hunter size_t nr = 0, j; 5724fef41bfSAdrian Hunter 5734fef41bfSAdrian Hunter if (sz < 2) { 5744fef41bfSAdrian Hunter chain->nr = 0; 5754fef41bfSAdrian Hunter return; 5764fef41bfSAdrian Hunter } 5774fef41bfSAdrian Hunter 5784fef41bfSAdrian Hunter if (!ts) 5794fef41bfSAdrian Hunter goto out; 5804fef41bfSAdrian Hunter 5814fef41bfSAdrian Hunter /* 5824fef41bfSAdrian Hunter * When tracing kernel space, kernel addresses occur at the top of the 5834fef41bfSAdrian Hunter * call chain after the event occurred but before tracing stopped. 5844fef41bfSAdrian Hunter * Skip them. 5854fef41bfSAdrian Hunter */ 5864fef41bfSAdrian Hunter for (j = 1; j <= ts->cnt; j++) { 5874fef41bfSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 5884fef41bfSAdrian Hunter context = callchain_context(ip, kernel_start); 5894fef41bfSAdrian Hunter if (context == PERF_CONTEXT_USER || 5904fef41bfSAdrian Hunter (context == sample_context && ip == sample_ip)) 5914fef41bfSAdrian Hunter break; 5924fef41bfSAdrian Hunter } 5934fef41bfSAdrian Hunter 5944fef41bfSAdrian Hunter last_context = sample_ip; /* Use sample_ip as an invalid context */ 5954fef41bfSAdrian Hunter 5964fef41bfSAdrian Hunter for (; nr < sz && j <= ts->cnt; nr++, j++) { 5974fef41bfSAdrian Hunter ip = ts->stack[ts->cnt - j].ret_addr; 5984fef41bfSAdrian Hunter context = callchain_context(ip, kernel_start); 5994fef41bfSAdrian Hunter if (context != last_context) { 6004fef41bfSAdrian Hunter if (nr >= sz - 1) 6014fef41bfSAdrian Hunter break; 6024fef41bfSAdrian Hunter chain->ips[nr++] = context; 6034fef41bfSAdrian Hunter last_context = context; 6044fef41bfSAdrian Hunter } 6054fef41bfSAdrian Hunter chain->ips[nr] = ip; 6064fef41bfSAdrian Hunter } 6074fef41bfSAdrian Hunter out: 6084fef41bfSAdrian Hunter if (nr) { 6094fef41bfSAdrian Hunter chain->nr = nr; 6104fef41bfSAdrian Hunter } else { 6114fef41bfSAdrian Hunter chain->ips[0] = sample_context; 6124fef41bfSAdrian Hunter chain->ips[1] = sample_ip; 6134fef41bfSAdrian Hunter chain->nr = 2; 6144fef41bfSAdrian Hunter } 6154fef41bfSAdrian Hunter } 6164fef41bfSAdrian Hunter 61786d67180SAdrian Hunter void thread_stack__br_sample(struct thread *thread, int cpu, 61886d67180SAdrian Hunter struct branch_stack *dst, unsigned int sz) 61986d67180SAdrian Hunter { 62086d67180SAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 62186d67180SAdrian Hunter const size_t bsz = sizeof(struct branch_entry); 62286d67180SAdrian Hunter struct branch_stack *src; 62386d67180SAdrian Hunter struct branch_entry *be; 62486d67180SAdrian Hunter unsigned int nr; 62586d67180SAdrian Hunter 62686d67180SAdrian Hunter dst->nr = 0; 62786d67180SAdrian Hunter 62886d67180SAdrian Hunter if (!ts) 62986d67180SAdrian Hunter return; 63086d67180SAdrian Hunter 63186d67180SAdrian Hunter src = ts->br_stack_rb; 63286d67180SAdrian Hunter if (!src->nr) 63386d67180SAdrian Hunter return; 63486d67180SAdrian Hunter 63586d67180SAdrian Hunter dst->nr = min((unsigned int)src->nr, sz); 63686d67180SAdrian Hunter 63786d67180SAdrian Hunter be = &dst->entries[0]; 63886d67180SAdrian Hunter nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); 63986d67180SAdrian Hunter memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); 64086d67180SAdrian Hunter 64186d67180SAdrian Hunter if (src->nr >= ts->br_stack_sz) { 64286d67180SAdrian Hunter sz -= nr; 64386d67180SAdrian Hunter be = &dst->entries[nr]; 64486d67180SAdrian Hunter nr = min(ts->br_stack_pos, sz); 64586d67180SAdrian Hunter memcpy(be, &src->entries[0], bsz * ts->br_stack_pos); 64686d67180SAdrian Hunter } 64786d67180SAdrian Hunter } 64886d67180SAdrian Hunter 6493749e0bbSAdrian Hunter /* Start of user space branch entries */ 6503749e0bbSAdrian Hunter static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start) 6513749e0bbSAdrian Hunter { 6523749e0bbSAdrian Hunter if (!*start) 6533749e0bbSAdrian Hunter *start = be->to && be->to < kernel_start; 6543749e0bbSAdrian Hunter 6553749e0bbSAdrian Hunter return *start; 6563749e0bbSAdrian Hunter } 6573749e0bbSAdrian Hunter 6583749e0bbSAdrian Hunter /* 6593749e0bbSAdrian Hunter * Start of branch entries after the ip fell in between 2 branches, or user 6603749e0bbSAdrian Hunter * space branch entries. 6613749e0bbSAdrian Hunter */ 6623749e0bbSAdrian Hunter static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start, 6633749e0bbSAdrian Hunter bool *start, struct branch_entry *nb) 6643749e0bbSAdrian Hunter { 6653749e0bbSAdrian Hunter if (!*start) { 6663749e0bbSAdrian Hunter *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || 6673749e0bbSAdrian Hunter be->from < kernel_start || 6683749e0bbSAdrian Hunter (be->to && be->to < kernel_start); 6693749e0bbSAdrian Hunter } 6703749e0bbSAdrian Hunter 6713749e0bbSAdrian Hunter return *start; 6723749e0bbSAdrian Hunter } 6733749e0bbSAdrian Hunter 6743749e0bbSAdrian Hunter /* 6753749e0bbSAdrian Hunter * Hardware sample records, created some time after the event occurred, need to 6763749e0bbSAdrian Hunter * have subsequent addresses removed from the branch stack. 6773749e0bbSAdrian Hunter */ 6783749e0bbSAdrian Hunter void thread_stack__br_sample_late(struct thread *thread, int cpu, 6793749e0bbSAdrian Hunter struct branch_stack *dst, unsigned int sz, 6803749e0bbSAdrian Hunter u64 ip, u64 kernel_start) 6813749e0bbSAdrian Hunter { 6823749e0bbSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 6833749e0bbSAdrian Hunter struct branch_entry *d, *s, *spos, *ssz; 6843749e0bbSAdrian Hunter struct branch_stack *src; 6853749e0bbSAdrian Hunter unsigned int nr = 0; 6863749e0bbSAdrian Hunter bool start = false; 6873749e0bbSAdrian Hunter 6883749e0bbSAdrian Hunter dst->nr = 0; 6893749e0bbSAdrian Hunter 6903749e0bbSAdrian Hunter if (!ts) 6913749e0bbSAdrian Hunter return; 6923749e0bbSAdrian Hunter 6933749e0bbSAdrian Hunter src = ts->br_stack_rb; 6943749e0bbSAdrian Hunter if (!src->nr) 6953749e0bbSAdrian Hunter return; 6963749e0bbSAdrian Hunter 6973749e0bbSAdrian Hunter spos = &src->entries[ts->br_stack_pos]; 6983749e0bbSAdrian Hunter ssz = &src->entries[ts->br_stack_sz]; 6993749e0bbSAdrian Hunter 7003749e0bbSAdrian Hunter d = &dst->entries[0]; 7013749e0bbSAdrian Hunter s = spos; 7023749e0bbSAdrian Hunter 7033749e0bbSAdrian Hunter if (ip < kernel_start) { 7043749e0bbSAdrian Hunter /* 7053749e0bbSAdrian Hunter * User space sample: start copying branch entries when the 7063749e0bbSAdrian Hunter * branch is in user space. 7073749e0bbSAdrian Hunter */ 7083749e0bbSAdrian Hunter for (s = spos; s < ssz && nr < sz; s++) { 7093749e0bbSAdrian Hunter if (us_start(s, kernel_start, &start)) { 7103749e0bbSAdrian Hunter *d++ = *s; 7113749e0bbSAdrian Hunter nr += 1; 7123749e0bbSAdrian Hunter } 7133749e0bbSAdrian Hunter } 7143749e0bbSAdrian Hunter 7153749e0bbSAdrian Hunter if (src->nr >= ts->br_stack_sz) { 7163749e0bbSAdrian Hunter for (s = &src->entries[0]; s < spos && nr < sz; s++) { 7173749e0bbSAdrian Hunter if (us_start(s, kernel_start, &start)) { 7183749e0bbSAdrian Hunter *d++ = *s; 7193749e0bbSAdrian Hunter nr += 1; 7203749e0bbSAdrian Hunter } 7213749e0bbSAdrian Hunter } 7223749e0bbSAdrian Hunter } 7233749e0bbSAdrian Hunter } else { 7243749e0bbSAdrian Hunter struct branch_entry *nb = NULL; 7253749e0bbSAdrian Hunter 7263749e0bbSAdrian Hunter /* 7273749e0bbSAdrian Hunter * Kernel space sample: start copying branch entries when the ip 7283749e0bbSAdrian Hunter * falls in between 2 branches (or the branch is in user space 7293749e0bbSAdrian Hunter * because then the start must have been missed). 7303749e0bbSAdrian Hunter */ 7313749e0bbSAdrian Hunter for (s = spos; s < ssz && nr < sz; s++) { 7323749e0bbSAdrian Hunter if (ks_start(s, ip, kernel_start, &start, nb)) { 7333749e0bbSAdrian Hunter *d++ = *s; 7343749e0bbSAdrian Hunter nr += 1; 7353749e0bbSAdrian Hunter } 7363749e0bbSAdrian Hunter nb = s; 7373749e0bbSAdrian Hunter } 7383749e0bbSAdrian Hunter 7393749e0bbSAdrian Hunter if (src->nr >= ts->br_stack_sz) { 7403749e0bbSAdrian Hunter for (s = &src->entries[0]; s < spos && nr < sz; s++) { 7413749e0bbSAdrian Hunter if (ks_start(s, ip, kernel_start, &start, nb)) { 7423749e0bbSAdrian Hunter *d++ = *s; 7433749e0bbSAdrian Hunter nr += 1; 7443749e0bbSAdrian Hunter } 7453749e0bbSAdrian Hunter nb = s; 7463749e0bbSAdrian Hunter } 7473749e0bbSAdrian Hunter } 7483749e0bbSAdrian Hunter } 7493749e0bbSAdrian Hunter 7503749e0bbSAdrian Hunter dst->nr = nr; 7513749e0bbSAdrian Hunter } 7523749e0bbSAdrian Hunter 75392a9e4f7SAdrian Hunter struct call_return_processor * 754f435887eSAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), 75592a9e4f7SAdrian Hunter void *data) 75692a9e4f7SAdrian Hunter { 75792a9e4f7SAdrian Hunter struct call_return_processor *crp; 75892a9e4f7SAdrian Hunter 75992a9e4f7SAdrian Hunter crp = zalloc(sizeof(struct call_return_processor)); 76092a9e4f7SAdrian Hunter if (!crp) 76192a9e4f7SAdrian Hunter return NULL; 76292a9e4f7SAdrian Hunter crp->cpr = call_path_root__new(); 76392a9e4f7SAdrian Hunter if (!crp->cpr) 76492a9e4f7SAdrian Hunter goto out_free; 76592a9e4f7SAdrian Hunter crp->process = process; 76692a9e4f7SAdrian Hunter crp->data = data; 76792a9e4f7SAdrian Hunter return crp; 76892a9e4f7SAdrian Hunter 76992a9e4f7SAdrian Hunter out_free: 77092a9e4f7SAdrian Hunter free(crp); 77192a9e4f7SAdrian Hunter return NULL; 77292a9e4f7SAdrian Hunter } 77392a9e4f7SAdrian Hunter 77492a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp) 77592a9e4f7SAdrian Hunter { 77692a9e4f7SAdrian Hunter if (crp) { 77792a9e4f7SAdrian Hunter call_path_root__free(crp->cpr); 77892a9e4f7SAdrian Hunter free(crp); 77992a9e4f7SAdrian Hunter } 78092a9e4f7SAdrian Hunter } 78192a9e4f7SAdrian Hunter 78292a9e4f7SAdrian Hunter static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, 78392a9e4f7SAdrian Hunter u64 timestamp, u64 ref, struct call_path *cp, 7842dcde4e1SAdrian Hunter bool no_call, bool trace_end) 78592a9e4f7SAdrian Hunter { 78692a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 78792a9e4f7SAdrian Hunter int err; 78892a9e4f7SAdrian Hunter 789e7a3a055SAdrian Hunter if (!cp) 790e7a3a055SAdrian Hunter return -ENOMEM; 791e7a3a055SAdrian Hunter 79292a9e4f7SAdrian Hunter if (ts->cnt == ts->sz) { 79392a9e4f7SAdrian Hunter err = thread_stack__grow(ts); 79492a9e4f7SAdrian Hunter if (err) 79592a9e4f7SAdrian Hunter return err; 79692a9e4f7SAdrian Hunter } 79792a9e4f7SAdrian Hunter 79892a9e4f7SAdrian Hunter tse = &ts->stack[ts->cnt++]; 79992a9e4f7SAdrian Hunter tse->ret_addr = ret_addr; 80092a9e4f7SAdrian Hunter tse->timestamp = timestamp; 80192a9e4f7SAdrian Hunter tse->ref = ref; 80292a9e4f7SAdrian Hunter tse->branch_count = ts->branch_count; 803003ccdc7SAdrian Hunter tse->insn_count = ts->insn_count; 804003ccdc7SAdrian Hunter tse->cyc_count = ts->cyc_count; 80592a9e4f7SAdrian Hunter tse->cp = cp; 80692a9e4f7SAdrian Hunter tse->no_call = no_call; 8072dcde4e1SAdrian Hunter tse->trace_end = trace_end; 808f08046cbSAdrian Hunter tse->non_call = false; 809f435887eSAdrian Hunter tse->db_id = 0; 81092a9e4f7SAdrian Hunter 81192a9e4f7SAdrian Hunter return 0; 81292a9e4f7SAdrian Hunter } 81392a9e4f7SAdrian Hunter 81492a9e4f7SAdrian Hunter static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, 81592a9e4f7SAdrian Hunter u64 ret_addr, u64 timestamp, u64 ref, 81692a9e4f7SAdrian Hunter struct symbol *sym) 81792a9e4f7SAdrian Hunter { 81892a9e4f7SAdrian Hunter int err; 81992a9e4f7SAdrian Hunter 82092a9e4f7SAdrian Hunter if (!ts->cnt) 82192a9e4f7SAdrian Hunter return 1; 82292a9e4f7SAdrian Hunter 82392a9e4f7SAdrian Hunter if (ts->cnt == 1) { 82492a9e4f7SAdrian Hunter struct thread_stack_entry *tse = &ts->stack[0]; 82592a9e4f7SAdrian Hunter 82692a9e4f7SAdrian Hunter if (tse->cp->sym == sym) 82792a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 82892a9e4f7SAdrian Hunter timestamp, ref, false); 82992a9e4f7SAdrian Hunter } 83092a9e4f7SAdrian Hunter 831f08046cbSAdrian Hunter if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && 832f08046cbSAdrian Hunter !ts->stack[ts->cnt - 1].non_call) { 83392a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 83492a9e4f7SAdrian Hunter timestamp, ref, false); 83592a9e4f7SAdrian Hunter } else { 83692a9e4f7SAdrian Hunter size_t i = ts->cnt - 1; 83792a9e4f7SAdrian Hunter 83892a9e4f7SAdrian Hunter while (i--) { 839f08046cbSAdrian Hunter if (ts->stack[i].ret_addr != ret_addr || 840f08046cbSAdrian Hunter ts->stack[i].non_call) 84192a9e4f7SAdrian Hunter continue; 84292a9e4f7SAdrian Hunter i += 1; 84392a9e4f7SAdrian Hunter while (ts->cnt > i) { 84492a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, 84592a9e4f7SAdrian Hunter --ts->cnt, 84692a9e4f7SAdrian Hunter timestamp, ref, 84792a9e4f7SAdrian Hunter true); 84892a9e4f7SAdrian Hunter if (err) 84992a9e4f7SAdrian Hunter return err; 85092a9e4f7SAdrian Hunter } 85192a9e4f7SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, 85292a9e4f7SAdrian Hunter timestamp, ref, false); 85392a9e4f7SAdrian Hunter } 85492a9e4f7SAdrian Hunter } 85592a9e4f7SAdrian Hunter 85692a9e4f7SAdrian Hunter return 1; 85792a9e4f7SAdrian Hunter } 85892a9e4f7SAdrian Hunter 859e0b89511SAdrian Hunter static int thread_stack__bottom(struct thread_stack *ts, 86092a9e4f7SAdrian Hunter struct perf_sample *sample, 86192a9e4f7SAdrian Hunter struct addr_location *from_al, 86292a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref) 86392a9e4f7SAdrian Hunter { 86492a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 86592a9e4f7SAdrian Hunter struct call_path *cp; 86692a9e4f7SAdrian Hunter struct symbol *sym; 86792a9e4f7SAdrian Hunter u64 ip; 86892a9e4f7SAdrian Hunter 86992a9e4f7SAdrian Hunter if (sample->ip) { 87092a9e4f7SAdrian Hunter ip = sample->ip; 87192a9e4f7SAdrian Hunter sym = from_al->sym; 87292a9e4f7SAdrian Hunter } else if (sample->addr) { 87392a9e4f7SAdrian Hunter ip = sample->addr; 87492a9e4f7SAdrian Hunter sym = to_al->sym; 87592a9e4f7SAdrian Hunter } else { 87692a9e4f7SAdrian Hunter return 0; 87792a9e4f7SAdrian Hunter } 87892a9e4f7SAdrian Hunter 87992a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, 88092a9e4f7SAdrian Hunter ts->kernel_start); 88192a9e4f7SAdrian Hunter 882e0b89511SAdrian Hunter return thread_stack__push_cp(ts, ip, sample->time, ref, cp, 8832dcde4e1SAdrian Hunter true, false); 88492a9e4f7SAdrian Hunter } 88592a9e4f7SAdrian Hunter 88697860b48SAdrian Hunter static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts, 88797860b48SAdrian Hunter struct perf_sample *sample, u64 ref) 88897860b48SAdrian Hunter { 88997860b48SAdrian Hunter u64 tm = sample->time; 89097860b48SAdrian Hunter int err; 89197860b48SAdrian Hunter 89297860b48SAdrian Hunter /* Return to userspace, so pop all kernel addresses */ 89397860b48SAdrian Hunter while (thread_stack__in_kernel(ts)) { 89497860b48SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 89597860b48SAdrian Hunter tm, ref, true); 89697860b48SAdrian Hunter if (err) 89797860b48SAdrian Hunter return err; 89897860b48SAdrian Hunter } 89997860b48SAdrian Hunter 90097860b48SAdrian Hunter return 0; 90197860b48SAdrian Hunter } 90297860b48SAdrian Hunter 90392a9e4f7SAdrian Hunter static int thread_stack__no_call_return(struct thread *thread, 90492a9e4f7SAdrian Hunter struct thread_stack *ts, 90592a9e4f7SAdrian Hunter struct perf_sample *sample, 90692a9e4f7SAdrian Hunter struct addr_location *from_al, 90792a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref) 90892a9e4f7SAdrian Hunter { 90992a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 91090c2cda7SAdrian Hunter struct call_path *root = &cpr->call_path; 91190c2cda7SAdrian Hunter struct symbol *fsym = from_al->sym; 91290c2cda7SAdrian Hunter struct symbol *tsym = to_al->sym; 91392a9e4f7SAdrian Hunter struct call_path *cp, *parent; 91492a9e4f7SAdrian Hunter u64 ks = ts->kernel_start; 91590c2cda7SAdrian Hunter u64 addr = sample->addr; 91690c2cda7SAdrian Hunter u64 tm = sample->time; 91790c2cda7SAdrian Hunter u64 ip = sample->ip; 91892a9e4f7SAdrian Hunter int err; 91992a9e4f7SAdrian Hunter 92090c2cda7SAdrian Hunter if (ip >= ks && addr < ks) { 92192a9e4f7SAdrian Hunter /* Return to userspace, so pop all kernel addresses */ 922eb5d8544SAdrian Hunter err = thread_stack__pop_ks(thread, ts, sample, ref); 92392a9e4f7SAdrian Hunter if (err) 92492a9e4f7SAdrian Hunter return err; 92592a9e4f7SAdrian Hunter 92692a9e4f7SAdrian Hunter /* If the stack is empty, push the userspace address */ 92792a9e4f7SAdrian Hunter if (!ts->cnt) { 92890c2cda7SAdrian Hunter cp = call_path__findnew(cpr, root, tsym, addr, ks); 92990c2cda7SAdrian Hunter return thread_stack__push_cp(ts, 0, tm, ref, cp, true, 93090c2cda7SAdrian Hunter false); 93192a9e4f7SAdrian Hunter } 93290c2cda7SAdrian Hunter } else if (thread_stack__in_kernel(ts) && ip < ks) { 93392a9e4f7SAdrian Hunter /* Return to userspace, so pop all kernel addresses */ 934eb5d8544SAdrian Hunter err = thread_stack__pop_ks(thread, ts, sample, ref); 93592a9e4f7SAdrian Hunter if (err) 93692a9e4f7SAdrian Hunter return err; 93792a9e4f7SAdrian Hunter } 93892a9e4f7SAdrian Hunter 93992a9e4f7SAdrian Hunter if (ts->cnt) 94092a9e4f7SAdrian Hunter parent = ts->stack[ts->cnt - 1].cp; 94192a9e4f7SAdrian Hunter else 94290c2cda7SAdrian Hunter parent = root; 94392a9e4f7SAdrian Hunter 9441f35cd65SAdrian Hunter if (parent->sym == from_al->sym) { 9451f35cd65SAdrian Hunter /* 9461f35cd65SAdrian Hunter * At the bottom of the stack, assume the missing 'call' was 9471f35cd65SAdrian Hunter * before the trace started. So, pop the current symbol and push 9481f35cd65SAdrian Hunter * the 'to' symbol. 9491f35cd65SAdrian Hunter */ 9501f35cd65SAdrian Hunter if (ts->cnt == 1) { 9511f35cd65SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 9521f35cd65SAdrian Hunter tm, ref, false); 9531f35cd65SAdrian Hunter if (err) 9541f35cd65SAdrian Hunter return err; 9551f35cd65SAdrian Hunter } 9561f35cd65SAdrian Hunter 9571f35cd65SAdrian Hunter if (!ts->cnt) { 9581f35cd65SAdrian Hunter cp = call_path__findnew(cpr, root, tsym, addr, ks); 9591f35cd65SAdrian Hunter 9601f35cd65SAdrian Hunter return thread_stack__push_cp(ts, addr, tm, ref, cp, 9611f35cd65SAdrian Hunter true, false); 9621f35cd65SAdrian Hunter } 9631f35cd65SAdrian Hunter 9641f35cd65SAdrian Hunter /* 9651f35cd65SAdrian Hunter * Otherwise assume the 'return' is being used as a jump (e.g. 9661f35cd65SAdrian Hunter * retpoline) and just push the 'to' symbol. 9671f35cd65SAdrian Hunter */ 9681f35cd65SAdrian Hunter cp = call_path__findnew(cpr, parent, tsym, addr, ks); 9691f35cd65SAdrian Hunter 9701f35cd65SAdrian Hunter err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); 9711f35cd65SAdrian Hunter if (!err) 9721f35cd65SAdrian Hunter ts->stack[ts->cnt - 1].non_call = true; 9731f35cd65SAdrian Hunter 9741f35cd65SAdrian Hunter return err; 9751f35cd65SAdrian Hunter } 9761f35cd65SAdrian Hunter 9771f35cd65SAdrian Hunter /* 9781f35cd65SAdrian Hunter * Assume 'parent' has not yet returned, so push 'to', and then push and 9791f35cd65SAdrian Hunter * pop 'from'. 9801f35cd65SAdrian Hunter */ 9811f35cd65SAdrian Hunter 9821f35cd65SAdrian Hunter cp = call_path__findnew(cpr, parent, tsym, addr, ks); 98392a9e4f7SAdrian Hunter 98490c2cda7SAdrian Hunter err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); 98592a9e4f7SAdrian Hunter if (err) 98692a9e4f7SAdrian Hunter return err; 98792a9e4f7SAdrian Hunter 9881f35cd65SAdrian Hunter cp = call_path__findnew(cpr, cp, fsym, ip, ks); 9891f35cd65SAdrian Hunter 9901f35cd65SAdrian Hunter err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); 9911f35cd65SAdrian Hunter if (err) 9921f35cd65SAdrian Hunter return err; 9931f35cd65SAdrian Hunter 9941f35cd65SAdrian Hunter return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); 99592a9e4f7SAdrian Hunter } 99692a9e4f7SAdrian Hunter 99792a9e4f7SAdrian Hunter static int thread_stack__trace_begin(struct thread *thread, 99892a9e4f7SAdrian Hunter struct thread_stack *ts, u64 timestamp, 99992a9e4f7SAdrian Hunter u64 ref) 100092a9e4f7SAdrian Hunter { 100192a9e4f7SAdrian Hunter struct thread_stack_entry *tse; 100292a9e4f7SAdrian Hunter int err; 100392a9e4f7SAdrian Hunter 100492a9e4f7SAdrian Hunter if (!ts->cnt) 100592a9e4f7SAdrian Hunter return 0; 100692a9e4f7SAdrian Hunter 100792a9e4f7SAdrian Hunter /* Pop trace end */ 100892a9e4f7SAdrian Hunter tse = &ts->stack[ts->cnt - 1]; 10092dcde4e1SAdrian Hunter if (tse->trace_end) { 101092a9e4f7SAdrian Hunter err = thread_stack__call_return(thread, ts, --ts->cnt, 101192a9e4f7SAdrian Hunter timestamp, ref, false); 101292a9e4f7SAdrian Hunter if (err) 101392a9e4f7SAdrian Hunter return err; 101492a9e4f7SAdrian Hunter } 101592a9e4f7SAdrian Hunter 101692a9e4f7SAdrian Hunter return 0; 101792a9e4f7SAdrian Hunter } 101892a9e4f7SAdrian Hunter 101992a9e4f7SAdrian Hunter static int thread_stack__trace_end(struct thread_stack *ts, 102092a9e4f7SAdrian Hunter struct perf_sample *sample, u64 ref) 102192a9e4f7SAdrian Hunter { 102292a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 102392a9e4f7SAdrian Hunter struct call_path *cp; 102492a9e4f7SAdrian Hunter u64 ret_addr; 102592a9e4f7SAdrian Hunter 102692a9e4f7SAdrian Hunter /* No point having 'trace end' on the bottom of the stack */ 102792a9e4f7SAdrian Hunter if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) 102892a9e4f7SAdrian Hunter return 0; 102992a9e4f7SAdrian Hunter 103092a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, 103192a9e4f7SAdrian Hunter ts->kernel_start); 103292a9e4f7SAdrian Hunter 103392a9e4f7SAdrian Hunter ret_addr = sample->ip + sample->insn_len; 103492a9e4f7SAdrian Hunter 103592a9e4f7SAdrian Hunter return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, 10362dcde4e1SAdrian Hunter false, true); 103792a9e4f7SAdrian Hunter } 103892a9e4f7SAdrian Hunter 10393c0cd952SAdrian Hunter static bool is_x86_retpoline(const char *name) 10403c0cd952SAdrian Hunter { 10413c0cd952SAdrian Hunter const char *p = strstr(name, "__x86_indirect_thunk_"); 10423c0cd952SAdrian Hunter 10433c0cd952SAdrian Hunter return p == name || !strcmp(name, "__indirect_thunk_start"); 10443c0cd952SAdrian Hunter } 10453c0cd952SAdrian Hunter 10463c0cd952SAdrian Hunter /* 10473c0cd952SAdrian Hunter * x86 retpoline functions pollute the call graph. This function removes them. 10483c0cd952SAdrian Hunter * This does not handle function return thunks, nor is there any improvement 10493c0cd952SAdrian Hunter * for the handling of inline thunks or extern thunks. 10503c0cd952SAdrian Hunter */ 10513c0cd952SAdrian Hunter static int thread_stack__x86_retpoline(struct thread_stack *ts, 10523c0cd952SAdrian Hunter struct perf_sample *sample, 10533c0cd952SAdrian Hunter struct addr_location *to_al) 10543c0cd952SAdrian Hunter { 10553c0cd952SAdrian Hunter struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; 10563c0cd952SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 10573c0cd952SAdrian Hunter struct symbol *sym = tse->cp->sym; 10583c0cd952SAdrian Hunter struct symbol *tsym = to_al->sym; 10593c0cd952SAdrian Hunter struct call_path *cp; 10603c0cd952SAdrian Hunter 10613c0cd952SAdrian Hunter if (sym && is_x86_retpoline(sym->name)) { 10623c0cd952SAdrian Hunter /* 10633c0cd952SAdrian Hunter * This is a x86 retpoline fn. It pollutes the call graph by 10643c0cd952SAdrian Hunter * showing up everywhere there is an indirect branch, but does 10653c0cd952SAdrian Hunter * not itself mean anything. Here the top-of-stack is removed, 10663c0cd952SAdrian Hunter * by decrementing the stack count, and then further down, the 10673c0cd952SAdrian Hunter * resulting top-of-stack is replaced with the actual target. 10683c0cd952SAdrian Hunter * The result is that the retpoline functions will no longer 10693c0cd952SAdrian Hunter * appear in the call graph. Note this only affects the call 10703c0cd952SAdrian Hunter * graph, since all the original branches are left unchanged. 10713c0cd952SAdrian Hunter */ 10723c0cd952SAdrian Hunter ts->cnt -= 1; 10733c0cd952SAdrian Hunter sym = ts->stack[ts->cnt - 2].cp->sym; 10743c0cd952SAdrian Hunter if (sym && sym == tsym && to_al->addr != tsym->start) { 10753c0cd952SAdrian Hunter /* 10763c0cd952SAdrian Hunter * Target is back to the middle of the symbol we came 10773c0cd952SAdrian Hunter * from so assume it is an indirect jmp and forget it 10783c0cd952SAdrian Hunter * altogether. 10793c0cd952SAdrian Hunter */ 10803c0cd952SAdrian Hunter ts->cnt -= 1; 10813c0cd952SAdrian Hunter return 0; 10823c0cd952SAdrian Hunter } 10833c0cd952SAdrian Hunter } else if (sym && sym == tsym) { 10843c0cd952SAdrian Hunter /* 10853c0cd952SAdrian Hunter * Target is back to the symbol we came from so assume it is an 10863c0cd952SAdrian Hunter * indirect jmp and forget it altogether. 10873c0cd952SAdrian Hunter */ 10883c0cd952SAdrian Hunter ts->cnt -= 1; 10893c0cd952SAdrian Hunter return 0; 10903c0cd952SAdrian Hunter } 10913c0cd952SAdrian Hunter 10923c0cd952SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, 10933c0cd952SAdrian Hunter sample->addr, ts->kernel_start); 10943c0cd952SAdrian Hunter if (!cp) 10953c0cd952SAdrian Hunter return -ENOMEM; 10963c0cd952SAdrian Hunter 10973c0cd952SAdrian Hunter /* Replace the top-of-stack with the actual target */ 10983c0cd952SAdrian Hunter ts->stack[ts->cnt - 1].cp = cp; 10993c0cd952SAdrian Hunter 11003c0cd952SAdrian Hunter return 0; 11013c0cd952SAdrian Hunter } 11023c0cd952SAdrian Hunter 110392a9e4f7SAdrian Hunter int thread_stack__process(struct thread *thread, struct comm *comm, 110492a9e4f7SAdrian Hunter struct perf_sample *sample, 110592a9e4f7SAdrian Hunter struct addr_location *from_al, 110692a9e4f7SAdrian Hunter struct addr_location *to_al, u64 ref, 110792a9e4f7SAdrian Hunter struct call_return_processor *crp) 110892a9e4f7SAdrian Hunter { 1109256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, sample->cpu); 11103c0cd952SAdrian Hunter enum retpoline_state_t rstate; 111192a9e4f7SAdrian Hunter int err = 0; 111292a9e4f7SAdrian Hunter 111303b32cb2SAdrian Hunter if (ts && !ts->crp) { 111492a9e4f7SAdrian Hunter /* Supersede thread_stack__event() */ 1115f6060ac6SAdrian Hunter thread_stack__reset(thread, ts); 111603b32cb2SAdrian Hunter ts = NULL; 111792a9e4f7SAdrian Hunter } 111803b32cb2SAdrian Hunter 111903b32cb2SAdrian Hunter if (!ts) { 112086d67180SAdrian Hunter ts = thread_stack__new(thread, sample->cpu, crp, true, 0); 1121bd8e68acSAdrian Hunter if (!ts) 112292a9e4f7SAdrian Hunter return -ENOMEM; 112392a9e4f7SAdrian Hunter ts->comm = comm; 112492a9e4f7SAdrian Hunter } 112592a9e4f7SAdrian Hunter 11263c0cd952SAdrian Hunter rstate = ts->rstate; 11273c0cd952SAdrian Hunter if (rstate == X86_RETPOLINE_DETECTED) 11283c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_POSSIBLE; 11293c0cd952SAdrian Hunter 113092a9e4f7SAdrian Hunter /* Flush stack on exec */ 1131*ee84a303SIan Rogers if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) { 1132a5499b37SAdrian Hunter err = __thread_stack__flush(thread, ts); 113392a9e4f7SAdrian Hunter if (err) 113492a9e4f7SAdrian Hunter return err; 113592a9e4f7SAdrian Hunter ts->comm = comm; 113692a9e4f7SAdrian Hunter } 113792a9e4f7SAdrian Hunter 113892a9e4f7SAdrian Hunter /* If the stack is empty, put the current symbol on the stack */ 113992a9e4f7SAdrian Hunter if (!ts->cnt) { 1140e0b89511SAdrian Hunter err = thread_stack__bottom(ts, sample, from_al, to_al, ref); 114192a9e4f7SAdrian Hunter if (err) 114292a9e4f7SAdrian Hunter return err; 114392a9e4f7SAdrian Hunter } 114492a9e4f7SAdrian Hunter 114592a9e4f7SAdrian Hunter ts->branch_count += 1; 1146003ccdc7SAdrian Hunter ts->insn_count += sample->insn_cnt; 1147003ccdc7SAdrian Hunter ts->cyc_count += sample->cyc_cnt; 114892a9e4f7SAdrian Hunter ts->last_time = sample->time; 114992a9e4f7SAdrian Hunter 115092a9e4f7SAdrian Hunter if (sample->flags & PERF_IP_FLAG_CALL) { 11512dcde4e1SAdrian Hunter bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; 115292a9e4f7SAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 115392a9e4f7SAdrian Hunter struct call_path *cp; 115492a9e4f7SAdrian Hunter u64 ret_addr; 115592a9e4f7SAdrian Hunter 115692a9e4f7SAdrian Hunter if (!sample->ip || !sample->addr) 115792a9e4f7SAdrian Hunter return 0; 115892a9e4f7SAdrian Hunter 115992a9e4f7SAdrian Hunter ret_addr = sample->ip + sample->insn_len; 116092a9e4f7SAdrian Hunter if (ret_addr == sample->addr) 116192a9e4f7SAdrian Hunter return 0; /* Zero-length calls are excluded */ 116292a9e4f7SAdrian Hunter 116392a9e4f7SAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 116492a9e4f7SAdrian Hunter to_al->sym, sample->addr, 116592a9e4f7SAdrian Hunter ts->kernel_start); 116692a9e4f7SAdrian Hunter err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, 11672dcde4e1SAdrian Hunter cp, false, trace_end); 11683c0cd952SAdrian Hunter 11693c0cd952SAdrian Hunter /* 11703c0cd952SAdrian Hunter * A call to the same symbol but not the start of the symbol, 11713c0cd952SAdrian Hunter * may be the start of a x86 retpoline. 11723c0cd952SAdrian Hunter */ 11733c0cd952SAdrian Hunter if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && 11743c0cd952SAdrian Hunter from_al->sym == to_al->sym && 11753c0cd952SAdrian Hunter to_al->addr != to_al->sym->start) 11763c0cd952SAdrian Hunter ts->rstate = X86_RETPOLINE_DETECTED; 11773c0cd952SAdrian Hunter 117892a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_RETURN) { 117997860b48SAdrian Hunter if (!sample->addr) { 118097860b48SAdrian Hunter u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET | 118197860b48SAdrian Hunter PERF_IP_FLAG_INTERRUPT; 118297860b48SAdrian Hunter 118397860b48SAdrian Hunter if (!(sample->flags & return_from_kernel)) 118497860b48SAdrian Hunter return 0; 118597860b48SAdrian Hunter 118697860b48SAdrian Hunter /* Pop kernel stack */ 118797860b48SAdrian Hunter return thread_stack__pop_ks(thread, ts, sample, ref); 118897860b48SAdrian Hunter } 118997860b48SAdrian Hunter 119097860b48SAdrian Hunter if (!sample->ip) 119192a9e4f7SAdrian Hunter return 0; 119292a9e4f7SAdrian Hunter 11933c0cd952SAdrian Hunter /* x86 retpoline 'return' doesn't match the stack */ 11943c0cd952SAdrian Hunter if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && 11953c0cd952SAdrian Hunter ts->stack[ts->cnt - 1].ret_addr != sample->addr) 11963c0cd952SAdrian Hunter return thread_stack__x86_retpoline(ts, sample, to_al); 11973c0cd952SAdrian Hunter 119892a9e4f7SAdrian Hunter err = thread_stack__pop_cp(thread, ts, sample->addr, 119992a9e4f7SAdrian Hunter sample->time, ref, from_al->sym); 120092a9e4f7SAdrian Hunter if (err) { 120192a9e4f7SAdrian Hunter if (err < 0) 120292a9e4f7SAdrian Hunter return err; 120392a9e4f7SAdrian Hunter err = thread_stack__no_call_return(thread, ts, sample, 120492a9e4f7SAdrian Hunter from_al, to_al, ref); 120592a9e4f7SAdrian Hunter } 120692a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { 120792a9e4f7SAdrian Hunter err = thread_stack__trace_begin(thread, ts, sample->time, ref); 120892a9e4f7SAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { 120992a9e4f7SAdrian Hunter err = thread_stack__trace_end(ts, sample, ref); 1210f08046cbSAdrian Hunter } else if (sample->flags & PERF_IP_FLAG_BRANCH && 1211f08046cbSAdrian Hunter from_al->sym != to_al->sym && to_al->sym && 1212f08046cbSAdrian Hunter to_al->addr == to_al->sym->start) { 1213f08046cbSAdrian Hunter struct call_path_root *cpr = ts->crp->cpr; 1214f08046cbSAdrian Hunter struct call_path *cp; 1215f08046cbSAdrian Hunter 1216f08046cbSAdrian Hunter /* 1217f08046cbSAdrian Hunter * The compiler might optimize a call/ret combination by making 1218f08046cbSAdrian Hunter * it a jmp. Make that visible by recording on the stack a 1219f08046cbSAdrian Hunter * branch to the start of a different symbol. Note, that means 1220f08046cbSAdrian Hunter * when a ret pops the stack, all jmps must be popped off first. 1221f08046cbSAdrian Hunter */ 1222f08046cbSAdrian Hunter cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 1223f08046cbSAdrian Hunter to_al->sym, sample->addr, 1224f08046cbSAdrian Hunter ts->kernel_start); 1225f08046cbSAdrian Hunter err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, 1226f08046cbSAdrian Hunter false); 1227f08046cbSAdrian Hunter if (!err) 1228f08046cbSAdrian Hunter ts->stack[ts->cnt - 1].non_call = true; 122992a9e4f7SAdrian Hunter } 123092a9e4f7SAdrian Hunter 123192a9e4f7SAdrian Hunter return err; 123292a9e4f7SAdrian Hunter } 1233e216708dSAdrian Hunter 1234256d92bcSAdrian Hunter size_t thread_stack__depth(struct thread *thread, int cpu) 1235e216708dSAdrian Hunter { 1236256d92bcSAdrian Hunter struct thread_stack *ts = thread__stack(thread, cpu); 1237bd8e68acSAdrian Hunter 1238bd8e68acSAdrian Hunter if (!ts) 1239e216708dSAdrian Hunter return 0; 1240bd8e68acSAdrian Hunter return ts->cnt; 1241e216708dSAdrian Hunter } 1242