xref: /openbmc/linux/tools/perf/util/thread-stack.c (revision 2025cf9e)
12025cf9eSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
200447ccdSAdrian Hunter /*
300447ccdSAdrian Hunter  * thread-stack.c: Synthesize a thread's stack using call / return events
400447ccdSAdrian Hunter  * Copyright (c) 2014, Intel Corporation.
500447ccdSAdrian Hunter  */
600447ccdSAdrian Hunter 
792a9e4f7SAdrian Hunter #include <linux/rbtree.h>
892a9e4f7SAdrian Hunter #include <linux/list.h>
9256d92bcSAdrian Hunter #include <linux/log2.h>
10a43783aeSArnaldo Carvalho de Melo #include <errno.h>
1100447ccdSAdrian Hunter #include "thread.h"
1200447ccdSAdrian Hunter #include "event.h"
1392a9e4f7SAdrian Hunter #include "machine.h"
143c0cd952SAdrian Hunter #include "env.h"
1500447ccdSAdrian Hunter #include "util.h"
1600447ccdSAdrian Hunter #include "debug.h"
1792a9e4f7SAdrian Hunter #include "symbol.h"
1892a9e4f7SAdrian Hunter #include "comm.h"
19451db126SChris Phlipot #include "call-path.h"
2000447ccdSAdrian Hunter #include "thread-stack.h"
2100447ccdSAdrian Hunter 
2292a9e4f7SAdrian Hunter #define STACK_GROWTH 2048
2392a9e4f7SAdrian Hunter 
/*
 * State of retpoline detection, stored per thread stack in the 'rstate'
 * member of struct thread_stack.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	/* First enumerator (value 0), so it is what a zero-filled thread_stack holds */
	RETPOLINE_NONE,
	/* Set by thread_stack__init() when the machine architecture is "x86" */
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};
363c0cd952SAdrian Hunter 
/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address; this is the value 'return' branches are matched
 *            against when deciding which entries to pop
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @db_id: id used for db-export; reset to 0 when the entry is pushed by
 *         thread_stack__push_cp()
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 *
 * Note that thread_stack__push() fills in only @ret_addr and @trace_end,
 * whereas thread_stack__push_cp() fills in every member.
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};
6092a9e4f7SAdrian Hunter 
/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack (NULL until thread_stack__init() has
 *         succeeded, and again after the stack has been freed or reset)
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size, in entries; grown in steps of STACK_GROWTH
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address (1ULL << 63 when the thread has no
 *                machine to query)
 * @last_time: last timestamp
 * @crp: call/return processor (NULL when only thread_stack__event() is used)
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
};
8900447ccdSAdrian Hunter 
90256d92bcSAdrian Hunter /*
91256d92bcSAdrian Hunter  * Assume pid == tid == 0 identifies the idle task as defined by
92256d92bcSAdrian Hunter  * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
93256d92bcSAdrian Hunter  * and therefore requires a stack for each cpu.
94256d92bcSAdrian Hunter  */
95256d92bcSAdrian Hunter static inline bool thread_stack__per_cpu(struct thread *thread)
96256d92bcSAdrian Hunter {
97256d92bcSAdrian Hunter 	return !(thread->tid || thread->pid_);
98256d92bcSAdrian Hunter }
99256d92bcSAdrian Hunter 
10000447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts)
10100447ccdSAdrian Hunter {
10200447ccdSAdrian Hunter 	struct thread_stack_entry *new_stack;
10300447ccdSAdrian Hunter 	size_t sz, new_sz;
10400447ccdSAdrian Hunter 
10500447ccdSAdrian Hunter 	new_sz = ts->sz + STACK_GROWTH;
10600447ccdSAdrian Hunter 	sz = new_sz * sizeof(struct thread_stack_entry);
10700447ccdSAdrian Hunter 
10800447ccdSAdrian Hunter 	new_stack = realloc(ts->stack, sz);
10900447ccdSAdrian Hunter 	if (!new_stack)
11000447ccdSAdrian Hunter 		return -ENOMEM;
11100447ccdSAdrian Hunter 
11200447ccdSAdrian Hunter 	ts->stack = new_stack;
11300447ccdSAdrian Hunter 	ts->sz = new_sz;
11400447ccdSAdrian Hunter 
11500447ccdSAdrian Hunter 	return 0;
11600447ccdSAdrian Hunter }
11700447ccdSAdrian Hunter 
1182e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
1192e9e8688SAdrian Hunter 			      struct call_return_processor *crp)
1202e9e8688SAdrian Hunter {
1212e9e8688SAdrian Hunter 	int err;
1222e9e8688SAdrian Hunter 
1232e9e8688SAdrian Hunter 	err = thread_stack__grow(ts);
1242e9e8688SAdrian Hunter 	if (err)
1252e9e8688SAdrian Hunter 		return err;
1262e9e8688SAdrian Hunter 
1273c0cd952SAdrian Hunter 	if (thread->mg && thread->mg->machine) {
1283c0cd952SAdrian Hunter 		struct machine *machine = thread->mg->machine;
1293c0cd952SAdrian Hunter 		const char *arch = perf_env__arch(machine->env);
1303c0cd952SAdrian Hunter 
1313c0cd952SAdrian Hunter 		ts->kernel_start = machine__kernel_start(machine);
1323c0cd952SAdrian Hunter 		if (!strcmp(arch, "x86"))
1333c0cd952SAdrian Hunter 			ts->rstate = X86_RETPOLINE_POSSIBLE;
1343c0cd952SAdrian Hunter 	} else {
1352e9e8688SAdrian Hunter 		ts->kernel_start = 1ULL << 63;
1363c0cd952SAdrian Hunter 	}
1372e9e8688SAdrian Hunter 	ts->crp = crp;
1382e9e8688SAdrian Hunter 
1392e9e8688SAdrian Hunter 	return 0;
1402e9e8688SAdrian Hunter }
1412e9e8688SAdrian Hunter 
142256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
14392a9e4f7SAdrian Hunter 					      struct call_return_processor *crp)
14400447ccdSAdrian Hunter {
145139f42f3SAdrian Hunter 	struct thread_stack *ts = thread->ts, *new_ts;
146139f42f3SAdrian Hunter 	unsigned int old_sz = ts ? ts->arr_sz : 0;
147139f42f3SAdrian Hunter 	unsigned int new_sz = 1;
14800447ccdSAdrian Hunter 
149256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0)
150256d92bcSAdrian Hunter 		new_sz = roundup_pow_of_two(cpu + 1);
151256d92bcSAdrian Hunter 
152139f42f3SAdrian Hunter 	if (!ts || new_sz > old_sz) {
153139f42f3SAdrian Hunter 		new_ts = calloc(new_sz, sizeof(*ts));
154139f42f3SAdrian Hunter 		if (!new_ts)
15500447ccdSAdrian Hunter 			return NULL;
156139f42f3SAdrian Hunter 		if (ts)
157139f42f3SAdrian Hunter 			memcpy(new_ts, ts, old_sz * sizeof(*ts));
158139f42f3SAdrian Hunter 		new_ts->arr_sz = new_sz;
159139f42f3SAdrian Hunter 		zfree(&thread->ts);
160139f42f3SAdrian Hunter 		thread->ts = new_ts;
161139f42f3SAdrian Hunter 		ts = new_ts;
16200447ccdSAdrian Hunter 	}
16300447ccdSAdrian Hunter 
164256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0 &&
165256d92bcSAdrian Hunter 	    (unsigned int)cpu < ts->arr_sz)
166256d92bcSAdrian Hunter 		ts += cpu;
167256d92bcSAdrian Hunter 
168139f42f3SAdrian Hunter 	if (!ts->stack &&
169139f42f3SAdrian Hunter 	    thread_stack__init(ts, thread, crp))
170139f42f3SAdrian Hunter 		return NULL;
171bd8e68acSAdrian Hunter 
17200447ccdSAdrian Hunter 	return ts;
17300447ccdSAdrian Hunter }
17400447ccdSAdrian Hunter 
175256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
176bd8e68acSAdrian Hunter {
177256d92bcSAdrian Hunter 	struct thread_stack *ts = thread->ts;
178256d92bcSAdrian Hunter 
179256d92bcSAdrian Hunter 	if (cpu < 0)
180256d92bcSAdrian Hunter 		cpu = 0;
181256d92bcSAdrian Hunter 
182256d92bcSAdrian Hunter 	if (!ts || (unsigned int)cpu >= ts->arr_sz)
183256d92bcSAdrian Hunter 		return NULL;
184256d92bcSAdrian Hunter 
185256d92bcSAdrian Hunter 	ts += cpu;
186256d92bcSAdrian Hunter 
187256d92bcSAdrian Hunter 	if (!ts->stack)
188256d92bcSAdrian Hunter 		return NULL;
189256d92bcSAdrian Hunter 
190256d92bcSAdrian Hunter 	return ts;
191256d92bcSAdrian Hunter }
192256d92bcSAdrian Hunter 
193256d92bcSAdrian Hunter static inline struct thread_stack *thread__stack(struct thread *thread,
194256d92bcSAdrian Hunter 						    int cpu)
195256d92bcSAdrian Hunter {
196256d92bcSAdrian Hunter 	if (!thread)
197256d92bcSAdrian Hunter 		return NULL;
198256d92bcSAdrian Hunter 
199256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread))
200256d92bcSAdrian Hunter 		return thread__cpu_stack(thread, cpu);
201256d92bcSAdrian Hunter 
202256d92bcSAdrian Hunter 	return thread->ts;
203bd8e68acSAdrian Hunter }
204bd8e68acSAdrian Hunter 
2054d60e5e3SAdrian Hunter static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
2064d60e5e3SAdrian Hunter 			      bool trace_end)
20700447ccdSAdrian Hunter {
20800447ccdSAdrian Hunter 	int err = 0;
20900447ccdSAdrian Hunter 
21000447ccdSAdrian Hunter 	if (ts->cnt == ts->sz) {
21100447ccdSAdrian Hunter 		err = thread_stack__grow(ts);
21200447ccdSAdrian Hunter 		if (err) {
21300447ccdSAdrian Hunter 			pr_warning("Out of memory: discarding thread stack\n");
21400447ccdSAdrian Hunter 			ts->cnt = 0;
21500447ccdSAdrian Hunter 		}
21600447ccdSAdrian Hunter 	}
21700447ccdSAdrian Hunter 
2184d60e5e3SAdrian Hunter 	ts->stack[ts->cnt].trace_end = trace_end;
21900447ccdSAdrian Hunter 	ts->stack[ts->cnt++].ret_addr = ret_addr;
22000447ccdSAdrian Hunter 
22100447ccdSAdrian Hunter 	return err;
22200447ccdSAdrian Hunter }
22300447ccdSAdrian Hunter 
22400447ccdSAdrian Hunter static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
22500447ccdSAdrian Hunter {
22600447ccdSAdrian Hunter 	size_t i;
22700447ccdSAdrian Hunter 
22800447ccdSAdrian Hunter 	/*
22900447ccdSAdrian Hunter 	 * In some cases there may be functions which are not seen to return.
23000447ccdSAdrian Hunter 	 * For example when setjmp / longjmp has been used.  Or the perf context
23100447ccdSAdrian Hunter 	 * switch in the kernel which doesn't stop and start tracing in exactly
23200447ccdSAdrian Hunter 	 * the same code path.  When that happens the return address will be
23300447ccdSAdrian Hunter 	 * further down the stack.  If the return address is not found at all,
23400447ccdSAdrian Hunter 	 * we assume the opposite (i.e. this is a return for a call that wasn't
23500447ccdSAdrian Hunter 	 * seen for some reason) and leave the stack alone.
23600447ccdSAdrian Hunter 	 */
23700447ccdSAdrian Hunter 	for (i = ts->cnt; i; ) {
23800447ccdSAdrian Hunter 		if (ts->stack[--i].ret_addr == ret_addr) {
23900447ccdSAdrian Hunter 			ts->cnt = i;
24000447ccdSAdrian Hunter 			return;
24100447ccdSAdrian Hunter 		}
24200447ccdSAdrian Hunter 	}
24300447ccdSAdrian Hunter }
24400447ccdSAdrian Hunter 
2454d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts)
2464d60e5e3SAdrian Hunter {
2474d60e5e3SAdrian Hunter 	size_t i;
2484d60e5e3SAdrian Hunter 
2494d60e5e3SAdrian Hunter 	for (i = ts->cnt; i; ) {
2504d60e5e3SAdrian Hunter 		if (ts->stack[--i].trace_end)
2514d60e5e3SAdrian Hunter 			ts->cnt = i;
2524d60e5e3SAdrian Hunter 		else
2534d60e5e3SAdrian Hunter 			return;
2544d60e5e3SAdrian Hunter 	}
2554d60e5e3SAdrian Hunter }
2564d60e5e3SAdrian Hunter 
25792a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts)
25892a9e4f7SAdrian Hunter {
25992a9e4f7SAdrian Hunter 	if (!ts->cnt)
26092a9e4f7SAdrian Hunter 		return false;
26192a9e4f7SAdrian Hunter 
26292a9e4f7SAdrian Hunter 	return ts->stack[ts->cnt - 1].cp->in_kernel;
26392a9e4f7SAdrian Hunter }
26492a9e4f7SAdrian Hunter 
26592a9e4f7SAdrian Hunter static int thread_stack__call_return(struct thread *thread,
26692a9e4f7SAdrian Hunter 				     struct thread_stack *ts, size_t idx,
26792a9e4f7SAdrian Hunter 				     u64 timestamp, u64 ref, bool no_return)
26892a9e4f7SAdrian Hunter {
26992a9e4f7SAdrian Hunter 	struct call_return_processor *crp = ts->crp;
27092a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
27192a9e4f7SAdrian Hunter 	struct call_return cr = {
27292a9e4f7SAdrian Hunter 		.thread = thread,
27392a9e4f7SAdrian Hunter 		.comm = ts->comm,
27492a9e4f7SAdrian Hunter 		.db_id = 0,
27592a9e4f7SAdrian Hunter 	};
276f435887eSAdrian Hunter 	u64 *parent_db_id;
27792a9e4f7SAdrian Hunter 
27892a9e4f7SAdrian Hunter 	tse = &ts->stack[idx];
27992a9e4f7SAdrian Hunter 	cr.cp = tse->cp;
28092a9e4f7SAdrian Hunter 	cr.call_time = tse->timestamp;
28192a9e4f7SAdrian Hunter 	cr.return_time = timestamp;
28292a9e4f7SAdrian Hunter 	cr.branch_count = ts->branch_count - tse->branch_count;
283f435887eSAdrian Hunter 	cr.db_id = tse->db_id;
28492a9e4f7SAdrian Hunter 	cr.call_ref = tse->ref;
28592a9e4f7SAdrian Hunter 	cr.return_ref = ref;
28692a9e4f7SAdrian Hunter 	if (tse->no_call)
28792a9e4f7SAdrian Hunter 		cr.flags |= CALL_RETURN_NO_CALL;
28892a9e4f7SAdrian Hunter 	if (no_return)
28992a9e4f7SAdrian Hunter 		cr.flags |= CALL_RETURN_NO_RETURN;
290f08046cbSAdrian Hunter 	if (tse->non_call)
291f08046cbSAdrian Hunter 		cr.flags |= CALL_RETURN_NON_CALL;
29292a9e4f7SAdrian Hunter 
293f435887eSAdrian Hunter 	/*
294f435887eSAdrian Hunter 	 * The parent db_id must be assigned before exporting the child. Note
295f435887eSAdrian Hunter 	 * it is not possible to export the parent first because its information
296f435887eSAdrian Hunter 	 * is not yet complete because its 'return' has not yet been processed.
297f435887eSAdrian Hunter 	 */
298f435887eSAdrian Hunter 	parent_db_id = idx ? &(tse - 1)->db_id : NULL;
299f435887eSAdrian Hunter 
300f435887eSAdrian Hunter 	return crp->process(&cr, parent_db_id, crp->data);
30192a9e4f7SAdrian Hunter }
30292a9e4f7SAdrian Hunter 
303a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
30492a9e4f7SAdrian Hunter {
30592a9e4f7SAdrian Hunter 	struct call_return_processor *crp = ts->crp;
30692a9e4f7SAdrian Hunter 	int err;
30792a9e4f7SAdrian Hunter 
30892a9e4f7SAdrian Hunter 	if (!crp) {
30992a9e4f7SAdrian Hunter 		ts->cnt = 0;
31092a9e4f7SAdrian Hunter 		return 0;
31192a9e4f7SAdrian Hunter 	}
31292a9e4f7SAdrian Hunter 
31392a9e4f7SAdrian Hunter 	while (ts->cnt) {
31492a9e4f7SAdrian Hunter 		err = thread_stack__call_return(thread, ts, --ts->cnt,
31592a9e4f7SAdrian Hunter 						ts->last_time, 0, true);
31692a9e4f7SAdrian Hunter 		if (err) {
31792a9e4f7SAdrian Hunter 			pr_err("Error flushing thread stack!\n");
31892a9e4f7SAdrian Hunter 			ts->cnt = 0;
31992a9e4f7SAdrian Hunter 			return err;
32092a9e4f7SAdrian Hunter 		}
32192a9e4f7SAdrian Hunter 	}
32292a9e4f7SAdrian Hunter 
32392a9e4f7SAdrian Hunter 	return 0;
32492a9e4f7SAdrian Hunter }
32592a9e4f7SAdrian Hunter 
326a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread)
327a5499b37SAdrian Hunter {
328bd8e68acSAdrian Hunter 	struct thread_stack *ts = thread->ts;
329f6060ac6SAdrian Hunter 	unsigned int pos;
330f6060ac6SAdrian Hunter 	int err = 0;
331bd8e68acSAdrian Hunter 
332f6060ac6SAdrian Hunter 	if (ts) {
333f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++) {
334f6060ac6SAdrian Hunter 			int ret = __thread_stack__flush(thread, ts + pos);
335a5499b37SAdrian Hunter 
336f6060ac6SAdrian Hunter 			if (ret)
337f6060ac6SAdrian Hunter 				err = ret;
338f6060ac6SAdrian Hunter 		}
339f6060ac6SAdrian Hunter 	}
340f6060ac6SAdrian Hunter 
341f6060ac6SAdrian Hunter 	return err;
342a5499b37SAdrian Hunter }
343a5499b37SAdrian Hunter 
344256d92bcSAdrian Hunter int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
34500447ccdSAdrian Hunter 			u64 to_ip, u16 insn_len, u64 trace_nr)
34600447ccdSAdrian Hunter {
347256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
348bd8e68acSAdrian Hunter 
34900447ccdSAdrian Hunter 	if (!thread)
35000447ccdSAdrian Hunter 		return -EINVAL;
35100447ccdSAdrian Hunter 
352bd8e68acSAdrian Hunter 	if (!ts) {
353256d92bcSAdrian Hunter 		ts = thread_stack__new(thread, cpu, NULL);
354bd8e68acSAdrian Hunter 		if (!ts) {
35500447ccdSAdrian Hunter 			pr_warning("Out of memory: no thread stack\n");
35600447ccdSAdrian Hunter 			return -ENOMEM;
35700447ccdSAdrian Hunter 		}
358bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
35900447ccdSAdrian Hunter 	}
36000447ccdSAdrian Hunter 
36100447ccdSAdrian Hunter 	/*
36200447ccdSAdrian Hunter 	 * When the trace is discontinuous, the trace_nr changes.  In that case
36300447ccdSAdrian Hunter 	 * the stack might be completely invalid.  Better to report nothing than
36492a9e4f7SAdrian Hunter 	 * to report something misleading, so flush the stack.
36500447ccdSAdrian Hunter 	 */
366bd8e68acSAdrian Hunter 	if (trace_nr != ts->trace_nr) {
367bd8e68acSAdrian Hunter 		if (ts->trace_nr)
368bd8e68acSAdrian Hunter 			__thread_stack__flush(thread, ts);
369bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
37000447ccdSAdrian Hunter 	}
37100447ccdSAdrian Hunter 
37292a9e4f7SAdrian Hunter 	/* Stop here if thread_stack__process() is in use */
373bd8e68acSAdrian Hunter 	if (ts->crp)
37492a9e4f7SAdrian Hunter 		return 0;
37592a9e4f7SAdrian Hunter 
37600447ccdSAdrian Hunter 	if (flags & PERF_IP_FLAG_CALL) {
37700447ccdSAdrian Hunter 		u64 ret_addr;
37800447ccdSAdrian Hunter 
37900447ccdSAdrian Hunter 		if (!to_ip)
38000447ccdSAdrian Hunter 			return 0;
38100447ccdSAdrian Hunter 		ret_addr = from_ip + insn_len;
38200447ccdSAdrian Hunter 		if (ret_addr == to_ip)
38300447ccdSAdrian Hunter 			return 0; /* Zero-length calls are excluded */
384bd8e68acSAdrian Hunter 		return thread_stack__push(ts, ret_addr,
3854d60e5e3SAdrian Hunter 					  flags & PERF_IP_FLAG_TRACE_END);
3864d60e5e3SAdrian Hunter 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
3874d60e5e3SAdrian Hunter 		/*
3884d60e5e3SAdrian Hunter 		 * If the caller did not change the trace number (which would
3894d60e5e3SAdrian Hunter 		 * have flushed the stack) then try to make sense of the stack.
3904d60e5e3SAdrian Hunter 		 * Possibly, tracing began after returning to the current
3914d60e5e3SAdrian Hunter 		 * address, so try to pop that. Also, do not expect a call made
3924d60e5e3SAdrian Hunter 		 * when the trace ended, to return, so pop that.
3934d60e5e3SAdrian Hunter 		 */
394bd8e68acSAdrian Hunter 		thread_stack__pop(ts, to_ip);
395bd8e68acSAdrian Hunter 		thread_stack__pop_trace_end(ts);
3964d60e5e3SAdrian Hunter 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
397bd8e68acSAdrian Hunter 		thread_stack__pop(ts, to_ip);
39800447ccdSAdrian Hunter 	}
39900447ccdSAdrian Hunter 
40000447ccdSAdrian Hunter 	return 0;
40100447ccdSAdrian Hunter }
40200447ccdSAdrian Hunter 
403256d92bcSAdrian Hunter void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
40492a9e4f7SAdrian Hunter {
405256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
406bd8e68acSAdrian Hunter 
407bd8e68acSAdrian Hunter 	if (!ts)
40892a9e4f7SAdrian Hunter 		return;
40992a9e4f7SAdrian Hunter 
410bd8e68acSAdrian Hunter 	if (trace_nr != ts->trace_nr) {
411bd8e68acSAdrian Hunter 		if (ts->trace_nr)
412bd8e68acSAdrian Hunter 			__thread_stack__flush(thread, ts);
413bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
41492a9e4f7SAdrian Hunter 	}
41592a9e4f7SAdrian Hunter }
41692a9e4f7SAdrian Hunter 
417f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
418f6060ac6SAdrian Hunter {
419f6060ac6SAdrian Hunter 	__thread_stack__flush(thread, ts);
420f6060ac6SAdrian Hunter 	zfree(&ts->stack);
421f6060ac6SAdrian Hunter }
422f6060ac6SAdrian Hunter 
423f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
424f6060ac6SAdrian Hunter {
425f6060ac6SAdrian Hunter 	unsigned int arr_sz = ts->arr_sz;
426f6060ac6SAdrian Hunter 
427f6060ac6SAdrian Hunter 	__thread_stack__free(thread, ts);
428f6060ac6SAdrian Hunter 	memset(ts, 0, sizeof(*ts));
429f6060ac6SAdrian Hunter 	ts->arr_sz = arr_sz;
430f6060ac6SAdrian Hunter }
431f6060ac6SAdrian Hunter 
43200447ccdSAdrian Hunter void thread_stack__free(struct thread *thread)
43300447ccdSAdrian Hunter {
434bd8e68acSAdrian Hunter 	struct thread_stack *ts = thread->ts;
435f6060ac6SAdrian Hunter 	unsigned int pos;
436bd8e68acSAdrian Hunter 
437bd8e68acSAdrian Hunter 	if (ts) {
438f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++)
439f6060ac6SAdrian Hunter 			__thread_stack__free(thread, ts + pos);
44000447ccdSAdrian Hunter 		zfree(&thread->ts);
44100447ccdSAdrian Hunter 	}
44200447ccdSAdrian Hunter }
44300447ccdSAdrian Hunter 
44424248306SAdrian Hunter static inline u64 callchain_context(u64 ip, u64 kernel_start)
44500447ccdSAdrian Hunter {
44624248306SAdrian Hunter 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
44724248306SAdrian Hunter }
44800447ccdSAdrian Hunter 
449256d92bcSAdrian Hunter void thread_stack__sample(struct thread *thread, int cpu,
450256d92bcSAdrian Hunter 			  struct ip_callchain *chain,
45124248306SAdrian Hunter 			  size_t sz, u64 ip, u64 kernel_start)
45224248306SAdrian Hunter {
453256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
45424248306SAdrian Hunter 	u64 context = callchain_context(ip, kernel_start);
45524248306SAdrian Hunter 	u64 last_context;
45624248306SAdrian Hunter 	size_t i, j;
45700447ccdSAdrian Hunter 
45824248306SAdrian Hunter 	if (sz < 2) {
45924248306SAdrian Hunter 		chain->nr = 0;
46024248306SAdrian Hunter 		return;
46124248306SAdrian Hunter 	}
46200447ccdSAdrian Hunter 
46324248306SAdrian Hunter 	chain->ips[0] = context;
46424248306SAdrian Hunter 	chain->ips[1] = ip;
46524248306SAdrian Hunter 
466bd8e68acSAdrian Hunter 	if (!ts) {
46724248306SAdrian Hunter 		chain->nr = 2;
46824248306SAdrian Hunter 		return;
46924248306SAdrian Hunter 	}
47024248306SAdrian Hunter 
47124248306SAdrian Hunter 	last_context = context;
47224248306SAdrian Hunter 
473bd8e68acSAdrian Hunter 	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
474bd8e68acSAdrian Hunter 		ip = ts->stack[ts->cnt - j].ret_addr;
47524248306SAdrian Hunter 		context = callchain_context(ip, kernel_start);
47624248306SAdrian Hunter 		if (context != last_context) {
47724248306SAdrian Hunter 			if (i >= sz - 1)
47824248306SAdrian Hunter 				break;
47924248306SAdrian Hunter 			chain->ips[i++] = context;
48024248306SAdrian Hunter 			last_context = context;
48124248306SAdrian Hunter 		}
48224248306SAdrian Hunter 		chain->ips[i] = ip;
48324248306SAdrian Hunter 	}
48424248306SAdrian Hunter 
48524248306SAdrian Hunter 	chain->nr = i;
48600447ccdSAdrian Hunter }
48792a9e4f7SAdrian Hunter 
48892a9e4f7SAdrian Hunter struct call_return_processor *
489f435887eSAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
49092a9e4f7SAdrian Hunter 			   void *data)
49192a9e4f7SAdrian Hunter {
49292a9e4f7SAdrian Hunter 	struct call_return_processor *crp;
49392a9e4f7SAdrian Hunter 
49492a9e4f7SAdrian Hunter 	crp = zalloc(sizeof(struct call_return_processor));
49592a9e4f7SAdrian Hunter 	if (!crp)
49692a9e4f7SAdrian Hunter 		return NULL;
49792a9e4f7SAdrian Hunter 	crp->cpr = call_path_root__new();
49892a9e4f7SAdrian Hunter 	if (!crp->cpr)
49992a9e4f7SAdrian Hunter 		goto out_free;
50092a9e4f7SAdrian Hunter 	crp->process = process;
50192a9e4f7SAdrian Hunter 	crp->data = data;
50292a9e4f7SAdrian Hunter 	return crp;
50392a9e4f7SAdrian Hunter 
50492a9e4f7SAdrian Hunter out_free:
50592a9e4f7SAdrian Hunter 	free(crp);
50692a9e4f7SAdrian Hunter 	return NULL;
50792a9e4f7SAdrian Hunter }
50892a9e4f7SAdrian Hunter 
50992a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp)
51092a9e4f7SAdrian Hunter {
51192a9e4f7SAdrian Hunter 	if (crp) {
51292a9e4f7SAdrian Hunter 		call_path_root__free(crp->cpr);
51392a9e4f7SAdrian Hunter 		free(crp);
51492a9e4f7SAdrian Hunter 	}
51592a9e4f7SAdrian Hunter }
51692a9e4f7SAdrian Hunter 
51792a9e4f7SAdrian Hunter static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
51892a9e4f7SAdrian Hunter 				 u64 timestamp, u64 ref, struct call_path *cp,
5192dcde4e1SAdrian Hunter 				 bool no_call, bool trace_end)
52092a9e4f7SAdrian Hunter {
52192a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
52292a9e4f7SAdrian Hunter 	int err;
52392a9e4f7SAdrian Hunter 
524e7a3a055SAdrian Hunter 	if (!cp)
525e7a3a055SAdrian Hunter 		return -ENOMEM;
526e7a3a055SAdrian Hunter 
52792a9e4f7SAdrian Hunter 	if (ts->cnt == ts->sz) {
52892a9e4f7SAdrian Hunter 		err = thread_stack__grow(ts);
52992a9e4f7SAdrian Hunter 		if (err)
53092a9e4f7SAdrian Hunter 			return err;
53192a9e4f7SAdrian Hunter 	}
53292a9e4f7SAdrian Hunter 
53392a9e4f7SAdrian Hunter 	tse = &ts->stack[ts->cnt++];
53492a9e4f7SAdrian Hunter 	tse->ret_addr = ret_addr;
53592a9e4f7SAdrian Hunter 	tse->timestamp = timestamp;
53692a9e4f7SAdrian Hunter 	tse->ref = ref;
53792a9e4f7SAdrian Hunter 	tse->branch_count = ts->branch_count;
53892a9e4f7SAdrian Hunter 	tse->cp = cp;
53992a9e4f7SAdrian Hunter 	tse->no_call = no_call;
5402dcde4e1SAdrian Hunter 	tse->trace_end = trace_end;
541f08046cbSAdrian Hunter 	tse->non_call = false;
542f435887eSAdrian Hunter 	tse->db_id = 0;
54392a9e4f7SAdrian Hunter 
54492a9e4f7SAdrian Hunter 	return 0;
54592a9e4f7SAdrian Hunter }
54692a9e4f7SAdrian Hunter 
54792a9e4f7SAdrian Hunter static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
54892a9e4f7SAdrian Hunter 				u64 ret_addr, u64 timestamp, u64 ref,
54992a9e4f7SAdrian Hunter 				struct symbol *sym)
55092a9e4f7SAdrian Hunter {
55192a9e4f7SAdrian Hunter 	int err;
55292a9e4f7SAdrian Hunter 
55392a9e4f7SAdrian Hunter 	if (!ts->cnt)
55492a9e4f7SAdrian Hunter 		return 1;
55592a9e4f7SAdrian Hunter 
55692a9e4f7SAdrian Hunter 	if (ts->cnt == 1) {
55792a9e4f7SAdrian Hunter 		struct thread_stack_entry *tse = &ts->stack[0];
55892a9e4f7SAdrian Hunter 
55992a9e4f7SAdrian Hunter 		if (tse->cp->sym == sym)
56092a9e4f7SAdrian Hunter 			return thread_stack__call_return(thread, ts, --ts->cnt,
56192a9e4f7SAdrian Hunter 							 timestamp, ref, false);
56292a9e4f7SAdrian Hunter 	}
56392a9e4f7SAdrian Hunter 
564f08046cbSAdrian Hunter 	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
565f08046cbSAdrian Hunter 	    !ts->stack[ts->cnt - 1].non_call) {
56692a9e4f7SAdrian Hunter 		return thread_stack__call_return(thread, ts, --ts->cnt,
56792a9e4f7SAdrian Hunter 						 timestamp, ref, false);
56892a9e4f7SAdrian Hunter 	} else {
56992a9e4f7SAdrian Hunter 		size_t i = ts->cnt - 1;
57092a9e4f7SAdrian Hunter 
57192a9e4f7SAdrian Hunter 		while (i--) {
572f08046cbSAdrian Hunter 			if (ts->stack[i].ret_addr != ret_addr ||
573f08046cbSAdrian Hunter 			    ts->stack[i].non_call)
57492a9e4f7SAdrian Hunter 				continue;
57592a9e4f7SAdrian Hunter 			i += 1;
57692a9e4f7SAdrian Hunter 			while (ts->cnt > i) {
57792a9e4f7SAdrian Hunter 				err = thread_stack__call_return(thread, ts,
57892a9e4f7SAdrian Hunter 								--ts->cnt,
57992a9e4f7SAdrian Hunter 								timestamp, ref,
58092a9e4f7SAdrian Hunter 								true);
58192a9e4f7SAdrian Hunter 				if (err)
58292a9e4f7SAdrian Hunter 					return err;
58392a9e4f7SAdrian Hunter 			}
58492a9e4f7SAdrian Hunter 			return thread_stack__call_return(thread, ts, --ts->cnt,
58592a9e4f7SAdrian Hunter 							 timestamp, ref, false);
58692a9e4f7SAdrian Hunter 		}
58792a9e4f7SAdrian Hunter 	}
58892a9e4f7SAdrian Hunter 
58992a9e4f7SAdrian Hunter 	return 1;
59092a9e4f7SAdrian Hunter }
59192a9e4f7SAdrian Hunter 
592e0b89511SAdrian Hunter static int thread_stack__bottom(struct thread_stack *ts,
59392a9e4f7SAdrian Hunter 				struct perf_sample *sample,
59492a9e4f7SAdrian Hunter 				struct addr_location *from_al,
59592a9e4f7SAdrian Hunter 				struct addr_location *to_al, u64 ref)
59692a9e4f7SAdrian Hunter {
59792a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
59892a9e4f7SAdrian Hunter 	struct call_path *cp;
59992a9e4f7SAdrian Hunter 	struct symbol *sym;
60092a9e4f7SAdrian Hunter 	u64 ip;
60192a9e4f7SAdrian Hunter 
60292a9e4f7SAdrian Hunter 	if (sample->ip) {
60392a9e4f7SAdrian Hunter 		ip = sample->ip;
60492a9e4f7SAdrian Hunter 		sym = from_al->sym;
60592a9e4f7SAdrian Hunter 	} else if (sample->addr) {
60692a9e4f7SAdrian Hunter 		ip = sample->addr;
60792a9e4f7SAdrian Hunter 		sym = to_al->sym;
60892a9e4f7SAdrian Hunter 	} else {
60992a9e4f7SAdrian Hunter 		return 0;
61092a9e4f7SAdrian Hunter 	}
61192a9e4f7SAdrian Hunter 
61292a9e4f7SAdrian Hunter 	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
61392a9e4f7SAdrian Hunter 				ts->kernel_start);
61492a9e4f7SAdrian Hunter 
615e0b89511SAdrian Hunter 	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
6162dcde4e1SAdrian Hunter 				     true, false);
61792a9e4f7SAdrian Hunter }
61892a9e4f7SAdrian Hunter 
61992a9e4f7SAdrian Hunter static int thread_stack__no_call_return(struct thread *thread,
62092a9e4f7SAdrian Hunter 					struct thread_stack *ts,
62192a9e4f7SAdrian Hunter 					struct perf_sample *sample,
62292a9e4f7SAdrian Hunter 					struct addr_location *from_al,
62392a9e4f7SAdrian Hunter 					struct addr_location *to_al, u64 ref)
62492a9e4f7SAdrian Hunter {
62592a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
62690c2cda7SAdrian Hunter 	struct call_path *root = &cpr->call_path;
62790c2cda7SAdrian Hunter 	struct symbol *fsym = from_al->sym;
62890c2cda7SAdrian Hunter 	struct symbol *tsym = to_al->sym;
62992a9e4f7SAdrian Hunter 	struct call_path *cp, *parent;
63092a9e4f7SAdrian Hunter 	u64 ks = ts->kernel_start;
63190c2cda7SAdrian Hunter 	u64 addr = sample->addr;
63290c2cda7SAdrian Hunter 	u64 tm = sample->time;
63390c2cda7SAdrian Hunter 	u64 ip = sample->ip;
63492a9e4f7SAdrian Hunter 	int err;
63592a9e4f7SAdrian Hunter 
63690c2cda7SAdrian Hunter 	if (ip >= ks && addr < ks) {
63792a9e4f7SAdrian Hunter 		/* Return to userspace, so pop all kernel addresses */
63892a9e4f7SAdrian Hunter 		while (thread_stack__in_kernel(ts)) {
63992a9e4f7SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
64090c2cda7SAdrian Hunter 							tm, ref, true);
64192a9e4f7SAdrian Hunter 			if (err)
64292a9e4f7SAdrian Hunter 				return err;
64392a9e4f7SAdrian Hunter 		}
64492a9e4f7SAdrian Hunter 
64592a9e4f7SAdrian Hunter 		/* If the stack is empty, push the userspace address */
64692a9e4f7SAdrian Hunter 		if (!ts->cnt) {
64790c2cda7SAdrian Hunter 			cp = call_path__findnew(cpr, root, tsym, addr, ks);
64890c2cda7SAdrian Hunter 			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
64990c2cda7SAdrian Hunter 						     false);
65092a9e4f7SAdrian Hunter 		}
65190c2cda7SAdrian Hunter 	} else if (thread_stack__in_kernel(ts) && ip < ks) {
65292a9e4f7SAdrian Hunter 		/* Return to userspace, so pop all kernel addresses */
65392a9e4f7SAdrian Hunter 		while (thread_stack__in_kernel(ts)) {
65492a9e4f7SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
65590c2cda7SAdrian Hunter 							tm, ref, true);
65692a9e4f7SAdrian Hunter 			if (err)
65792a9e4f7SAdrian Hunter 				return err;
65892a9e4f7SAdrian Hunter 		}
65992a9e4f7SAdrian Hunter 	}
66092a9e4f7SAdrian Hunter 
66192a9e4f7SAdrian Hunter 	if (ts->cnt)
66292a9e4f7SAdrian Hunter 		parent = ts->stack[ts->cnt - 1].cp;
66392a9e4f7SAdrian Hunter 	else
66490c2cda7SAdrian Hunter 		parent = root;
66592a9e4f7SAdrian Hunter 
6661f35cd65SAdrian Hunter 	if (parent->sym == from_al->sym) {
6671f35cd65SAdrian Hunter 		/*
6681f35cd65SAdrian Hunter 		 * At the bottom of the stack, assume the missing 'call' was
6691f35cd65SAdrian Hunter 		 * before the trace started. So, pop the current symbol and push
6701f35cd65SAdrian Hunter 		 * the 'to' symbol.
6711f35cd65SAdrian Hunter 		 */
6721f35cd65SAdrian Hunter 		if (ts->cnt == 1) {
6731f35cd65SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
6741f35cd65SAdrian Hunter 							tm, ref, false);
6751f35cd65SAdrian Hunter 			if (err)
6761f35cd65SAdrian Hunter 				return err;
6771f35cd65SAdrian Hunter 		}
6781f35cd65SAdrian Hunter 
6791f35cd65SAdrian Hunter 		if (!ts->cnt) {
6801f35cd65SAdrian Hunter 			cp = call_path__findnew(cpr, root, tsym, addr, ks);
6811f35cd65SAdrian Hunter 
6821f35cd65SAdrian Hunter 			return thread_stack__push_cp(ts, addr, tm, ref, cp,
6831f35cd65SAdrian Hunter 						     true, false);
6841f35cd65SAdrian Hunter 		}
6851f35cd65SAdrian Hunter 
6861f35cd65SAdrian Hunter 		/*
6871f35cd65SAdrian Hunter 		 * Otherwise assume the 'return' is being used as a jump (e.g.
6881f35cd65SAdrian Hunter 		 * retpoline) and just push the 'to' symbol.
6891f35cd65SAdrian Hunter 		 */
6901f35cd65SAdrian Hunter 		cp = call_path__findnew(cpr, parent, tsym, addr, ks);
6911f35cd65SAdrian Hunter 
6921f35cd65SAdrian Hunter 		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
6931f35cd65SAdrian Hunter 		if (!err)
6941f35cd65SAdrian Hunter 			ts->stack[ts->cnt - 1].non_call = true;
6951f35cd65SAdrian Hunter 
6961f35cd65SAdrian Hunter 		return err;
6971f35cd65SAdrian Hunter 	}
6981f35cd65SAdrian Hunter 
6991f35cd65SAdrian Hunter 	/*
7001f35cd65SAdrian Hunter 	 * Assume 'parent' has not yet returned, so push 'to', and then push and
7011f35cd65SAdrian Hunter 	 * pop 'from'.
7021f35cd65SAdrian Hunter 	 */
7031f35cd65SAdrian Hunter 
7041f35cd65SAdrian Hunter 	cp = call_path__findnew(cpr, parent, tsym, addr, ks);
70592a9e4f7SAdrian Hunter 
70690c2cda7SAdrian Hunter 	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
70792a9e4f7SAdrian Hunter 	if (err)
70892a9e4f7SAdrian Hunter 		return err;
70992a9e4f7SAdrian Hunter 
7101f35cd65SAdrian Hunter 	cp = call_path__findnew(cpr, cp, fsym, ip, ks);
7111f35cd65SAdrian Hunter 
7121f35cd65SAdrian Hunter 	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
7131f35cd65SAdrian Hunter 	if (err)
7141f35cd65SAdrian Hunter 		return err;
7151f35cd65SAdrian Hunter 
7161f35cd65SAdrian Hunter 	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
71792a9e4f7SAdrian Hunter }
71892a9e4f7SAdrian Hunter 
71992a9e4f7SAdrian Hunter static int thread_stack__trace_begin(struct thread *thread,
72092a9e4f7SAdrian Hunter 				     struct thread_stack *ts, u64 timestamp,
72192a9e4f7SAdrian Hunter 				     u64 ref)
72292a9e4f7SAdrian Hunter {
72392a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
72492a9e4f7SAdrian Hunter 	int err;
72592a9e4f7SAdrian Hunter 
72692a9e4f7SAdrian Hunter 	if (!ts->cnt)
72792a9e4f7SAdrian Hunter 		return 0;
72892a9e4f7SAdrian Hunter 
72992a9e4f7SAdrian Hunter 	/* Pop trace end */
73092a9e4f7SAdrian Hunter 	tse = &ts->stack[ts->cnt - 1];
7312dcde4e1SAdrian Hunter 	if (tse->trace_end) {
73292a9e4f7SAdrian Hunter 		err = thread_stack__call_return(thread, ts, --ts->cnt,
73392a9e4f7SAdrian Hunter 						timestamp, ref, false);
73492a9e4f7SAdrian Hunter 		if (err)
73592a9e4f7SAdrian Hunter 			return err;
73692a9e4f7SAdrian Hunter 	}
73792a9e4f7SAdrian Hunter 
73892a9e4f7SAdrian Hunter 	return 0;
73992a9e4f7SAdrian Hunter }
74092a9e4f7SAdrian Hunter 
74192a9e4f7SAdrian Hunter static int thread_stack__trace_end(struct thread_stack *ts,
74292a9e4f7SAdrian Hunter 				   struct perf_sample *sample, u64 ref)
74392a9e4f7SAdrian Hunter {
74492a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
74592a9e4f7SAdrian Hunter 	struct call_path *cp;
74692a9e4f7SAdrian Hunter 	u64 ret_addr;
74792a9e4f7SAdrian Hunter 
74892a9e4f7SAdrian Hunter 	/* No point having 'trace end' on the bottom of the stack */
74992a9e4f7SAdrian Hunter 	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
75092a9e4f7SAdrian Hunter 		return 0;
75192a9e4f7SAdrian Hunter 
75292a9e4f7SAdrian Hunter 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
75392a9e4f7SAdrian Hunter 				ts->kernel_start);
75492a9e4f7SAdrian Hunter 
75592a9e4f7SAdrian Hunter 	ret_addr = sample->ip + sample->insn_len;
75692a9e4f7SAdrian Hunter 
75792a9e4f7SAdrian Hunter 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
7582dcde4e1SAdrian Hunter 				     false, true);
75992a9e4f7SAdrian Hunter }
76092a9e4f7SAdrian Hunter 
/*
 * Return true if @name is the name of an x86 retpoline thunk, i.e. it either
 * begins with "__x86_indirect_thunk_" or is exactly "__indirect_thunk_start".
 *
 * @name must be a valid NUL-terminated string.
 */
static bool is_x86_retpoline(const char *name)
{
	static const char thunk_prefix[] = "__x86_indirect_thunk_";

	/*
	 * Compare only the prefix: strncmp() stops at the first mismatch,
	 * whereas searching with strstr() would scan the whole of @name just to
	 * learn whether the match is at offset 0.
	 */
	return !strncmp(name, thunk_prefix, sizeof(thunk_prefix) - 1) ||
	       !strcmp(name, "__indirect_thunk_start");
}
7673c0cd952SAdrian Hunter 
7683c0cd952SAdrian Hunter /*
7693c0cd952SAdrian Hunter  * x86 retpoline functions pollute the call graph. This function removes them.
7703c0cd952SAdrian Hunter  * This does not handle function return thunks, nor is there any improvement
7713c0cd952SAdrian Hunter  * for the handling of inline thunks or extern thunks.
7723c0cd952SAdrian Hunter  */
7733c0cd952SAdrian Hunter static int thread_stack__x86_retpoline(struct thread_stack *ts,
7743c0cd952SAdrian Hunter 				       struct perf_sample *sample,
7753c0cd952SAdrian Hunter 				       struct addr_location *to_al)
7763c0cd952SAdrian Hunter {
7773c0cd952SAdrian Hunter 	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
7783c0cd952SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
7793c0cd952SAdrian Hunter 	struct symbol *sym = tse->cp->sym;
7803c0cd952SAdrian Hunter 	struct symbol *tsym = to_al->sym;
7813c0cd952SAdrian Hunter 	struct call_path *cp;
7823c0cd952SAdrian Hunter 
7833c0cd952SAdrian Hunter 	if (sym && is_x86_retpoline(sym->name)) {
7843c0cd952SAdrian Hunter 		/*
7853c0cd952SAdrian Hunter 		 * This is a x86 retpoline fn. It pollutes the call graph by
7863c0cd952SAdrian Hunter 		 * showing up everywhere there is an indirect branch, but does
7873c0cd952SAdrian Hunter 		 * not itself mean anything. Here the top-of-stack is removed,
7883c0cd952SAdrian Hunter 		 * by decrementing the stack count, and then further down, the
7893c0cd952SAdrian Hunter 		 * resulting top-of-stack is replaced with the actual target.
7903c0cd952SAdrian Hunter 		 * The result is that the retpoline functions will no longer
7913c0cd952SAdrian Hunter 		 * appear in the call graph. Note this only affects the call
7923c0cd952SAdrian Hunter 		 * graph, since all the original branches are left unchanged.
7933c0cd952SAdrian Hunter 		 */
7943c0cd952SAdrian Hunter 		ts->cnt -= 1;
7953c0cd952SAdrian Hunter 		sym = ts->stack[ts->cnt - 2].cp->sym;
7963c0cd952SAdrian Hunter 		if (sym && sym == tsym && to_al->addr != tsym->start) {
7973c0cd952SAdrian Hunter 			/*
7983c0cd952SAdrian Hunter 			 * Target is back to the middle of the symbol we came
7993c0cd952SAdrian Hunter 			 * from so assume it is an indirect jmp and forget it
8003c0cd952SAdrian Hunter 			 * altogether.
8013c0cd952SAdrian Hunter 			 */
8023c0cd952SAdrian Hunter 			ts->cnt -= 1;
8033c0cd952SAdrian Hunter 			return 0;
8043c0cd952SAdrian Hunter 		}
8053c0cd952SAdrian Hunter 	} else if (sym && sym == tsym) {
8063c0cd952SAdrian Hunter 		/*
8073c0cd952SAdrian Hunter 		 * Target is back to the symbol we came from so assume it is an
8083c0cd952SAdrian Hunter 		 * indirect jmp and forget it altogether.
8093c0cd952SAdrian Hunter 		 */
8103c0cd952SAdrian Hunter 		ts->cnt -= 1;
8113c0cd952SAdrian Hunter 		return 0;
8123c0cd952SAdrian Hunter 	}
8133c0cd952SAdrian Hunter 
8143c0cd952SAdrian Hunter 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
8153c0cd952SAdrian Hunter 				sample->addr, ts->kernel_start);
8163c0cd952SAdrian Hunter 	if (!cp)
8173c0cd952SAdrian Hunter 		return -ENOMEM;
8183c0cd952SAdrian Hunter 
8193c0cd952SAdrian Hunter 	/* Replace the top-of-stack with the actual target */
8203c0cd952SAdrian Hunter 	ts->stack[ts->cnt - 1].cp = cp;
8213c0cd952SAdrian Hunter 
8223c0cd952SAdrian Hunter 	return 0;
8233c0cd952SAdrian Hunter }
8243c0cd952SAdrian Hunter 
82592a9e4f7SAdrian Hunter int thread_stack__process(struct thread *thread, struct comm *comm,
82692a9e4f7SAdrian Hunter 			  struct perf_sample *sample,
82792a9e4f7SAdrian Hunter 			  struct addr_location *from_al,
82892a9e4f7SAdrian Hunter 			  struct addr_location *to_al, u64 ref,
82992a9e4f7SAdrian Hunter 			  struct call_return_processor *crp)
83092a9e4f7SAdrian Hunter {
831256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, sample->cpu);
8323c0cd952SAdrian Hunter 	enum retpoline_state_t rstate;
83392a9e4f7SAdrian Hunter 	int err = 0;
83492a9e4f7SAdrian Hunter 
83503b32cb2SAdrian Hunter 	if (ts && !ts->crp) {
83692a9e4f7SAdrian Hunter 		/* Supersede thread_stack__event() */
837f6060ac6SAdrian Hunter 		thread_stack__reset(thread, ts);
83803b32cb2SAdrian Hunter 		ts = NULL;
83992a9e4f7SAdrian Hunter 	}
84003b32cb2SAdrian Hunter 
84103b32cb2SAdrian Hunter 	if (!ts) {
842256d92bcSAdrian Hunter 		ts = thread_stack__new(thread, sample->cpu, crp);
843bd8e68acSAdrian Hunter 		if (!ts)
84492a9e4f7SAdrian Hunter 			return -ENOMEM;
84592a9e4f7SAdrian Hunter 		ts->comm = comm;
84692a9e4f7SAdrian Hunter 	}
84792a9e4f7SAdrian Hunter 
8483c0cd952SAdrian Hunter 	rstate = ts->rstate;
8493c0cd952SAdrian Hunter 	if (rstate == X86_RETPOLINE_DETECTED)
8503c0cd952SAdrian Hunter 		ts->rstate = X86_RETPOLINE_POSSIBLE;
8513c0cd952SAdrian Hunter 
85292a9e4f7SAdrian Hunter 	/* Flush stack on exec */
85392a9e4f7SAdrian Hunter 	if (ts->comm != comm && thread->pid_ == thread->tid) {
854a5499b37SAdrian Hunter 		err = __thread_stack__flush(thread, ts);
85592a9e4f7SAdrian Hunter 		if (err)
85692a9e4f7SAdrian Hunter 			return err;
85792a9e4f7SAdrian Hunter 		ts->comm = comm;
85892a9e4f7SAdrian Hunter 	}
85992a9e4f7SAdrian Hunter 
86092a9e4f7SAdrian Hunter 	/* If the stack is empty, put the current symbol on the stack */
86192a9e4f7SAdrian Hunter 	if (!ts->cnt) {
862e0b89511SAdrian Hunter 		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
86392a9e4f7SAdrian Hunter 		if (err)
86492a9e4f7SAdrian Hunter 			return err;
86592a9e4f7SAdrian Hunter 	}
86692a9e4f7SAdrian Hunter 
86792a9e4f7SAdrian Hunter 	ts->branch_count += 1;
86892a9e4f7SAdrian Hunter 	ts->last_time = sample->time;
86992a9e4f7SAdrian Hunter 
87092a9e4f7SAdrian Hunter 	if (sample->flags & PERF_IP_FLAG_CALL) {
8712dcde4e1SAdrian Hunter 		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
87292a9e4f7SAdrian Hunter 		struct call_path_root *cpr = ts->crp->cpr;
87392a9e4f7SAdrian Hunter 		struct call_path *cp;
87492a9e4f7SAdrian Hunter 		u64 ret_addr;
87592a9e4f7SAdrian Hunter 
87692a9e4f7SAdrian Hunter 		if (!sample->ip || !sample->addr)
87792a9e4f7SAdrian Hunter 			return 0;
87892a9e4f7SAdrian Hunter 
87992a9e4f7SAdrian Hunter 		ret_addr = sample->ip + sample->insn_len;
88092a9e4f7SAdrian Hunter 		if (ret_addr == sample->addr)
88192a9e4f7SAdrian Hunter 			return 0; /* Zero-length calls are excluded */
88292a9e4f7SAdrian Hunter 
88392a9e4f7SAdrian Hunter 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
88492a9e4f7SAdrian Hunter 					to_al->sym, sample->addr,
88592a9e4f7SAdrian Hunter 					ts->kernel_start);
88692a9e4f7SAdrian Hunter 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
8872dcde4e1SAdrian Hunter 					    cp, false, trace_end);
8883c0cd952SAdrian Hunter 
8893c0cd952SAdrian Hunter 		/*
8903c0cd952SAdrian Hunter 		 * A call to the same symbol but not the start of the symbol,
8913c0cd952SAdrian Hunter 		 * may be the start of a x86 retpoline.
8923c0cd952SAdrian Hunter 		 */
8933c0cd952SAdrian Hunter 		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
8943c0cd952SAdrian Hunter 		    from_al->sym == to_al->sym &&
8953c0cd952SAdrian Hunter 		    to_al->addr != to_al->sym->start)
8963c0cd952SAdrian Hunter 			ts->rstate = X86_RETPOLINE_DETECTED;
8973c0cd952SAdrian Hunter 
89892a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
89992a9e4f7SAdrian Hunter 		if (!sample->ip || !sample->addr)
90092a9e4f7SAdrian Hunter 			return 0;
90192a9e4f7SAdrian Hunter 
9023c0cd952SAdrian Hunter 		/* x86 retpoline 'return' doesn't match the stack */
9033c0cd952SAdrian Hunter 		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
9043c0cd952SAdrian Hunter 		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
9053c0cd952SAdrian Hunter 			return thread_stack__x86_retpoline(ts, sample, to_al);
9063c0cd952SAdrian Hunter 
90792a9e4f7SAdrian Hunter 		err = thread_stack__pop_cp(thread, ts, sample->addr,
90892a9e4f7SAdrian Hunter 					   sample->time, ref, from_al->sym);
90992a9e4f7SAdrian Hunter 		if (err) {
91092a9e4f7SAdrian Hunter 			if (err < 0)
91192a9e4f7SAdrian Hunter 				return err;
91292a9e4f7SAdrian Hunter 			err = thread_stack__no_call_return(thread, ts, sample,
91392a9e4f7SAdrian Hunter 							   from_al, to_al, ref);
91492a9e4f7SAdrian Hunter 		}
91592a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
91692a9e4f7SAdrian Hunter 		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
91792a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
91892a9e4f7SAdrian Hunter 		err = thread_stack__trace_end(ts, sample, ref);
919f08046cbSAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
920f08046cbSAdrian Hunter 		   from_al->sym != to_al->sym && to_al->sym &&
921f08046cbSAdrian Hunter 		   to_al->addr == to_al->sym->start) {
922f08046cbSAdrian Hunter 		struct call_path_root *cpr = ts->crp->cpr;
923f08046cbSAdrian Hunter 		struct call_path *cp;
924f08046cbSAdrian Hunter 
925f08046cbSAdrian Hunter 		/*
926f08046cbSAdrian Hunter 		 * The compiler might optimize a call/ret combination by making
927f08046cbSAdrian Hunter 		 * it a jmp. Make that visible by recording on the stack a
928f08046cbSAdrian Hunter 		 * branch to the start of a different symbol. Note, that means
929f08046cbSAdrian Hunter 		 * when a ret pops the stack, all jmps must be popped off first.
930f08046cbSAdrian Hunter 		 */
931f08046cbSAdrian Hunter 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
932f08046cbSAdrian Hunter 					to_al->sym, sample->addr,
933f08046cbSAdrian Hunter 					ts->kernel_start);
934f08046cbSAdrian Hunter 		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
935f08046cbSAdrian Hunter 					    false);
936f08046cbSAdrian Hunter 		if (!err)
937f08046cbSAdrian Hunter 			ts->stack[ts->cnt - 1].non_call = true;
93892a9e4f7SAdrian Hunter 	}
93992a9e4f7SAdrian Hunter 
94092a9e4f7SAdrian Hunter 	return err;
94192a9e4f7SAdrian Hunter }
942e216708dSAdrian Hunter 
943256d92bcSAdrian Hunter size_t thread_stack__depth(struct thread *thread, int cpu)
944e216708dSAdrian Hunter {
945256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
946bd8e68acSAdrian Hunter 
947bd8e68acSAdrian Hunter 	if (!ts)
948e216708dSAdrian Hunter 		return 0;
949bd8e68acSAdrian Hunter 	return ts->cnt;
950e216708dSAdrian Hunter }
951