xref: /openbmc/linux/tools/perf/util/thread-stack.c (revision 3c0cd952)
100447ccdSAdrian Hunter /*
200447ccdSAdrian Hunter  * thread-stack.c: Synthesize a thread's stack using call / return events
300447ccdSAdrian Hunter  * Copyright (c) 2014, Intel Corporation.
400447ccdSAdrian Hunter  *
500447ccdSAdrian Hunter  * This program is free software; you can redistribute it and/or modify it
600447ccdSAdrian Hunter  * under the terms and conditions of the GNU General Public License,
700447ccdSAdrian Hunter  * version 2, as published by the Free Software Foundation.
800447ccdSAdrian Hunter  *
900447ccdSAdrian Hunter  * This program is distributed in the hope it will be useful, but WITHOUT
1000447ccdSAdrian Hunter  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1100447ccdSAdrian Hunter  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
1200447ccdSAdrian Hunter  * more details.
1300447ccdSAdrian Hunter  *
1400447ccdSAdrian Hunter  */
1500447ccdSAdrian Hunter 
1692a9e4f7SAdrian Hunter #include <linux/rbtree.h>
1792a9e4f7SAdrian Hunter #include <linux/list.h>
18256d92bcSAdrian Hunter #include <linux/log2.h>
19a43783aeSArnaldo Carvalho de Melo #include <errno.h>
2000447ccdSAdrian Hunter #include "thread.h"
2100447ccdSAdrian Hunter #include "event.h"
2292a9e4f7SAdrian Hunter #include "machine.h"
233c0cd952SAdrian Hunter #include "env.h"
2400447ccdSAdrian Hunter #include "util.h"
2500447ccdSAdrian Hunter #include "debug.h"
2692a9e4f7SAdrian Hunter #include "symbol.h"
2792a9e4f7SAdrian Hunter #include "comm.h"
28451db126SChris Phlipot #include "call-path.h"
2900447ccdSAdrian Hunter #include "thread-stack.h"
3000447ccdSAdrian Hunter 
3192a9e4f7SAdrian Hunter #define STACK_GROWTH 2048
3292a9e4f7SAdrian Hunter 
/*
 * Retpoline detection state, kept per thread stack.
 *
 * RETPOLINE_NONE: retpoline detection is not in effect
 * X86_RETPOLINE_POSSIBLE: an x86 retpoline may be encountered
 * X86_RETPOLINE_DETECTED: an x86 retpoline has been detected
 *
 * RETPOLINE_NONE is zero, so a zero-filled struct thread_stack starts out
 * with no retpoline detection.
 */
enum retpoline_state_t {
	RETPOLINE_NONE		= 0,
	X86_RETPOLINE_POSSIBLE	= 1,
	X86_RETPOLINE_DETECTED	= 2,
};
453c0cd952SAdrian Hunter 
4692a9e4f7SAdrian Hunter /**
4792a9e4f7SAdrian Hunter  * struct thread_stack_entry - thread stack entry.
4892a9e4f7SAdrian Hunter  * @ret_addr: return address
4992a9e4f7SAdrian Hunter  * @timestamp: timestamp (if known)
5092a9e4f7SAdrian Hunter  * @ref: external reference (e.g. db_id of sample)
5192a9e4f7SAdrian Hunter  * @branch_count: the branch count when the entry was created
5292a9e4f7SAdrian Hunter  * @cp: call path
5392a9e4f7SAdrian Hunter  * @no_call: a 'call' was not seen
544d60e5e3SAdrian Hunter  * @trace_end: a 'call' but trace ended
55f08046cbSAdrian Hunter  * @non_call: a branch but not a 'call' to the start of a different symbol
5692a9e4f7SAdrian Hunter  */
5792a9e4f7SAdrian Hunter struct thread_stack_entry {
5892a9e4f7SAdrian Hunter 	u64 ret_addr;
5992a9e4f7SAdrian Hunter 	u64 timestamp;
6092a9e4f7SAdrian Hunter 	u64 ref;
6192a9e4f7SAdrian Hunter 	u64 branch_count;
6292a9e4f7SAdrian Hunter 	struct call_path *cp;
6392a9e4f7SAdrian Hunter 	bool no_call;
644d60e5e3SAdrian Hunter 	bool trace_end;
65f08046cbSAdrian Hunter 	bool non_call;
6692a9e4f7SAdrian Hunter };
6792a9e4f7SAdrian Hunter 
6892a9e4f7SAdrian Hunter /**
6992a9e4f7SAdrian Hunter  * struct thread_stack - thread stack constructed from 'call' and 'return'
7092a9e4f7SAdrian Hunter  *                       branch samples.
7192a9e4f7SAdrian Hunter  * @stack: array that holds the stack
7292a9e4f7SAdrian Hunter  * @cnt: number of entries in the stack
7392a9e4f7SAdrian Hunter  * @sz: current maximum stack size
7492a9e4f7SAdrian Hunter  * @trace_nr: current trace number
7592a9e4f7SAdrian Hunter  * @branch_count: running branch count
7692a9e4f7SAdrian Hunter  * @kernel_start: kernel start address
7792a9e4f7SAdrian Hunter  * @last_time: last timestamp
7892a9e4f7SAdrian Hunter  * @crp: call/return processor
7992a9e4f7SAdrian Hunter  * @comm: current comm
80f6060ac6SAdrian Hunter  * @arr_sz: size of array if this is the first element of an array
813c0cd952SAdrian Hunter  * @rstate: used to detect retpolines
8292a9e4f7SAdrian Hunter  */
8300447ccdSAdrian Hunter struct thread_stack {
8400447ccdSAdrian Hunter 	struct thread_stack_entry *stack;
8500447ccdSAdrian Hunter 	size_t cnt;
8600447ccdSAdrian Hunter 	size_t sz;
8700447ccdSAdrian Hunter 	u64 trace_nr;
8892a9e4f7SAdrian Hunter 	u64 branch_count;
8992a9e4f7SAdrian Hunter 	u64 kernel_start;
9092a9e4f7SAdrian Hunter 	u64 last_time;
9192a9e4f7SAdrian Hunter 	struct call_return_processor *crp;
9292a9e4f7SAdrian Hunter 	struct comm *comm;
93f6060ac6SAdrian Hunter 	unsigned int arr_sz;
943c0cd952SAdrian Hunter 	enum retpoline_state_t rstate;
9500447ccdSAdrian Hunter };
9600447ccdSAdrian Hunter 
97256d92bcSAdrian Hunter /*
98256d92bcSAdrian Hunter  * Assume pid == tid == 0 identifies the idle task as defined by
99256d92bcSAdrian Hunter  * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
100256d92bcSAdrian Hunter  * and therefore requires a stack for each cpu.
101256d92bcSAdrian Hunter  */
102256d92bcSAdrian Hunter static inline bool thread_stack__per_cpu(struct thread *thread)
103256d92bcSAdrian Hunter {
104256d92bcSAdrian Hunter 	return !(thread->tid || thread->pid_);
105256d92bcSAdrian Hunter }
106256d92bcSAdrian Hunter 
10700447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts)
10800447ccdSAdrian Hunter {
10900447ccdSAdrian Hunter 	struct thread_stack_entry *new_stack;
11000447ccdSAdrian Hunter 	size_t sz, new_sz;
11100447ccdSAdrian Hunter 
11200447ccdSAdrian Hunter 	new_sz = ts->sz + STACK_GROWTH;
11300447ccdSAdrian Hunter 	sz = new_sz * sizeof(struct thread_stack_entry);
11400447ccdSAdrian Hunter 
11500447ccdSAdrian Hunter 	new_stack = realloc(ts->stack, sz);
11600447ccdSAdrian Hunter 	if (!new_stack)
11700447ccdSAdrian Hunter 		return -ENOMEM;
11800447ccdSAdrian Hunter 
11900447ccdSAdrian Hunter 	ts->stack = new_stack;
12000447ccdSAdrian Hunter 	ts->sz = new_sz;
12100447ccdSAdrian Hunter 
12200447ccdSAdrian Hunter 	return 0;
12300447ccdSAdrian Hunter }
12400447ccdSAdrian Hunter 
1252e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
1262e9e8688SAdrian Hunter 			      struct call_return_processor *crp)
1272e9e8688SAdrian Hunter {
1282e9e8688SAdrian Hunter 	int err;
1292e9e8688SAdrian Hunter 
1302e9e8688SAdrian Hunter 	err = thread_stack__grow(ts);
1312e9e8688SAdrian Hunter 	if (err)
1322e9e8688SAdrian Hunter 		return err;
1332e9e8688SAdrian Hunter 
1343c0cd952SAdrian Hunter 	if (thread->mg && thread->mg->machine) {
1353c0cd952SAdrian Hunter 		struct machine *machine = thread->mg->machine;
1363c0cd952SAdrian Hunter 		const char *arch = perf_env__arch(machine->env);
1373c0cd952SAdrian Hunter 
1383c0cd952SAdrian Hunter 		ts->kernel_start = machine__kernel_start(machine);
1393c0cd952SAdrian Hunter 		if (!strcmp(arch, "x86"))
1403c0cd952SAdrian Hunter 			ts->rstate = X86_RETPOLINE_POSSIBLE;
1413c0cd952SAdrian Hunter 	} else {
1422e9e8688SAdrian Hunter 		ts->kernel_start = 1ULL << 63;
1433c0cd952SAdrian Hunter 	}
1442e9e8688SAdrian Hunter 	ts->crp = crp;
1452e9e8688SAdrian Hunter 
1462e9e8688SAdrian Hunter 	return 0;
1472e9e8688SAdrian Hunter }
1482e9e8688SAdrian Hunter 
149256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
15092a9e4f7SAdrian Hunter 					      struct call_return_processor *crp)
15100447ccdSAdrian Hunter {
152139f42f3SAdrian Hunter 	struct thread_stack *ts = thread->ts, *new_ts;
153139f42f3SAdrian Hunter 	unsigned int old_sz = ts ? ts->arr_sz : 0;
154139f42f3SAdrian Hunter 	unsigned int new_sz = 1;
15500447ccdSAdrian Hunter 
156256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0)
157256d92bcSAdrian Hunter 		new_sz = roundup_pow_of_two(cpu + 1);
158256d92bcSAdrian Hunter 
159139f42f3SAdrian Hunter 	if (!ts || new_sz > old_sz) {
160139f42f3SAdrian Hunter 		new_ts = calloc(new_sz, sizeof(*ts));
161139f42f3SAdrian Hunter 		if (!new_ts)
16200447ccdSAdrian Hunter 			return NULL;
163139f42f3SAdrian Hunter 		if (ts)
164139f42f3SAdrian Hunter 			memcpy(new_ts, ts, old_sz * sizeof(*ts));
165139f42f3SAdrian Hunter 		new_ts->arr_sz = new_sz;
166139f42f3SAdrian Hunter 		zfree(&thread->ts);
167139f42f3SAdrian Hunter 		thread->ts = new_ts;
168139f42f3SAdrian Hunter 		ts = new_ts;
16900447ccdSAdrian Hunter 	}
17000447ccdSAdrian Hunter 
171256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0 &&
172256d92bcSAdrian Hunter 	    (unsigned int)cpu < ts->arr_sz)
173256d92bcSAdrian Hunter 		ts += cpu;
174256d92bcSAdrian Hunter 
175139f42f3SAdrian Hunter 	if (!ts->stack &&
176139f42f3SAdrian Hunter 	    thread_stack__init(ts, thread, crp))
177139f42f3SAdrian Hunter 		return NULL;
178bd8e68acSAdrian Hunter 
17900447ccdSAdrian Hunter 	return ts;
18000447ccdSAdrian Hunter }
18100447ccdSAdrian Hunter 
182256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
183bd8e68acSAdrian Hunter {
184256d92bcSAdrian Hunter 	struct thread_stack *ts = thread->ts;
185256d92bcSAdrian Hunter 
186256d92bcSAdrian Hunter 	if (cpu < 0)
187256d92bcSAdrian Hunter 		cpu = 0;
188256d92bcSAdrian Hunter 
189256d92bcSAdrian Hunter 	if (!ts || (unsigned int)cpu >= ts->arr_sz)
190256d92bcSAdrian Hunter 		return NULL;
191256d92bcSAdrian Hunter 
192256d92bcSAdrian Hunter 	ts += cpu;
193256d92bcSAdrian Hunter 
194256d92bcSAdrian Hunter 	if (!ts->stack)
195256d92bcSAdrian Hunter 		return NULL;
196256d92bcSAdrian Hunter 
197256d92bcSAdrian Hunter 	return ts;
198256d92bcSAdrian Hunter }
199256d92bcSAdrian Hunter 
200256d92bcSAdrian Hunter static inline struct thread_stack *thread__stack(struct thread *thread,
201256d92bcSAdrian Hunter 						    int cpu)
202256d92bcSAdrian Hunter {
203256d92bcSAdrian Hunter 	if (!thread)
204256d92bcSAdrian Hunter 		return NULL;
205256d92bcSAdrian Hunter 
206256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread))
207256d92bcSAdrian Hunter 		return thread__cpu_stack(thread, cpu);
208256d92bcSAdrian Hunter 
209256d92bcSAdrian Hunter 	return thread->ts;
210bd8e68acSAdrian Hunter }
211bd8e68acSAdrian Hunter 
2124d60e5e3SAdrian Hunter static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
2134d60e5e3SAdrian Hunter 			      bool trace_end)
21400447ccdSAdrian Hunter {
21500447ccdSAdrian Hunter 	int err = 0;
21600447ccdSAdrian Hunter 
21700447ccdSAdrian Hunter 	if (ts->cnt == ts->sz) {
21800447ccdSAdrian Hunter 		err = thread_stack__grow(ts);
21900447ccdSAdrian Hunter 		if (err) {
22000447ccdSAdrian Hunter 			pr_warning("Out of memory: discarding thread stack\n");
22100447ccdSAdrian Hunter 			ts->cnt = 0;
22200447ccdSAdrian Hunter 		}
22300447ccdSAdrian Hunter 	}
22400447ccdSAdrian Hunter 
2254d60e5e3SAdrian Hunter 	ts->stack[ts->cnt].trace_end = trace_end;
22600447ccdSAdrian Hunter 	ts->stack[ts->cnt++].ret_addr = ret_addr;
22700447ccdSAdrian Hunter 
22800447ccdSAdrian Hunter 	return err;
22900447ccdSAdrian Hunter }
23000447ccdSAdrian Hunter 
23100447ccdSAdrian Hunter static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
23200447ccdSAdrian Hunter {
23300447ccdSAdrian Hunter 	size_t i;
23400447ccdSAdrian Hunter 
23500447ccdSAdrian Hunter 	/*
23600447ccdSAdrian Hunter 	 * In some cases there may be functions which are not seen to return.
23700447ccdSAdrian Hunter 	 * For example when setjmp / longjmp has been used.  Or the perf context
23800447ccdSAdrian Hunter 	 * switch in the kernel which doesn't stop and start tracing in exactly
23900447ccdSAdrian Hunter 	 * the same code path.  When that happens the return address will be
24000447ccdSAdrian Hunter 	 * further down the stack.  If the return address is not found at all,
24100447ccdSAdrian Hunter 	 * we assume the opposite (i.e. this is a return for a call that wasn't
24200447ccdSAdrian Hunter 	 * seen for some reason) and leave the stack alone.
24300447ccdSAdrian Hunter 	 */
24400447ccdSAdrian Hunter 	for (i = ts->cnt; i; ) {
24500447ccdSAdrian Hunter 		if (ts->stack[--i].ret_addr == ret_addr) {
24600447ccdSAdrian Hunter 			ts->cnt = i;
24700447ccdSAdrian Hunter 			return;
24800447ccdSAdrian Hunter 		}
24900447ccdSAdrian Hunter 	}
25000447ccdSAdrian Hunter }
25100447ccdSAdrian Hunter 
2524d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts)
2534d60e5e3SAdrian Hunter {
2544d60e5e3SAdrian Hunter 	size_t i;
2554d60e5e3SAdrian Hunter 
2564d60e5e3SAdrian Hunter 	for (i = ts->cnt; i; ) {
2574d60e5e3SAdrian Hunter 		if (ts->stack[--i].trace_end)
2584d60e5e3SAdrian Hunter 			ts->cnt = i;
2594d60e5e3SAdrian Hunter 		else
2604d60e5e3SAdrian Hunter 			return;
2614d60e5e3SAdrian Hunter 	}
2624d60e5e3SAdrian Hunter }
2634d60e5e3SAdrian Hunter 
26492a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts)
26592a9e4f7SAdrian Hunter {
26692a9e4f7SAdrian Hunter 	if (!ts->cnt)
26792a9e4f7SAdrian Hunter 		return false;
26892a9e4f7SAdrian Hunter 
26992a9e4f7SAdrian Hunter 	return ts->stack[ts->cnt - 1].cp->in_kernel;
27092a9e4f7SAdrian Hunter }
27192a9e4f7SAdrian Hunter 
27292a9e4f7SAdrian Hunter static int thread_stack__call_return(struct thread *thread,
27392a9e4f7SAdrian Hunter 				     struct thread_stack *ts, size_t idx,
27492a9e4f7SAdrian Hunter 				     u64 timestamp, u64 ref, bool no_return)
27592a9e4f7SAdrian Hunter {
27692a9e4f7SAdrian Hunter 	struct call_return_processor *crp = ts->crp;
27792a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
27892a9e4f7SAdrian Hunter 	struct call_return cr = {
27992a9e4f7SAdrian Hunter 		.thread = thread,
28092a9e4f7SAdrian Hunter 		.comm = ts->comm,
28192a9e4f7SAdrian Hunter 		.db_id = 0,
28292a9e4f7SAdrian Hunter 	};
28392a9e4f7SAdrian Hunter 
28492a9e4f7SAdrian Hunter 	tse = &ts->stack[idx];
28592a9e4f7SAdrian Hunter 	cr.cp = tse->cp;
28692a9e4f7SAdrian Hunter 	cr.call_time = tse->timestamp;
28792a9e4f7SAdrian Hunter 	cr.return_time = timestamp;
28892a9e4f7SAdrian Hunter 	cr.branch_count = ts->branch_count - tse->branch_count;
28992a9e4f7SAdrian Hunter 	cr.call_ref = tse->ref;
29092a9e4f7SAdrian Hunter 	cr.return_ref = ref;
29192a9e4f7SAdrian Hunter 	if (tse->no_call)
29292a9e4f7SAdrian Hunter 		cr.flags |= CALL_RETURN_NO_CALL;
29392a9e4f7SAdrian Hunter 	if (no_return)
29492a9e4f7SAdrian Hunter 		cr.flags |= CALL_RETURN_NO_RETURN;
295f08046cbSAdrian Hunter 	if (tse->non_call)
296f08046cbSAdrian Hunter 		cr.flags |= CALL_RETURN_NON_CALL;
29792a9e4f7SAdrian Hunter 
29892a9e4f7SAdrian Hunter 	return crp->process(&cr, crp->data);
29992a9e4f7SAdrian Hunter }
30092a9e4f7SAdrian Hunter 
301a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
30292a9e4f7SAdrian Hunter {
30392a9e4f7SAdrian Hunter 	struct call_return_processor *crp = ts->crp;
30492a9e4f7SAdrian Hunter 	int err;
30592a9e4f7SAdrian Hunter 
30692a9e4f7SAdrian Hunter 	if (!crp) {
30792a9e4f7SAdrian Hunter 		ts->cnt = 0;
30892a9e4f7SAdrian Hunter 		return 0;
30992a9e4f7SAdrian Hunter 	}
31092a9e4f7SAdrian Hunter 
31192a9e4f7SAdrian Hunter 	while (ts->cnt) {
31292a9e4f7SAdrian Hunter 		err = thread_stack__call_return(thread, ts, --ts->cnt,
31392a9e4f7SAdrian Hunter 						ts->last_time, 0, true);
31492a9e4f7SAdrian Hunter 		if (err) {
31592a9e4f7SAdrian Hunter 			pr_err("Error flushing thread stack!\n");
31692a9e4f7SAdrian Hunter 			ts->cnt = 0;
31792a9e4f7SAdrian Hunter 			return err;
31892a9e4f7SAdrian Hunter 		}
31992a9e4f7SAdrian Hunter 	}
32092a9e4f7SAdrian Hunter 
32192a9e4f7SAdrian Hunter 	return 0;
32292a9e4f7SAdrian Hunter }
32392a9e4f7SAdrian Hunter 
324a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread)
325a5499b37SAdrian Hunter {
326bd8e68acSAdrian Hunter 	struct thread_stack *ts = thread->ts;
327f6060ac6SAdrian Hunter 	unsigned int pos;
328f6060ac6SAdrian Hunter 	int err = 0;
329bd8e68acSAdrian Hunter 
330f6060ac6SAdrian Hunter 	if (ts) {
331f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++) {
332f6060ac6SAdrian Hunter 			int ret = __thread_stack__flush(thread, ts + pos);
333a5499b37SAdrian Hunter 
334f6060ac6SAdrian Hunter 			if (ret)
335f6060ac6SAdrian Hunter 				err = ret;
336f6060ac6SAdrian Hunter 		}
337f6060ac6SAdrian Hunter 	}
338f6060ac6SAdrian Hunter 
339f6060ac6SAdrian Hunter 	return err;
340a5499b37SAdrian Hunter }
341a5499b37SAdrian Hunter 
342256d92bcSAdrian Hunter int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
34300447ccdSAdrian Hunter 			u64 to_ip, u16 insn_len, u64 trace_nr)
34400447ccdSAdrian Hunter {
345256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
346bd8e68acSAdrian Hunter 
34700447ccdSAdrian Hunter 	if (!thread)
34800447ccdSAdrian Hunter 		return -EINVAL;
34900447ccdSAdrian Hunter 
350bd8e68acSAdrian Hunter 	if (!ts) {
351256d92bcSAdrian Hunter 		ts = thread_stack__new(thread, cpu, NULL);
352bd8e68acSAdrian Hunter 		if (!ts) {
35300447ccdSAdrian Hunter 			pr_warning("Out of memory: no thread stack\n");
35400447ccdSAdrian Hunter 			return -ENOMEM;
35500447ccdSAdrian Hunter 		}
356bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
35700447ccdSAdrian Hunter 	}
35800447ccdSAdrian Hunter 
35900447ccdSAdrian Hunter 	/*
36000447ccdSAdrian Hunter 	 * When the trace is discontinuous, the trace_nr changes.  In that case
36100447ccdSAdrian Hunter 	 * the stack might be completely invalid.  Better to report nothing than
36292a9e4f7SAdrian Hunter 	 * to report something misleading, so flush the stack.
36300447ccdSAdrian Hunter 	 */
364bd8e68acSAdrian Hunter 	if (trace_nr != ts->trace_nr) {
365bd8e68acSAdrian Hunter 		if (ts->trace_nr)
366bd8e68acSAdrian Hunter 			__thread_stack__flush(thread, ts);
367bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
36800447ccdSAdrian Hunter 	}
36900447ccdSAdrian Hunter 
37092a9e4f7SAdrian Hunter 	/* Stop here if thread_stack__process() is in use */
371bd8e68acSAdrian Hunter 	if (ts->crp)
37292a9e4f7SAdrian Hunter 		return 0;
37392a9e4f7SAdrian Hunter 
37400447ccdSAdrian Hunter 	if (flags & PERF_IP_FLAG_CALL) {
37500447ccdSAdrian Hunter 		u64 ret_addr;
37600447ccdSAdrian Hunter 
37700447ccdSAdrian Hunter 		if (!to_ip)
37800447ccdSAdrian Hunter 			return 0;
37900447ccdSAdrian Hunter 		ret_addr = from_ip + insn_len;
38000447ccdSAdrian Hunter 		if (ret_addr == to_ip)
38100447ccdSAdrian Hunter 			return 0; /* Zero-length calls are excluded */
382bd8e68acSAdrian Hunter 		return thread_stack__push(ts, ret_addr,
3834d60e5e3SAdrian Hunter 					  flags & PERF_IP_FLAG_TRACE_END);
3844d60e5e3SAdrian Hunter 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
3854d60e5e3SAdrian Hunter 		/*
3864d60e5e3SAdrian Hunter 		 * If the caller did not change the trace number (which would
3874d60e5e3SAdrian Hunter 		 * have flushed the stack) then try to make sense of the stack.
3884d60e5e3SAdrian Hunter 		 * Possibly, tracing began after returning to the current
3894d60e5e3SAdrian Hunter 		 * address, so try to pop that. Also, do not expect a call made
3904d60e5e3SAdrian Hunter 		 * when the trace ended, to return, so pop that.
3914d60e5e3SAdrian Hunter 		 */
392bd8e68acSAdrian Hunter 		thread_stack__pop(ts, to_ip);
393bd8e68acSAdrian Hunter 		thread_stack__pop_trace_end(ts);
3944d60e5e3SAdrian Hunter 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
395bd8e68acSAdrian Hunter 		thread_stack__pop(ts, to_ip);
39600447ccdSAdrian Hunter 	}
39700447ccdSAdrian Hunter 
39800447ccdSAdrian Hunter 	return 0;
39900447ccdSAdrian Hunter }
40000447ccdSAdrian Hunter 
401256d92bcSAdrian Hunter void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
40292a9e4f7SAdrian Hunter {
403256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
404bd8e68acSAdrian Hunter 
405bd8e68acSAdrian Hunter 	if (!ts)
40692a9e4f7SAdrian Hunter 		return;
40792a9e4f7SAdrian Hunter 
408bd8e68acSAdrian Hunter 	if (trace_nr != ts->trace_nr) {
409bd8e68acSAdrian Hunter 		if (ts->trace_nr)
410bd8e68acSAdrian Hunter 			__thread_stack__flush(thread, ts);
411bd8e68acSAdrian Hunter 		ts->trace_nr = trace_nr;
41292a9e4f7SAdrian Hunter 	}
41392a9e4f7SAdrian Hunter }
41492a9e4f7SAdrian Hunter 
415f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
416f6060ac6SAdrian Hunter {
417f6060ac6SAdrian Hunter 	__thread_stack__flush(thread, ts);
418f6060ac6SAdrian Hunter 	zfree(&ts->stack);
419f6060ac6SAdrian Hunter }
420f6060ac6SAdrian Hunter 
421f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
422f6060ac6SAdrian Hunter {
423f6060ac6SAdrian Hunter 	unsigned int arr_sz = ts->arr_sz;
424f6060ac6SAdrian Hunter 
425f6060ac6SAdrian Hunter 	__thread_stack__free(thread, ts);
426f6060ac6SAdrian Hunter 	memset(ts, 0, sizeof(*ts));
427f6060ac6SAdrian Hunter 	ts->arr_sz = arr_sz;
428f6060ac6SAdrian Hunter }
429f6060ac6SAdrian Hunter 
43000447ccdSAdrian Hunter void thread_stack__free(struct thread *thread)
43100447ccdSAdrian Hunter {
432bd8e68acSAdrian Hunter 	struct thread_stack *ts = thread->ts;
433f6060ac6SAdrian Hunter 	unsigned int pos;
434bd8e68acSAdrian Hunter 
435bd8e68acSAdrian Hunter 	if (ts) {
436f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++)
437f6060ac6SAdrian Hunter 			__thread_stack__free(thread, ts + pos);
43800447ccdSAdrian Hunter 		zfree(&thread->ts);
43900447ccdSAdrian Hunter 	}
44000447ccdSAdrian Hunter }
44100447ccdSAdrian Hunter 
44224248306SAdrian Hunter static inline u64 callchain_context(u64 ip, u64 kernel_start)
44300447ccdSAdrian Hunter {
44424248306SAdrian Hunter 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
44524248306SAdrian Hunter }
44600447ccdSAdrian Hunter 
447256d92bcSAdrian Hunter void thread_stack__sample(struct thread *thread, int cpu,
448256d92bcSAdrian Hunter 			  struct ip_callchain *chain,
44924248306SAdrian Hunter 			  size_t sz, u64 ip, u64 kernel_start)
45024248306SAdrian Hunter {
451256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
45224248306SAdrian Hunter 	u64 context = callchain_context(ip, kernel_start);
45324248306SAdrian Hunter 	u64 last_context;
45424248306SAdrian Hunter 	size_t i, j;
45500447ccdSAdrian Hunter 
45624248306SAdrian Hunter 	if (sz < 2) {
45724248306SAdrian Hunter 		chain->nr = 0;
45824248306SAdrian Hunter 		return;
45924248306SAdrian Hunter 	}
46000447ccdSAdrian Hunter 
46124248306SAdrian Hunter 	chain->ips[0] = context;
46224248306SAdrian Hunter 	chain->ips[1] = ip;
46324248306SAdrian Hunter 
464bd8e68acSAdrian Hunter 	if (!ts) {
46524248306SAdrian Hunter 		chain->nr = 2;
46624248306SAdrian Hunter 		return;
46724248306SAdrian Hunter 	}
46824248306SAdrian Hunter 
46924248306SAdrian Hunter 	last_context = context;
47024248306SAdrian Hunter 
471bd8e68acSAdrian Hunter 	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
472bd8e68acSAdrian Hunter 		ip = ts->stack[ts->cnt - j].ret_addr;
47324248306SAdrian Hunter 		context = callchain_context(ip, kernel_start);
47424248306SAdrian Hunter 		if (context != last_context) {
47524248306SAdrian Hunter 			if (i >= sz - 1)
47624248306SAdrian Hunter 				break;
47724248306SAdrian Hunter 			chain->ips[i++] = context;
47824248306SAdrian Hunter 			last_context = context;
47924248306SAdrian Hunter 		}
48024248306SAdrian Hunter 		chain->ips[i] = ip;
48124248306SAdrian Hunter 	}
48224248306SAdrian Hunter 
48324248306SAdrian Hunter 	chain->nr = i;
48400447ccdSAdrian Hunter }
48592a9e4f7SAdrian Hunter 
48692a9e4f7SAdrian Hunter struct call_return_processor *
48792a9e4f7SAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, void *data),
48892a9e4f7SAdrian Hunter 			   void *data)
48992a9e4f7SAdrian Hunter {
49092a9e4f7SAdrian Hunter 	struct call_return_processor *crp;
49192a9e4f7SAdrian Hunter 
49292a9e4f7SAdrian Hunter 	crp = zalloc(sizeof(struct call_return_processor));
49392a9e4f7SAdrian Hunter 	if (!crp)
49492a9e4f7SAdrian Hunter 		return NULL;
49592a9e4f7SAdrian Hunter 	crp->cpr = call_path_root__new();
49692a9e4f7SAdrian Hunter 	if (!crp->cpr)
49792a9e4f7SAdrian Hunter 		goto out_free;
49892a9e4f7SAdrian Hunter 	crp->process = process;
49992a9e4f7SAdrian Hunter 	crp->data = data;
50092a9e4f7SAdrian Hunter 	return crp;
50192a9e4f7SAdrian Hunter 
50292a9e4f7SAdrian Hunter out_free:
50392a9e4f7SAdrian Hunter 	free(crp);
50492a9e4f7SAdrian Hunter 	return NULL;
50592a9e4f7SAdrian Hunter }
50692a9e4f7SAdrian Hunter 
50792a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp)
50892a9e4f7SAdrian Hunter {
50992a9e4f7SAdrian Hunter 	if (crp) {
51092a9e4f7SAdrian Hunter 		call_path_root__free(crp->cpr);
51192a9e4f7SAdrian Hunter 		free(crp);
51292a9e4f7SAdrian Hunter 	}
51392a9e4f7SAdrian Hunter }
51492a9e4f7SAdrian Hunter 
51592a9e4f7SAdrian Hunter static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
51692a9e4f7SAdrian Hunter 				 u64 timestamp, u64 ref, struct call_path *cp,
5172dcde4e1SAdrian Hunter 				 bool no_call, bool trace_end)
51892a9e4f7SAdrian Hunter {
51992a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
52092a9e4f7SAdrian Hunter 	int err;
52192a9e4f7SAdrian Hunter 
522e7a3a055SAdrian Hunter 	if (!cp)
523e7a3a055SAdrian Hunter 		return -ENOMEM;
524e7a3a055SAdrian Hunter 
52592a9e4f7SAdrian Hunter 	if (ts->cnt == ts->sz) {
52692a9e4f7SAdrian Hunter 		err = thread_stack__grow(ts);
52792a9e4f7SAdrian Hunter 		if (err)
52892a9e4f7SAdrian Hunter 			return err;
52992a9e4f7SAdrian Hunter 	}
53092a9e4f7SAdrian Hunter 
53192a9e4f7SAdrian Hunter 	tse = &ts->stack[ts->cnt++];
53292a9e4f7SAdrian Hunter 	tse->ret_addr = ret_addr;
53392a9e4f7SAdrian Hunter 	tse->timestamp = timestamp;
53492a9e4f7SAdrian Hunter 	tse->ref = ref;
53592a9e4f7SAdrian Hunter 	tse->branch_count = ts->branch_count;
53692a9e4f7SAdrian Hunter 	tse->cp = cp;
53792a9e4f7SAdrian Hunter 	tse->no_call = no_call;
5382dcde4e1SAdrian Hunter 	tse->trace_end = trace_end;
539f08046cbSAdrian Hunter 	tse->non_call = false;
54092a9e4f7SAdrian Hunter 
54192a9e4f7SAdrian Hunter 	return 0;
54292a9e4f7SAdrian Hunter }
54392a9e4f7SAdrian Hunter 
54492a9e4f7SAdrian Hunter static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
54592a9e4f7SAdrian Hunter 				u64 ret_addr, u64 timestamp, u64 ref,
54692a9e4f7SAdrian Hunter 				struct symbol *sym)
54792a9e4f7SAdrian Hunter {
54892a9e4f7SAdrian Hunter 	int err;
54992a9e4f7SAdrian Hunter 
55092a9e4f7SAdrian Hunter 	if (!ts->cnt)
55192a9e4f7SAdrian Hunter 		return 1;
55292a9e4f7SAdrian Hunter 
55392a9e4f7SAdrian Hunter 	if (ts->cnt == 1) {
55492a9e4f7SAdrian Hunter 		struct thread_stack_entry *tse = &ts->stack[0];
55592a9e4f7SAdrian Hunter 
55692a9e4f7SAdrian Hunter 		if (tse->cp->sym == sym)
55792a9e4f7SAdrian Hunter 			return thread_stack__call_return(thread, ts, --ts->cnt,
55892a9e4f7SAdrian Hunter 							 timestamp, ref, false);
55992a9e4f7SAdrian Hunter 	}
56092a9e4f7SAdrian Hunter 
561f08046cbSAdrian Hunter 	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
562f08046cbSAdrian Hunter 	    !ts->stack[ts->cnt - 1].non_call) {
56392a9e4f7SAdrian Hunter 		return thread_stack__call_return(thread, ts, --ts->cnt,
56492a9e4f7SAdrian Hunter 						 timestamp, ref, false);
56592a9e4f7SAdrian Hunter 	} else {
56692a9e4f7SAdrian Hunter 		size_t i = ts->cnt - 1;
56792a9e4f7SAdrian Hunter 
56892a9e4f7SAdrian Hunter 		while (i--) {
569f08046cbSAdrian Hunter 			if (ts->stack[i].ret_addr != ret_addr ||
570f08046cbSAdrian Hunter 			    ts->stack[i].non_call)
57192a9e4f7SAdrian Hunter 				continue;
57292a9e4f7SAdrian Hunter 			i += 1;
57392a9e4f7SAdrian Hunter 			while (ts->cnt > i) {
57492a9e4f7SAdrian Hunter 				err = thread_stack__call_return(thread, ts,
57592a9e4f7SAdrian Hunter 								--ts->cnt,
57692a9e4f7SAdrian Hunter 								timestamp, ref,
57792a9e4f7SAdrian Hunter 								true);
57892a9e4f7SAdrian Hunter 				if (err)
57992a9e4f7SAdrian Hunter 					return err;
58092a9e4f7SAdrian Hunter 			}
58192a9e4f7SAdrian Hunter 			return thread_stack__call_return(thread, ts, --ts->cnt,
58292a9e4f7SAdrian Hunter 							 timestamp, ref, false);
58392a9e4f7SAdrian Hunter 		}
58492a9e4f7SAdrian Hunter 	}
58592a9e4f7SAdrian Hunter 
58692a9e4f7SAdrian Hunter 	return 1;
58792a9e4f7SAdrian Hunter }
58892a9e4f7SAdrian Hunter 
589e0b89511SAdrian Hunter static int thread_stack__bottom(struct thread_stack *ts,
59092a9e4f7SAdrian Hunter 				struct perf_sample *sample,
59192a9e4f7SAdrian Hunter 				struct addr_location *from_al,
59292a9e4f7SAdrian Hunter 				struct addr_location *to_al, u64 ref)
59392a9e4f7SAdrian Hunter {
59492a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
59592a9e4f7SAdrian Hunter 	struct call_path *cp;
59692a9e4f7SAdrian Hunter 	struct symbol *sym;
59792a9e4f7SAdrian Hunter 	u64 ip;
59892a9e4f7SAdrian Hunter 
59992a9e4f7SAdrian Hunter 	if (sample->ip) {
60092a9e4f7SAdrian Hunter 		ip = sample->ip;
60192a9e4f7SAdrian Hunter 		sym = from_al->sym;
60292a9e4f7SAdrian Hunter 	} else if (sample->addr) {
60392a9e4f7SAdrian Hunter 		ip = sample->addr;
60492a9e4f7SAdrian Hunter 		sym = to_al->sym;
60592a9e4f7SAdrian Hunter 	} else {
60692a9e4f7SAdrian Hunter 		return 0;
60792a9e4f7SAdrian Hunter 	}
60892a9e4f7SAdrian Hunter 
60992a9e4f7SAdrian Hunter 	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
61092a9e4f7SAdrian Hunter 				ts->kernel_start);
61192a9e4f7SAdrian Hunter 
612e0b89511SAdrian Hunter 	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
6132dcde4e1SAdrian Hunter 				     true, false);
61492a9e4f7SAdrian Hunter }
61592a9e4f7SAdrian Hunter 
61692a9e4f7SAdrian Hunter static int thread_stack__no_call_return(struct thread *thread,
61792a9e4f7SAdrian Hunter 					struct thread_stack *ts,
61892a9e4f7SAdrian Hunter 					struct perf_sample *sample,
61992a9e4f7SAdrian Hunter 					struct addr_location *from_al,
62092a9e4f7SAdrian Hunter 					struct addr_location *to_al, u64 ref)
62192a9e4f7SAdrian Hunter {
62292a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
62390c2cda7SAdrian Hunter 	struct call_path *root = &cpr->call_path;
62490c2cda7SAdrian Hunter 	struct symbol *fsym = from_al->sym;
62590c2cda7SAdrian Hunter 	struct symbol *tsym = to_al->sym;
62692a9e4f7SAdrian Hunter 	struct call_path *cp, *parent;
62792a9e4f7SAdrian Hunter 	u64 ks = ts->kernel_start;
62890c2cda7SAdrian Hunter 	u64 addr = sample->addr;
62990c2cda7SAdrian Hunter 	u64 tm = sample->time;
63090c2cda7SAdrian Hunter 	u64 ip = sample->ip;
63192a9e4f7SAdrian Hunter 	int err;
63292a9e4f7SAdrian Hunter 
63390c2cda7SAdrian Hunter 	if (ip >= ks && addr < ks) {
63492a9e4f7SAdrian Hunter 		/* Return to userspace, so pop all kernel addresses */
63592a9e4f7SAdrian Hunter 		while (thread_stack__in_kernel(ts)) {
63692a9e4f7SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
63790c2cda7SAdrian Hunter 							tm, ref, true);
63892a9e4f7SAdrian Hunter 			if (err)
63992a9e4f7SAdrian Hunter 				return err;
64092a9e4f7SAdrian Hunter 		}
64192a9e4f7SAdrian Hunter 
64292a9e4f7SAdrian Hunter 		/* If the stack is empty, push the userspace address */
64392a9e4f7SAdrian Hunter 		if (!ts->cnt) {
64490c2cda7SAdrian Hunter 			cp = call_path__findnew(cpr, root, tsym, addr, ks);
64590c2cda7SAdrian Hunter 			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
64690c2cda7SAdrian Hunter 						     false);
64792a9e4f7SAdrian Hunter 		}
64890c2cda7SAdrian Hunter 	} else if (thread_stack__in_kernel(ts) && ip < ks) {
64992a9e4f7SAdrian Hunter 		/* Return to userspace, so pop all kernel addresses */
65092a9e4f7SAdrian Hunter 		while (thread_stack__in_kernel(ts)) {
65192a9e4f7SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
65290c2cda7SAdrian Hunter 							tm, ref, true);
65392a9e4f7SAdrian Hunter 			if (err)
65492a9e4f7SAdrian Hunter 				return err;
65592a9e4f7SAdrian Hunter 		}
65692a9e4f7SAdrian Hunter 	}
65792a9e4f7SAdrian Hunter 
65892a9e4f7SAdrian Hunter 	if (ts->cnt)
65992a9e4f7SAdrian Hunter 		parent = ts->stack[ts->cnt - 1].cp;
66092a9e4f7SAdrian Hunter 	else
66190c2cda7SAdrian Hunter 		parent = root;
66292a9e4f7SAdrian Hunter 
6631f35cd65SAdrian Hunter 	if (parent->sym == from_al->sym) {
6641f35cd65SAdrian Hunter 		/*
6651f35cd65SAdrian Hunter 		 * At the bottom of the stack, assume the missing 'call' was
6661f35cd65SAdrian Hunter 		 * before the trace started. So, pop the current symbol and push
6671f35cd65SAdrian Hunter 		 * the 'to' symbol.
6681f35cd65SAdrian Hunter 		 */
6691f35cd65SAdrian Hunter 		if (ts->cnt == 1) {
6701f35cd65SAdrian Hunter 			err = thread_stack__call_return(thread, ts, --ts->cnt,
6711f35cd65SAdrian Hunter 							tm, ref, false);
6721f35cd65SAdrian Hunter 			if (err)
6731f35cd65SAdrian Hunter 				return err;
6741f35cd65SAdrian Hunter 		}
6751f35cd65SAdrian Hunter 
6761f35cd65SAdrian Hunter 		if (!ts->cnt) {
6771f35cd65SAdrian Hunter 			cp = call_path__findnew(cpr, root, tsym, addr, ks);
6781f35cd65SAdrian Hunter 
6791f35cd65SAdrian Hunter 			return thread_stack__push_cp(ts, addr, tm, ref, cp,
6801f35cd65SAdrian Hunter 						     true, false);
6811f35cd65SAdrian Hunter 		}
6821f35cd65SAdrian Hunter 
6831f35cd65SAdrian Hunter 		/*
6841f35cd65SAdrian Hunter 		 * Otherwise assume the 'return' is being used as a jump (e.g.
6851f35cd65SAdrian Hunter 		 * retpoline) and just push the 'to' symbol.
6861f35cd65SAdrian Hunter 		 */
6871f35cd65SAdrian Hunter 		cp = call_path__findnew(cpr, parent, tsym, addr, ks);
6881f35cd65SAdrian Hunter 
6891f35cd65SAdrian Hunter 		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
6901f35cd65SAdrian Hunter 		if (!err)
6911f35cd65SAdrian Hunter 			ts->stack[ts->cnt - 1].non_call = true;
6921f35cd65SAdrian Hunter 
6931f35cd65SAdrian Hunter 		return err;
6941f35cd65SAdrian Hunter 	}
6951f35cd65SAdrian Hunter 
6961f35cd65SAdrian Hunter 	/*
6971f35cd65SAdrian Hunter 	 * Assume 'parent' has not yet returned, so push 'to', and then push and
6981f35cd65SAdrian Hunter 	 * pop 'from'.
6991f35cd65SAdrian Hunter 	 */
7001f35cd65SAdrian Hunter 
7011f35cd65SAdrian Hunter 	cp = call_path__findnew(cpr, parent, tsym, addr, ks);
70292a9e4f7SAdrian Hunter 
70390c2cda7SAdrian Hunter 	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
70492a9e4f7SAdrian Hunter 	if (err)
70592a9e4f7SAdrian Hunter 		return err;
70692a9e4f7SAdrian Hunter 
7071f35cd65SAdrian Hunter 	cp = call_path__findnew(cpr, cp, fsym, ip, ks);
7081f35cd65SAdrian Hunter 
7091f35cd65SAdrian Hunter 	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
7101f35cd65SAdrian Hunter 	if (err)
7111f35cd65SAdrian Hunter 		return err;
7121f35cd65SAdrian Hunter 
7131f35cd65SAdrian Hunter 	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
71492a9e4f7SAdrian Hunter }
71592a9e4f7SAdrian Hunter 
71692a9e4f7SAdrian Hunter static int thread_stack__trace_begin(struct thread *thread,
71792a9e4f7SAdrian Hunter 				     struct thread_stack *ts, u64 timestamp,
71892a9e4f7SAdrian Hunter 				     u64 ref)
71992a9e4f7SAdrian Hunter {
72092a9e4f7SAdrian Hunter 	struct thread_stack_entry *tse;
72192a9e4f7SAdrian Hunter 	int err;
72292a9e4f7SAdrian Hunter 
72392a9e4f7SAdrian Hunter 	if (!ts->cnt)
72492a9e4f7SAdrian Hunter 		return 0;
72592a9e4f7SAdrian Hunter 
72692a9e4f7SAdrian Hunter 	/* Pop trace end */
72792a9e4f7SAdrian Hunter 	tse = &ts->stack[ts->cnt - 1];
7282dcde4e1SAdrian Hunter 	if (tse->trace_end) {
72992a9e4f7SAdrian Hunter 		err = thread_stack__call_return(thread, ts, --ts->cnt,
73092a9e4f7SAdrian Hunter 						timestamp, ref, false);
73192a9e4f7SAdrian Hunter 		if (err)
73292a9e4f7SAdrian Hunter 			return err;
73392a9e4f7SAdrian Hunter 	}
73492a9e4f7SAdrian Hunter 
73592a9e4f7SAdrian Hunter 	return 0;
73692a9e4f7SAdrian Hunter }
73792a9e4f7SAdrian Hunter 
73892a9e4f7SAdrian Hunter static int thread_stack__trace_end(struct thread_stack *ts,
73992a9e4f7SAdrian Hunter 				   struct perf_sample *sample, u64 ref)
74092a9e4f7SAdrian Hunter {
74192a9e4f7SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
74292a9e4f7SAdrian Hunter 	struct call_path *cp;
74392a9e4f7SAdrian Hunter 	u64 ret_addr;
74492a9e4f7SAdrian Hunter 
74592a9e4f7SAdrian Hunter 	/* No point having 'trace end' on the bottom of the stack */
74692a9e4f7SAdrian Hunter 	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
74792a9e4f7SAdrian Hunter 		return 0;
74892a9e4f7SAdrian Hunter 
74992a9e4f7SAdrian Hunter 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
75092a9e4f7SAdrian Hunter 				ts->kernel_start);
75192a9e4f7SAdrian Hunter 
75292a9e4f7SAdrian Hunter 	ret_addr = sample->ip + sample->insn_len;
75392a9e4f7SAdrian Hunter 
75492a9e4f7SAdrian Hunter 	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
7552dcde4e1SAdrian Hunter 				     false, true);
75692a9e4f7SAdrian Hunter }
75792a9e4f7SAdrian Hunter 
/*
 * Return true if 'name' is the name of an x86 retpoline thunk, i.e. it begins
 * with "__x86_indirect_thunk_" or is exactly "__indirect_thunk_start".
 * 'name' must be a valid NUL-terminated string.
 */
static bool is_x86_retpoline(const char *name)
{
	static const char thunk_prefix[] = "__x86_indirect_thunk_";

	/* sizeof() - 1 excludes the terminating NUL from the prefix length */
	if (strncmp(name, thunk_prefix, sizeof(thunk_prefix) - 1) == 0)
		return true;

	return strcmp(name, "__indirect_thunk_start") == 0;
}
7643c0cd952SAdrian Hunter 
7653c0cd952SAdrian Hunter /*
7663c0cd952SAdrian Hunter  * x86 retpoline functions pollute the call graph. This function removes them.
7673c0cd952SAdrian Hunter  * This does not handle function return thunks, nor is there any improvement
7683c0cd952SAdrian Hunter  * for the handling of inline thunks or extern thunks.
7693c0cd952SAdrian Hunter  */
7703c0cd952SAdrian Hunter static int thread_stack__x86_retpoline(struct thread_stack *ts,
7713c0cd952SAdrian Hunter 				       struct perf_sample *sample,
7723c0cd952SAdrian Hunter 				       struct addr_location *to_al)
7733c0cd952SAdrian Hunter {
7743c0cd952SAdrian Hunter 	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
7753c0cd952SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
7763c0cd952SAdrian Hunter 	struct symbol *sym = tse->cp->sym;
7773c0cd952SAdrian Hunter 	struct symbol *tsym = to_al->sym;
7783c0cd952SAdrian Hunter 	struct call_path *cp;
7793c0cd952SAdrian Hunter 
7803c0cd952SAdrian Hunter 	if (sym && is_x86_retpoline(sym->name)) {
7813c0cd952SAdrian Hunter 		/*
7823c0cd952SAdrian Hunter 		 * This is a x86 retpoline fn. It pollutes the call graph by
7833c0cd952SAdrian Hunter 		 * showing up everywhere there is an indirect branch, but does
7843c0cd952SAdrian Hunter 		 * not itself mean anything. Here the top-of-stack is removed,
7853c0cd952SAdrian Hunter 		 * by decrementing the stack count, and then further down, the
7863c0cd952SAdrian Hunter 		 * resulting top-of-stack is replaced with the actual target.
7873c0cd952SAdrian Hunter 		 * The result is that the retpoline functions will no longer
7883c0cd952SAdrian Hunter 		 * appear in the call graph. Note this only affects the call
7893c0cd952SAdrian Hunter 		 * graph, since all the original branches are left unchanged.
7903c0cd952SAdrian Hunter 		 */
7913c0cd952SAdrian Hunter 		ts->cnt -= 1;
7923c0cd952SAdrian Hunter 		sym = ts->stack[ts->cnt - 2].cp->sym;
7933c0cd952SAdrian Hunter 		if (sym && sym == tsym && to_al->addr != tsym->start) {
7943c0cd952SAdrian Hunter 			/*
7953c0cd952SAdrian Hunter 			 * Target is back to the middle of the symbol we came
7963c0cd952SAdrian Hunter 			 * from so assume it is an indirect jmp and forget it
7973c0cd952SAdrian Hunter 			 * altogether.
7983c0cd952SAdrian Hunter 			 */
7993c0cd952SAdrian Hunter 			ts->cnt -= 1;
8003c0cd952SAdrian Hunter 			return 0;
8013c0cd952SAdrian Hunter 		}
8023c0cd952SAdrian Hunter 	} else if (sym && sym == tsym) {
8033c0cd952SAdrian Hunter 		/*
8043c0cd952SAdrian Hunter 		 * Target is back to the symbol we came from so assume it is an
8053c0cd952SAdrian Hunter 		 * indirect jmp and forget it altogether.
8063c0cd952SAdrian Hunter 		 */
8073c0cd952SAdrian Hunter 		ts->cnt -= 1;
8083c0cd952SAdrian Hunter 		return 0;
8093c0cd952SAdrian Hunter 	}
8103c0cd952SAdrian Hunter 
8113c0cd952SAdrian Hunter 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
8123c0cd952SAdrian Hunter 				sample->addr, ts->kernel_start);
8133c0cd952SAdrian Hunter 	if (!cp)
8143c0cd952SAdrian Hunter 		return -ENOMEM;
8153c0cd952SAdrian Hunter 
8163c0cd952SAdrian Hunter 	/* Replace the top-of-stack with the actual target */
8173c0cd952SAdrian Hunter 	ts->stack[ts->cnt - 1].cp = cp;
8183c0cd952SAdrian Hunter 
8193c0cd952SAdrian Hunter 	return 0;
8203c0cd952SAdrian Hunter }
8213c0cd952SAdrian Hunter 
82292a9e4f7SAdrian Hunter int thread_stack__process(struct thread *thread, struct comm *comm,
82392a9e4f7SAdrian Hunter 			  struct perf_sample *sample,
82492a9e4f7SAdrian Hunter 			  struct addr_location *from_al,
82592a9e4f7SAdrian Hunter 			  struct addr_location *to_al, u64 ref,
82692a9e4f7SAdrian Hunter 			  struct call_return_processor *crp)
82792a9e4f7SAdrian Hunter {
828256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, sample->cpu);
8293c0cd952SAdrian Hunter 	enum retpoline_state_t rstate;
83092a9e4f7SAdrian Hunter 	int err = 0;
83192a9e4f7SAdrian Hunter 
83203b32cb2SAdrian Hunter 	if (ts && !ts->crp) {
83392a9e4f7SAdrian Hunter 		/* Supersede thread_stack__event() */
834f6060ac6SAdrian Hunter 		thread_stack__reset(thread, ts);
83503b32cb2SAdrian Hunter 		ts = NULL;
83692a9e4f7SAdrian Hunter 	}
83703b32cb2SAdrian Hunter 
83803b32cb2SAdrian Hunter 	if (!ts) {
839256d92bcSAdrian Hunter 		ts = thread_stack__new(thread, sample->cpu, crp);
840bd8e68acSAdrian Hunter 		if (!ts)
84192a9e4f7SAdrian Hunter 			return -ENOMEM;
84292a9e4f7SAdrian Hunter 		ts->comm = comm;
84392a9e4f7SAdrian Hunter 	}
84492a9e4f7SAdrian Hunter 
8453c0cd952SAdrian Hunter 	rstate = ts->rstate;
8463c0cd952SAdrian Hunter 	if (rstate == X86_RETPOLINE_DETECTED)
8473c0cd952SAdrian Hunter 		ts->rstate = X86_RETPOLINE_POSSIBLE;
8483c0cd952SAdrian Hunter 
84992a9e4f7SAdrian Hunter 	/* Flush stack on exec */
85092a9e4f7SAdrian Hunter 	if (ts->comm != comm && thread->pid_ == thread->tid) {
851a5499b37SAdrian Hunter 		err = __thread_stack__flush(thread, ts);
85292a9e4f7SAdrian Hunter 		if (err)
85392a9e4f7SAdrian Hunter 			return err;
85492a9e4f7SAdrian Hunter 		ts->comm = comm;
85592a9e4f7SAdrian Hunter 	}
85692a9e4f7SAdrian Hunter 
85792a9e4f7SAdrian Hunter 	/* If the stack is empty, put the current symbol on the stack */
85892a9e4f7SAdrian Hunter 	if (!ts->cnt) {
859e0b89511SAdrian Hunter 		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
86092a9e4f7SAdrian Hunter 		if (err)
86192a9e4f7SAdrian Hunter 			return err;
86292a9e4f7SAdrian Hunter 	}
86392a9e4f7SAdrian Hunter 
86492a9e4f7SAdrian Hunter 	ts->branch_count += 1;
86592a9e4f7SAdrian Hunter 	ts->last_time = sample->time;
86692a9e4f7SAdrian Hunter 
86792a9e4f7SAdrian Hunter 	if (sample->flags & PERF_IP_FLAG_CALL) {
8682dcde4e1SAdrian Hunter 		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
86992a9e4f7SAdrian Hunter 		struct call_path_root *cpr = ts->crp->cpr;
87092a9e4f7SAdrian Hunter 		struct call_path *cp;
87192a9e4f7SAdrian Hunter 		u64 ret_addr;
87292a9e4f7SAdrian Hunter 
87392a9e4f7SAdrian Hunter 		if (!sample->ip || !sample->addr)
87492a9e4f7SAdrian Hunter 			return 0;
87592a9e4f7SAdrian Hunter 
87692a9e4f7SAdrian Hunter 		ret_addr = sample->ip + sample->insn_len;
87792a9e4f7SAdrian Hunter 		if (ret_addr == sample->addr)
87892a9e4f7SAdrian Hunter 			return 0; /* Zero-length calls are excluded */
87992a9e4f7SAdrian Hunter 
88092a9e4f7SAdrian Hunter 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
88192a9e4f7SAdrian Hunter 					to_al->sym, sample->addr,
88292a9e4f7SAdrian Hunter 					ts->kernel_start);
88392a9e4f7SAdrian Hunter 		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
8842dcde4e1SAdrian Hunter 					    cp, false, trace_end);
8853c0cd952SAdrian Hunter 
8863c0cd952SAdrian Hunter 		/*
8873c0cd952SAdrian Hunter 		 * A call to the same symbol but not the start of the symbol,
8883c0cd952SAdrian Hunter 		 * may be the start of a x86 retpoline.
8893c0cd952SAdrian Hunter 		 */
8903c0cd952SAdrian Hunter 		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
8913c0cd952SAdrian Hunter 		    from_al->sym == to_al->sym &&
8923c0cd952SAdrian Hunter 		    to_al->addr != to_al->sym->start)
8933c0cd952SAdrian Hunter 			ts->rstate = X86_RETPOLINE_DETECTED;
8943c0cd952SAdrian Hunter 
89592a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
89692a9e4f7SAdrian Hunter 		if (!sample->ip || !sample->addr)
89792a9e4f7SAdrian Hunter 			return 0;
89892a9e4f7SAdrian Hunter 
8993c0cd952SAdrian Hunter 		/* x86 retpoline 'return' doesn't match the stack */
9003c0cd952SAdrian Hunter 		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
9013c0cd952SAdrian Hunter 		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
9023c0cd952SAdrian Hunter 			return thread_stack__x86_retpoline(ts, sample, to_al);
9033c0cd952SAdrian Hunter 
90492a9e4f7SAdrian Hunter 		err = thread_stack__pop_cp(thread, ts, sample->addr,
90592a9e4f7SAdrian Hunter 					   sample->time, ref, from_al->sym);
90692a9e4f7SAdrian Hunter 		if (err) {
90792a9e4f7SAdrian Hunter 			if (err < 0)
90892a9e4f7SAdrian Hunter 				return err;
90992a9e4f7SAdrian Hunter 			err = thread_stack__no_call_return(thread, ts, sample,
91092a9e4f7SAdrian Hunter 							   from_al, to_al, ref);
91192a9e4f7SAdrian Hunter 		}
91292a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
91392a9e4f7SAdrian Hunter 		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
91492a9e4f7SAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
91592a9e4f7SAdrian Hunter 		err = thread_stack__trace_end(ts, sample, ref);
916f08046cbSAdrian Hunter 	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
917f08046cbSAdrian Hunter 		   from_al->sym != to_al->sym && to_al->sym &&
918f08046cbSAdrian Hunter 		   to_al->addr == to_al->sym->start) {
919f08046cbSAdrian Hunter 		struct call_path_root *cpr = ts->crp->cpr;
920f08046cbSAdrian Hunter 		struct call_path *cp;
921f08046cbSAdrian Hunter 
922f08046cbSAdrian Hunter 		/*
923f08046cbSAdrian Hunter 		 * The compiler might optimize a call/ret combination by making
924f08046cbSAdrian Hunter 		 * it a jmp. Make that visible by recording on the stack a
925f08046cbSAdrian Hunter 		 * branch to the start of a different symbol. Note, that means
926f08046cbSAdrian Hunter 		 * when a ret pops the stack, all jmps must be popped off first.
927f08046cbSAdrian Hunter 		 */
928f08046cbSAdrian Hunter 		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
929f08046cbSAdrian Hunter 					to_al->sym, sample->addr,
930f08046cbSAdrian Hunter 					ts->kernel_start);
931f08046cbSAdrian Hunter 		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
932f08046cbSAdrian Hunter 					    false);
933f08046cbSAdrian Hunter 		if (!err)
934f08046cbSAdrian Hunter 			ts->stack[ts->cnt - 1].non_call = true;
93592a9e4f7SAdrian Hunter 	}
93692a9e4f7SAdrian Hunter 
93792a9e4f7SAdrian Hunter 	return err;
93892a9e4f7SAdrian Hunter }
939e216708dSAdrian Hunter 
940256d92bcSAdrian Hunter size_t thread_stack__depth(struct thread *thread, int cpu)
941e216708dSAdrian Hunter {
942256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
943bd8e68acSAdrian Hunter 
944bd8e68acSAdrian Hunter 	if (!ts)
945e216708dSAdrian Hunter 		return 0;
946bd8e68acSAdrian Hunter 	return ts->cnt;
947e216708dSAdrian Hunter }
948