xref: /openbmc/linux/tools/perf/util/thread-stack.c (revision 833fd800)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

792a9e4f7SAdrian Hunter #include <linux/rbtree.h>
892a9e4f7SAdrian Hunter #include <linux/list.h>
9256d92bcSAdrian Hunter #include <linux/log2.h>
107f7c536fSArnaldo Carvalho de Melo #include <linux/zalloc.h>
11a43783aeSArnaldo Carvalho de Melo #include <errno.h>
12215a0d30SArnaldo Carvalho de Melo #include <stdlib.h>
138520a98dSArnaldo Carvalho de Melo #include <string.h>
1400447ccdSAdrian Hunter #include "thread.h"
1500447ccdSAdrian Hunter #include "event.h"
1692a9e4f7SAdrian Hunter #include "machine.h"
173c0cd952SAdrian Hunter #include "env.h"
1800447ccdSAdrian Hunter #include "debug.h"
1992a9e4f7SAdrian Hunter #include "symbol.h"
2092a9e4f7SAdrian Hunter #include "comm.h"
21451db126SChris Phlipot #include "call-path.h"
2200447ccdSAdrian Hunter #include "thread-stack.h"
2300447ccdSAdrian Hunter 
2492a9e4f7SAdrian Hunter #define STACK_GROWTH 2048
2592a9e4f7SAdrian Hunter 
/*
 * Retpoline detection state.
 */
enum retpoline_state_t {
	RETPOLINE_NONE = 0,		/* no retpoline detection */
	X86_RETPOLINE_POSSIBLE,		/* x86 retpoline possible */
	X86_RETPOLINE_DETECTED,		/* x86 retpoline detected */
};

3992a9e4f7SAdrian Hunter /**
4092a9e4f7SAdrian Hunter  * struct thread_stack_entry - thread stack entry.
4192a9e4f7SAdrian Hunter  * @ret_addr: return address
4292a9e4f7SAdrian Hunter  * @timestamp: timestamp (if known)
4392a9e4f7SAdrian Hunter  * @ref: external reference (e.g. db_id of sample)
4492a9e4f7SAdrian Hunter  * @branch_count: the branch count when the entry was created
45003ccdc7SAdrian Hunter  * @insn_count: the instruction count when the entry was created
46003ccdc7SAdrian Hunter  * @cyc_count the cycle count when the entry was created
47f435887eSAdrian Hunter  * @db_id: id used for db-export
4892a9e4f7SAdrian Hunter  * @cp: call path
4992a9e4f7SAdrian Hunter  * @no_call: a 'call' was not seen
504d60e5e3SAdrian Hunter  * @trace_end: a 'call' but trace ended
51f08046cbSAdrian Hunter  * @non_call: a branch but not a 'call' to the start of a different symbol
5292a9e4f7SAdrian Hunter  */
5392a9e4f7SAdrian Hunter struct thread_stack_entry {
5492a9e4f7SAdrian Hunter 	u64 ret_addr;
5592a9e4f7SAdrian Hunter 	u64 timestamp;
5692a9e4f7SAdrian Hunter 	u64 ref;
5792a9e4f7SAdrian Hunter 	u64 branch_count;
58003ccdc7SAdrian Hunter 	u64 insn_count;
59003ccdc7SAdrian Hunter 	u64 cyc_count;
60f435887eSAdrian Hunter 	u64 db_id;
6192a9e4f7SAdrian Hunter 	struct call_path *cp;
6292a9e4f7SAdrian Hunter 	bool no_call;
634d60e5e3SAdrian Hunter 	bool trace_end;
64f08046cbSAdrian Hunter 	bool non_call;
6592a9e4f7SAdrian Hunter };
6692a9e4f7SAdrian Hunter 
6792a9e4f7SAdrian Hunter /**
6892a9e4f7SAdrian Hunter  * struct thread_stack - thread stack constructed from 'call' and 'return'
6992a9e4f7SAdrian Hunter  *                       branch samples.
7092a9e4f7SAdrian Hunter  * @stack: array that holds the stack
7192a9e4f7SAdrian Hunter  * @cnt: number of entries in the stack
7292a9e4f7SAdrian Hunter  * @sz: current maximum stack size
7392a9e4f7SAdrian Hunter  * @trace_nr: current trace number
7492a9e4f7SAdrian Hunter  * @branch_count: running branch count
75003ccdc7SAdrian Hunter  * @insn_count: running  instruction count
76003ccdc7SAdrian Hunter  * @cyc_count running  cycle count
7792a9e4f7SAdrian Hunter  * @kernel_start: kernel start address
7892a9e4f7SAdrian Hunter  * @last_time: last timestamp
7992a9e4f7SAdrian Hunter  * @crp: call/return processor
8092a9e4f7SAdrian Hunter  * @comm: current comm
81f6060ac6SAdrian Hunter  * @arr_sz: size of array if this is the first element of an array
823c0cd952SAdrian Hunter  * @rstate: used to detect retpolines
8386d67180SAdrian Hunter  * @br_stack_rb: branch stack (ring buffer)
8486d67180SAdrian Hunter  * @br_stack_sz: maximum branch stack size
8586d67180SAdrian Hunter  * @br_stack_pos: current position in @br_stack_rb
8686d67180SAdrian Hunter  * @mispred_all: mark all branches as mispredicted
8792a9e4f7SAdrian Hunter  */
8800447ccdSAdrian Hunter struct thread_stack {
8900447ccdSAdrian Hunter 	struct thread_stack_entry *stack;
9000447ccdSAdrian Hunter 	size_t cnt;
9100447ccdSAdrian Hunter 	size_t sz;
9200447ccdSAdrian Hunter 	u64 trace_nr;
9392a9e4f7SAdrian Hunter 	u64 branch_count;
94003ccdc7SAdrian Hunter 	u64 insn_count;
95003ccdc7SAdrian Hunter 	u64 cyc_count;
9692a9e4f7SAdrian Hunter 	u64 kernel_start;
9792a9e4f7SAdrian Hunter 	u64 last_time;
9892a9e4f7SAdrian Hunter 	struct call_return_processor *crp;
9992a9e4f7SAdrian Hunter 	struct comm *comm;
100f6060ac6SAdrian Hunter 	unsigned int arr_sz;
1013c0cd952SAdrian Hunter 	enum retpoline_state_t rstate;
10286d67180SAdrian Hunter 	struct branch_stack *br_stack_rb;
10386d67180SAdrian Hunter 	unsigned int br_stack_sz;
10486d67180SAdrian Hunter 	unsigned int br_stack_pos;
10586d67180SAdrian Hunter 	bool mispred_all;
10600447ccdSAdrian Hunter };
10700447ccdSAdrian Hunter 
/*
 * A thread with pid == tid == 0 is assumed to be the idle task, as registered
 * by perf_session__register_idle_thread(). There is really one idle task per
 * cpu, so such a thread needs a separate stack for each cpu.
 *
 * Returns true when @thread needs per-cpu stacks.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return thread__tid(thread) == 0 && thread__pid(thread) == 0;
}

thread_stack__grow(struct thread_stack * ts)11800447ccdSAdrian Hunter static int thread_stack__grow(struct thread_stack *ts)
11900447ccdSAdrian Hunter {
12000447ccdSAdrian Hunter 	struct thread_stack_entry *new_stack;
12100447ccdSAdrian Hunter 	size_t sz, new_sz;
12200447ccdSAdrian Hunter 
12300447ccdSAdrian Hunter 	new_sz = ts->sz + STACK_GROWTH;
12400447ccdSAdrian Hunter 	sz = new_sz * sizeof(struct thread_stack_entry);
12500447ccdSAdrian Hunter 
12600447ccdSAdrian Hunter 	new_stack = realloc(ts->stack, sz);
12700447ccdSAdrian Hunter 	if (!new_stack)
12800447ccdSAdrian Hunter 		return -ENOMEM;
12900447ccdSAdrian Hunter 
13000447ccdSAdrian Hunter 	ts->stack = new_stack;
13100447ccdSAdrian Hunter 	ts->sz = new_sz;
13200447ccdSAdrian Hunter 
13300447ccdSAdrian Hunter 	return 0;
13400447ccdSAdrian Hunter }
13500447ccdSAdrian Hunter 
thread_stack__init(struct thread_stack * ts,struct thread * thread,struct call_return_processor * crp,bool callstack,unsigned int br_stack_sz)1362e9e8688SAdrian Hunter static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
13786d67180SAdrian Hunter 			      struct call_return_processor *crp,
13886d67180SAdrian Hunter 			      bool callstack, unsigned int br_stack_sz)
1392e9e8688SAdrian Hunter {
1402e9e8688SAdrian Hunter 	int err;
1412e9e8688SAdrian Hunter 
14286d67180SAdrian Hunter 	if (callstack) {
1432e9e8688SAdrian Hunter 		err = thread_stack__grow(ts);
1442e9e8688SAdrian Hunter 		if (err)
1452e9e8688SAdrian Hunter 			return err;
14686d67180SAdrian Hunter 	}
14786d67180SAdrian Hunter 
14886d67180SAdrian Hunter 	if (br_stack_sz) {
14986d67180SAdrian Hunter 		size_t sz = sizeof(struct branch_stack);
15086d67180SAdrian Hunter 
15186d67180SAdrian Hunter 		sz += br_stack_sz * sizeof(struct branch_entry);
15286d67180SAdrian Hunter 		ts->br_stack_rb = zalloc(sz);
15386d67180SAdrian Hunter 		if (!ts->br_stack_rb)
15486d67180SAdrian Hunter 			return -ENOMEM;
15586d67180SAdrian Hunter 		ts->br_stack_sz = br_stack_sz;
15686d67180SAdrian Hunter 	}
1572e9e8688SAdrian Hunter 
158ee84a303SIan Rogers 	if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
159ee84a303SIan Rogers 		struct machine *machine = maps__machine(thread__maps(thread));
1603c0cd952SAdrian Hunter 		const char *arch = perf_env__arch(machine->env);
1613c0cd952SAdrian Hunter 
1623c0cd952SAdrian Hunter 		ts->kernel_start = machine__kernel_start(machine);
1633c0cd952SAdrian Hunter 		if (!strcmp(arch, "x86"))
1643c0cd952SAdrian Hunter 			ts->rstate = X86_RETPOLINE_POSSIBLE;
1653c0cd952SAdrian Hunter 	} else {
1662e9e8688SAdrian Hunter 		ts->kernel_start = 1ULL << 63;
1673c0cd952SAdrian Hunter 	}
1682e9e8688SAdrian Hunter 	ts->crp = crp;
1692e9e8688SAdrian Hunter 
1702e9e8688SAdrian Hunter 	return 0;
1712e9e8688SAdrian Hunter }
1722e9e8688SAdrian Hunter 
thread_stack__new(struct thread * thread,int cpu,struct call_return_processor * crp,bool callstack,unsigned int br_stack_sz)173256d92bcSAdrian Hunter static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
17486d67180SAdrian Hunter 					      struct call_return_processor *crp,
17586d67180SAdrian Hunter 					      bool callstack,
17686d67180SAdrian Hunter 					      unsigned int br_stack_sz)
17700447ccdSAdrian Hunter {
178ee84a303SIan Rogers 	struct thread_stack *ts = thread__ts(thread), *new_ts;
179139f42f3SAdrian Hunter 	unsigned int old_sz = ts ? ts->arr_sz : 0;
180139f42f3SAdrian Hunter 	unsigned int new_sz = 1;
18100447ccdSAdrian Hunter 
182256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0)
183256d92bcSAdrian Hunter 		new_sz = roundup_pow_of_two(cpu + 1);
184256d92bcSAdrian Hunter 
185139f42f3SAdrian Hunter 	if (!ts || new_sz > old_sz) {
186139f42f3SAdrian Hunter 		new_ts = calloc(new_sz, sizeof(*ts));
187139f42f3SAdrian Hunter 		if (!new_ts)
18800447ccdSAdrian Hunter 			return NULL;
189139f42f3SAdrian Hunter 		if (ts)
190139f42f3SAdrian Hunter 			memcpy(new_ts, ts, old_sz * sizeof(*ts));
191139f42f3SAdrian Hunter 		new_ts->arr_sz = new_sz;
192ee84a303SIan Rogers 		free(thread__ts(thread));
193ee84a303SIan Rogers 		thread__set_ts(thread, new_ts);
194139f42f3SAdrian Hunter 		ts = new_ts;
19500447ccdSAdrian Hunter 	}
19600447ccdSAdrian Hunter 
197256d92bcSAdrian Hunter 	if (thread_stack__per_cpu(thread) && cpu > 0 &&
198256d92bcSAdrian Hunter 	    (unsigned int)cpu < ts->arr_sz)
199256d92bcSAdrian Hunter 		ts += cpu;
200256d92bcSAdrian Hunter 
201139f42f3SAdrian Hunter 	if (!ts->stack &&
20286d67180SAdrian Hunter 	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
203139f42f3SAdrian Hunter 		return NULL;
204bd8e68acSAdrian Hunter 
20500447ccdSAdrian Hunter 	return ts;
20600447ccdSAdrian Hunter }
20700447ccdSAdrian Hunter 
thread__cpu_stack(struct thread * thread,int cpu)208256d92bcSAdrian Hunter static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
209bd8e68acSAdrian Hunter {
210ee84a303SIan Rogers 	struct thread_stack *ts = thread__ts(thread);
211256d92bcSAdrian Hunter 
212256d92bcSAdrian Hunter 	if (cpu < 0)
213256d92bcSAdrian Hunter 		cpu = 0;
214256d92bcSAdrian Hunter 
215256d92bcSAdrian Hunter 	if (!ts || (unsigned int)cpu >= ts->arr_sz)
216256d92bcSAdrian Hunter 		return NULL;
217256d92bcSAdrian Hunter 
218256d92bcSAdrian Hunter 	ts += cpu;
219256d92bcSAdrian Hunter 
220256d92bcSAdrian Hunter 	if (!ts->stack)
221256d92bcSAdrian Hunter 		return NULL;
222256d92bcSAdrian Hunter 
223256d92bcSAdrian Hunter 	return ts;
224256d92bcSAdrian Hunter }
225256d92bcSAdrian Hunter 
/*
 * Return the thread stack to use for @thread on @cpu, or NULL if @thread is
 * NULL or no stack exists yet. Threads needing per-cpu stacks are looked up by
 * @cpu; for any other thread @cpu is ignored.
 */
static inline struct thread_stack *thread__stack(struct thread *thread,
						    int cpu)
{
	if (!thread)
		return NULL;

	return thread_stack__per_cpu(thread) ? thread__cpu_stack(thread, cpu)
					     : thread__ts(thread);
}

/*
 * Push a return address onto the stack of @ts.
 *
 * @ts: thread stack
 * @ret_addr: return address of the call
 * @trace_end: the call was made when the trace ended
 *
 * If the stack is full and cannot be grown, the existing contents are
 * discarded and the new entry becomes the only one.
 *
 * Returns 0 on success, or the error from thread_stack__grow().
 */
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
			/*
			 * A failed grow leaves the old array in place, which
			 * can be reused from index 0. If there was no array at
			 * all there is nowhere to store the entry.
			 */
			if (!ts->stack || !ts->sz)
				return err;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

/*
 * Pop the stack of @ts down to (and including) the most recent entry whose
 * return address is @ret_addr.
 *
 * Some functions are never seen to return, for example when setjmp / longjmp
 * has been used, or across the perf context switch in the kernel, which does
 * not stop and start tracing in exactly the same code path. In those cases the
 * matching return address lies further down the stack, so search downwards
 * from the top. If no entry matches, assume the opposite (a return for a call
 * that was not seen) and leave the stack unchanged.
 */
static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t depth = ts->cnt;

	while (depth > 0) {
		depth--;
		if (ts->stack[depth].ret_addr != ret_addr)
			continue;
		ts->cnt = depth;
		return;
	}
}

thread_stack__pop_trace_end(struct thread_stack * ts)2784d60e5e3SAdrian Hunter static void thread_stack__pop_trace_end(struct thread_stack *ts)
2794d60e5e3SAdrian Hunter {
2804d60e5e3SAdrian Hunter 	size_t i;
2814d60e5e3SAdrian Hunter 
2824d60e5e3SAdrian Hunter 	for (i = ts->cnt; i; ) {
2834d60e5e3SAdrian Hunter 		if (ts->stack[--i].trace_end)
2844d60e5e3SAdrian Hunter 			ts->cnt = i;
2854d60e5e3SAdrian Hunter 		else
2864d60e5e3SAdrian Hunter 			return;
2874d60e5e3SAdrian Hunter 	}
2884d60e5e3SAdrian Hunter }
2894d60e5e3SAdrian Hunter 
thread_stack__in_kernel(struct thread_stack * ts)29092a9e4f7SAdrian Hunter static bool thread_stack__in_kernel(struct thread_stack *ts)
29192a9e4f7SAdrian Hunter {
29292a9e4f7SAdrian Hunter 	if (!ts->cnt)
29392a9e4f7SAdrian Hunter 		return false;
29492a9e4f7SAdrian Hunter 
29592a9e4f7SAdrian Hunter 	return ts->stack[ts->cnt - 1].cp->in_kernel;
29692a9e4f7SAdrian Hunter }
29792a9e4f7SAdrian Hunter 
/*
 * Build a call_return record for stack entry @idx of @ts and hand it to the
 * call/return processor of @ts.
 *
 * @thread: thread the stack belongs to
 * @ts: thread stack
 * @idx: index of the stack entry being returned from
 * @timestamp: return time
 * @ref: external reference for the return
 * @no_return: the 'return' was not seen
 *
 * The counts reported are the differences between the running counts of @ts
 * and the counts saved in the entry.
 *
 * Returns the value returned by the processor's process() callback.
 */
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct thread_stack_entry *tse = &ts->stack[idx];
	struct call_return_processor *crp = ts->crp;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.cp = tse->cp,
		.call_time = tse->timestamp,
		.return_time = timestamp,
		.branch_count = ts->branch_count - tse->branch_count,
		.insn_count = ts->insn_count - tse->insn_count,
		.cyc_count = ts->cyc_count - tse->cyc_count,
		.db_id = tse->db_id,
		.call_ref = tse->ref,
		.return_ref = ref,
	};
	u64 *parent_db_id = NULL;

	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its information
	 * is not yet complete because its 'return' has not yet been processed.
	 */
	if (idx)
		parent_db_id = &ts->stack[idx - 1].db_id;

	return crp->process(&cr, parent_db_id, crp->data);
}

__thread_stack__flush(struct thread * thread,struct thread_stack * ts)338a5499b37SAdrian Hunter static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
33992a9e4f7SAdrian Hunter {
34092a9e4f7SAdrian Hunter 	struct call_return_processor *crp = ts->crp;
34192a9e4f7SAdrian Hunter 	int err;
34292a9e4f7SAdrian Hunter 
34392a9e4f7SAdrian Hunter 	if (!crp) {
34492a9e4f7SAdrian Hunter 		ts->cnt = 0;
34586d67180SAdrian Hunter 		ts->br_stack_pos = 0;
34686d67180SAdrian Hunter 		if (ts->br_stack_rb)
34786d67180SAdrian Hunter 			ts->br_stack_rb->nr = 0;
34892a9e4f7SAdrian Hunter 		return 0;
34992a9e4f7SAdrian Hunter 	}
35092a9e4f7SAdrian Hunter 
35192a9e4f7SAdrian Hunter 	while (ts->cnt) {
35292a9e4f7SAdrian Hunter 		err = thread_stack__call_return(thread, ts, --ts->cnt,
35392a9e4f7SAdrian Hunter 						ts->last_time, 0, true);
35492a9e4f7SAdrian Hunter 		if (err) {
35592a9e4f7SAdrian Hunter 			pr_err("Error flushing thread stack!\n");
35692a9e4f7SAdrian Hunter 			ts->cnt = 0;
35792a9e4f7SAdrian Hunter 			return err;
35892a9e4f7SAdrian Hunter 		}
35992a9e4f7SAdrian Hunter 	}
36092a9e4f7SAdrian Hunter 
36192a9e4f7SAdrian Hunter 	return 0;
36292a9e4f7SAdrian Hunter }
36392a9e4f7SAdrian Hunter 
thread_stack__flush(struct thread * thread)364a5499b37SAdrian Hunter int thread_stack__flush(struct thread *thread)
365a5499b37SAdrian Hunter {
366ee84a303SIan Rogers 	struct thread_stack *ts = thread__ts(thread);
367f6060ac6SAdrian Hunter 	unsigned int pos;
368f6060ac6SAdrian Hunter 	int err = 0;
369bd8e68acSAdrian Hunter 
370f6060ac6SAdrian Hunter 	if (ts) {
371f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++) {
372f6060ac6SAdrian Hunter 			int ret = __thread_stack__flush(thread, ts + pos);
373a5499b37SAdrian Hunter 
374f6060ac6SAdrian Hunter 			if (ret)
375f6060ac6SAdrian Hunter 				err = ret;
376f6060ac6SAdrian Hunter 		}
377f6060ac6SAdrian Hunter 	}
378f6060ac6SAdrian Hunter 
379f6060ac6SAdrian Hunter 	return err;
380a5499b37SAdrian Hunter }
381a5499b37SAdrian Hunter 
/*
 * Record a branch in the branch stack ring buffer of @ts.
 *
 * The write position moves downwards through the buffer and wraps from index 0
 * back to the last index. The entry count stops increasing once it reaches the
 * buffer size.
 */
static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
					  u64 from_ip, u64 to_ip)
{
	struct branch_stack *bs = ts->br_stack_rb;
	unsigned int pos = ts->br_stack_pos;
	struct branch_entry *be;

	if (!pos)
		pos = ts->br_stack_sz;
	pos--;
	ts->br_stack_pos = pos;

	be = &bs->entries[pos];
	be->from = from_ip;
	be->to = to_ip;
	be->flags.value = 0;
	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ts->mispred_all;

	if (bs->nr < ts->br_stack_sz)
		bs->nr++;
}

/*
 * Update the thread stack of @thread on @cpu for one branch event.
 *
 * @flags: PERF_IP_FLAG_* flags describing the branch
 * @from_ip, @to_ip: branch source and destination
 * @insn_len: length of the branch instruction
 * @trace_nr: current trace number
 * @callstack: maintain the call stack
 * @br_stack_sz: branch stack size, 0 to not record a branch stack
 * @mispred_all: value stored in a newly created stack's mispred_all
 *
 * The thread stack is created on first use.
 *
 * Returns 0 on success, -EINVAL if @thread is NULL, -ENOMEM if the stack
 * cannot be created, or the result of thread_stack__push() for a call.
 */
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
			unsigned int br_stack_sz, bool mispred_all)
{
	struct thread_stack *ts;

	if (!thread)
		return -EINVAL;

	ts = thread__stack(thread, cpu);
	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
		ts->mispred_all = mispred_all;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (ts->trace_nr != trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	if (br_stack_sz)
		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);

	/*
	 * Stop here if thread_stack__process() is in use, or not recording call
	 * stack.
	 */
	if (ts->crp || !callstack)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr = from_ip + insn_len;

		/* Calls to address 0 and zero-length calls are excluded */
		if (!to_ip || ret_addr == to_ip)
			return 0;

		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	}

	if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, do not expect a call made
		 * when the trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
		return 0;
	}

	if ((flags & PERF_IP_FLAG_RETURN) && from_ip)
		thread_stack__pop(ts, to_ip);

	return 0;
}

/*
 * Set the trace number of the thread stack of @thread on @cpu. If the number
 * changes and the previous one was non-zero, the stack is flushed first. Does
 * nothing when no thread stack exists.
 */
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts || ts->trace_nr == trace_nr)
		return;

	if (ts->trace_nr)
		__thread_stack__flush(thread, ts);
	ts->trace_nr = trace_nr;
}

__thread_stack__free(struct thread * thread,struct thread_stack * ts)487f6060ac6SAdrian Hunter static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
488f6060ac6SAdrian Hunter {
489f6060ac6SAdrian Hunter 	__thread_stack__flush(thread, ts);
490f6060ac6SAdrian Hunter 	zfree(&ts->stack);
49186d67180SAdrian Hunter 	zfree(&ts->br_stack_rb);
492f6060ac6SAdrian Hunter }
493f6060ac6SAdrian Hunter 
thread_stack__reset(struct thread * thread,struct thread_stack * ts)494f6060ac6SAdrian Hunter static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
495f6060ac6SAdrian Hunter {
496f6060ac6SAdrian Hunter 	unsigned int arr_sz = ts->arr_sz;
497f6060ac6SAdrian Hunter 
498f6060ac6SAdrian Hunter 	__thread_stack__free(thread, ts);
499f6060ac6SAdrian Hunter 	memset(ts, 0, sizeof(*ts));
500f6060ac6SAdrian Hunter 	ts->arr_sz = arr_sz;
501f6060ac6SAdrian Hunter }
502f6060ac6SAdrian Hunter 
thread_stack__free(struct thread * thread)50300447ccdSAdrian Hunter void thread_stack__free(struct thread *thread)
50400447ccdSAdrian Hunter {
505ee84a303SIan Rogers 	struct thread_stack *ts = thread__ts(thread);
506f6060ac6SAdrian Hunter 	unsigned int pos;
507bd8e68acSAdrian Hunter 
508bd8e68acSAdrian Hunter 	if (ts) {
509f6060ac6SAdrian Hunter 		for (pos = 0; pos < ts->arr_sz; pos++)
510f6060ac6SAdrian Hunter 			__thread_stack__free(thread, ts + pos);
511ee84a303SIan Rogers 		free(thread__ts(thread));
512ee84a303SIan Rogers 		thread__set_ts(thread, NULL);
51300447ccdSAdrian Hunter 	}
51400447ccdSAdrian Hunter }
51500447ccdSAdrian Hunter 
/*
 * Return the callchain context marker for @ip: PERF_CONTEXT_USER for addresses
 * below @kernel_start, PERF_CONTEXT_KERNEL otherwise.
 */
static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	if (ip < kernel_start)
		return PERF_CONTEXT_USER;

	return PERF_CONTEXT_KERNEL;
}

/*
 * Fill @chain with a callchain synthesized from the thread stack of @thread
 * on @cpu.
 *
 * @chain: callchain to fill
 * @sz: number of entries available in @chain->ips
 * @ip: sample address, placed first after its context marker
 * @kernel_start: addresses below this are user space
 *
 * Return addresses are appended from the top of the stack downwards, with a
 * context marker inserted whenever the context changes. Filling stops when
 * @chain is full; a context marker is never written into the last available
 * entry. If @sz is less than 2 the chain is left empty.
 */
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 prev_context = callchain_context(ip, kernel_start);
	size_t nr, depth;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = prev_context;
	chain->ips[1] = ip;
	nr = 2;

	if (!ts) {
		chain->nr = nr;
		return;
	}

	for (depth = ts->cnt; nr < sz && depth > 0; depth--) {
		u64 addr = ts->stack[depth - 1].ret_addr;
		u64 context = callchain_context(addr, kernel_start);

		if (context != prev_context) {
			/* Need room for both the marker and the address */
			if (nr >= sz - 1)
				break;
			chain->ips[nr++] = context;
			prev_context = context;
		}
		chain->ips[nr++] = addr;
	}

	chain->nr = nr;
}

5604fef41bfSAdrian Hunter /*
5614fef41bfSAdrian Hunter  * Hardware sample records, created some time after the event occurred, need to
5624fef41bfSAdrian Hunter  * have subsequent addresses removed from the call chain.
5634fef41bfSAdrian Hunter  */
thread_stack__sample_late(struct thread * thread,int cpu,struct ip_callchain * chain,size_t sz,u64 sample_ip,u64 kernel_start)5644fef41bfSAdrian Hunter void thread_stack__sample_late(struct thread *thread, int cpu,
5654fef41bfSAdrian Hunter 			       struct ip_callchain *chain, size_t sz,
5664fef41bfSAdrian Hunter 			       u64 sample_ip, u64 kernel_start)
5674fef41bfSAdrian Hunter {
5684fef41bfSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
5694fef41bfSAdrian Hunter 	u64 sample_context = callchain_context(sample_ip, kernel_start);
5704fef41bfSAdrian Hunter 	u64 last_context, context, ip;
5714fef41bfSAdrian Hunter 	size_t nr = 0, j;
5724fef41bfSAdrian Hunter 
5734fef41bfSAdrian Hunter 	if (sz < 2) {
5744fef41bfSAdrian Hunter 		chain->nr = 0;
5754fef41bfSAdrian Hunter 		return;
5764fef41bfSAdrian Hunter 	}
5774fef41bfSAdrian Hunter 
5784fef41bfSAdrian Hunter 	if (!ts)
5794fef41bfSAdrian Hunter 		goto out;
5804fef41bfSAdrian Hunter 
5814fef41bfSAdrian Hunter 	/*
5824fef41bfSAdrian Hunter 	 * When tracing kernel space, kernel addresses occur at the top of the
5834fef41bfSAdrian Hunter 	 * call chain after the event occurred but before tracing stopped.
5844fef41bfSAdrian Hunter 	 * Skip them.
5854fef41bfSAdrian Hunter 	 */
5864fef41bfSAdrian Hunter 	for (j = 1; j <= ts->cnt; j++) {
5874fef41bfSAdrian Hunter 		ip = ts->stack[ts->cnt - j].ret_addr;
5884fef41bfSAdrian Hunter 		context = callchain_context(ip, kernel_start);
5894fef41bfSAdrian Hunter 		if (context == PERF_CONTEXT_USER ||
5904fef41bfSAdrian Hunter 		    (context == sample_context && ip == sample_ip))
5914fef41bfSAdrian Hunter 			break;
5924fef41bfSAdrian Hunter 	}
5934fef41bfSAdrian Hunter 
5944fef41bfSAdrian Hunter 	last_context = sample_ip; /* Use sample_ip as an invalid context */
5954fef41bfSAdrian Hunter 
5964fef41bfSAdrian Hunter 	for (; nr < sz && j <= ts->cnt; nr++, j++) {
5974fef41bfSAdrian Hunter 		ip = ts->stack[ts->cnt - j].ret_addr;
5984fef41bfSAdrian Hunter 		context = callchain_context(ip, kernel_start);
5994fef41bfSAdrian Hunter 		if (context != last_context) {
6004fef41bfSAdrian Hunter 			if (nr >= sz - 1)
6014fef41bfSAdrian Hunter 				break;
6024fef41bfSAdrian Hunter 			chain->ips[nr++] = context;
6034fef41bfSAdrian Hunter 			last_context = context;
6044fef41bfSAdrian Hunter 		}
6054fef41bfSAdrian Hunter 		chain->ips[nr] = ip;
6064fef41bfSAdrian Hunter 	}
6074fef41bfSAdrian Hunter out:
6084fef41bfSAdrian Hunter 	if (nr) {
6094fef41bfSAdrian Hunter 		chain->nr = nr;
6104fef41bfSAdrian Hunter 	} else {
6114fef41bfSAdrian Hunter 		chain->ips[0] = sample_context;
6124fef41bfSAdrian Hunter 		chain->ips[1] = sample_ip;
6134fef41bfSAdrian Hunter 		chain->nr = 2;
6144fef41bfSAdrian Hunter 	}
6154fef41bfSAdrian Hunter }
6164fef41bfSAdrian Hunter 
thread_stack__br_sample(struct thread * thread,int cpu,struct branch_stack * dst,unsigned int sz)61786d67180SAdrian Hunter void thread_stack__br_sample(struct thread *thread, int cpu,
61886d67180SAdrian Hunter 			     struct branch_stack *dst, unsigned int sz)
61986d67180SAdrian Hunter {
62086d67180SAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
62186d67180SAdrian Hunter 	const size_t bsz = sizeof(struct branch_entry);
62286d67180SAdrian Hunter 	struct branch_stack *src;
62386d67180SAdrian Hunter 	struct branch_entry *be;
62486d67180SAdrian Hunter 	unsigned int nr;
62586d67180SAdrian Hunter 
62686d67180SAdrian Hunter 	dst->nr = 0;
62786d67180SAdrian Hunter 
62886d67180SAdrian Hunter 	if (!ts)
62986d67180SAdrian Hunter 		return;
63086d67180SAdrian Hunter 
63186d67180SAdrian Hunter 	src = ts->br_stack_rb;
63286d67180SAdrian Hunter 	if (!src->nr)
63386d67180SAdrian Hunter 		return;
63486d67180SAdrian Hunter 
63586d67180SAdrian Hunter 	dst->nr = min((unsigned int)src->nr, sz);
63686d67180SAdrian Hunter 
63786d67180SAdrian Hunter 	be = &dst->entries[0];
63886d67180SAdrian Hunter 	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
63986d67180SAdrian Hunter 	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
64086d67180SAdrian Hunter 
64186d67180SAdrian Hunter 	if (src->nr >= ts->br_stack_sz) {
64286d67180SAdrian Hunter 		sz -= nr;
64386d67180SAdrian Hunter 		be = &dst->entries[nr];
64486d67180SAdrian Hunter 		nr = min(ts->br_stack_pos, sz);
64586d67180SAdrian Hunter 		memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
64686d67180SAdrian Hunter 	}
64786d67180SAdrian Hunter }
64886d67180SAdrian Hunter 
6493749e0bbSAdrian Hunter /* Start of user space branch entries */
us_start(struct branch_entry * be,u64 kernel_start,bool * start)6503749e0bbSAdrian Hunter static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
6513749e0bbSAdrian Hunter {
6523749e0bbSAdrian Hunter 	if (!*start)
6533749e0bbSAdrian Hunter 		*start = be->to && be->to < kernel_start;
6543749e0bbSAdrian Hunter 
6553749e0bbSAdrian Hunter 	return *start;
6563749e0bbSAdrian Hunter }
6573749e0bbSAdrian Hunter 
6583749e0bbSAdrian Hunter /*
6593749e0bbSAdrian Hunter  * Start of branch entries after the ip fell in between 2 branches, or user
6603749e0bbSAdrian Hunter  * space branch entries.
6613749e0bbSAdrian Hunter  */
ks_start(struct branch_entry * be,u64 sample_ip,u64 kernel_start,bool * start,struct branch_entry * nb)6623749e0bbSAdrian Hunter static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
6633749e0bbSAdrian Hunter 		     bool *start, struct branch_entry *nb)
6643749e0bbSAdrian Hunter {
6653749e0bbSAdrian Hunter 	if (!*start) {
6663749e0bbSAdrian Hunter 		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
6673749e0bbSAdrian Hunter 			 be->from < kernel_start ||
6683749e0bbSAdrian Hunter 			 (be->to && be->to < kernel_start);
6693749e0bbSAdrian Hunter 	}
6703749e0bbSAdrian Hunter 
6713749e0bbSAdrian Hunter 	return *start;
6723749e0bbSAdrian Hunter }
6733749e0bbSAdrian Hunter 
6743749e0bbSAdrian Hunter /*
6753749e0bbSAdrian Hunter  * Hardware sample records, created some time after the event occurred, need to
6763749e0bbSAdrian Hunter  * have subsequent addresses removed from the branch stack.
6773749e0bbSAdrian Hunter  */
thread_stack__br_sample_late(struct thread * thread,int cpu,struct branch_stack * dst,unsigned int sz,u64 ip,u64 kernel_start)6783749e0bbSAdrian Hunter void thread_stack__br_sample_late(struct thread *thread, int cpu,
6793749e0bbSAdrian Hunter 				  struct branch_stack *dst, unsigned int sz,
6803749e0bbSAdrian Hunter 				  u64 ip, u64 kernel_start)
6813749e0bbSAdrian Hunter {
6823749e0bbSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
6833749e0bbSAdrian Hunter 	struct branch_entry *d, *s, *spos, *ssz;
6843749e0bbSAdrian Hunter 	struct branch_stack *src;
6853749e0bbSAdrian Hunter 	unsigned int nr = 0;
6863749e0bbSAdrian Hunter 	bool start = false;
6873749e0bbSAdrian Hunter 
6883749e0bbSAdrian Hunter 	dst->nr = 0;
6893749e0bbSAdrian Hunter 
6903749e0bbSAdrian Hunter 	if (!ts)
6913749e0bbSAdrian Hunter 		return;
6923749e0bbSAdrian Hunter 
6933749e0bbSAdrian Hunter 	src = ts->br_stack_rb;
6943749e0bbSAdrian Hunter 	if (!src->nr)
6953749e0bbSAdrian Hunter 		return;
6963749e0bbSAdrian Hunter 
6973749e0bbSAdrian Hunter 	spos = &src->entries[ts->br_stack_pos];
6983749e0bbSAdrian Hunter 	ssz  = &src->entries[ts->br_stack_sz];
6993749e0bbSAdrian Hunter 
7003749e0bbSAdrian Hunter 	d = &dst->entries[0];
7013749e0bbSAdrian Hunter 	s = spos;
7023749e0bbSAdrian Hunter 
7033749e0bbSAdrian Hunter 	if (ip < kernel_start) {
7043749e0bbSAdrian Hunter 		/*
7053749e0bbSAdrian Hunter 		 * User space sample: start copying branch entries when the
7063749e0bbSAdrian Hunter 		 * branch is in user space.
7073749e0bbSAdrian Hunter 		 */
7083749e0bbSAdrian Hunter 		for (s = spos; s < ssz && nr < sz; s++) {
7093749e0bbSAdrian Hunter 			if (us_start(s, kernel_start, &start)) {
7103749e0bbSAdrian Hunter 				*d++ = *s;
7113749e0bbSAdrian Hunter 				nr += 1;
7123749e0bbSAdrian Hunter 			}
7133749e0bbSAdrian Hunter 		}
7143749e0bbSAdrian Hunter 
7153749e0bbSAdrian Hunter 		if (src->nr >= ts->br_stack_sz) {
7163749e0bbSAdrian Hunter 			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
7173749e0bbSAdrian Hunter 				if (us_start(s, kernel_start, &start)) {
7183749e0bbSAdrian Hunter 					*d++ = *s;
7193749e0bbSAdrian Hunter 					nr += 1;
7203749e0bbSAdrian Hunter 				}
7213749e0bbSAdrian Hunter 			}
7223749e0bbSAdrian Hunter 		}
7233749e0bbSAdrian Hunter 	} else {
7243749e0bbSAdrian Hunter 		struct branch_entry *nb = NULL;
7253749e0bbSAdrian Hunter 
7263749e0bbSAdrian Hunter 		/*
7273749e0bbSAdrian Hunter 		 * Kernel space sample: start copying branch entries when the ip
7283749e0bbSAdrian Hunter 		 * falls in between 2 branches (or the branch is in user space
7293749e0bbSAdrian Hunter 		 * because then the start must have been missed).
7303749e0bbSAdrian Hunter 		 */
7313749e0bbSAdrian Hunter 		for (s = spos; s < ssz && nr < sz; s++) {
7323749e0bbSAdrian Hunter 			if (ks_start(s, ip, kernel_start, &start, nb)) {
7333749e0bbSAdrian Hunter 				*d++ = *s;
7343749e0bbSAdrian Hunter 				nr += 1;
7353749e0bbSAdrian Hunter 			}
7363749e0bbSAdrian Hunter 			nb = s;
7373749e0bbSAdrian Hunter 		}
7383749e0bbSAdrian Hunter 
7393749e0bbSAdrian Hunter 		if (src->nr >= ts->br_stack_sz) {
7403749e0bbSAdrian Hunter 			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
7413749e0bbSAdrian Hunter 				if (ks_start(s, ip, kernel_start, &start, nb)) {
7423749e0bbSAdrian Hunter 					*d++ = *s;
7433749e0bbSAdrian Hunter 					nr += 1;
7443749e0bbSAdrian Hunter 				}
7453749e0bbSAdrian Hunter 				nb = s;
7463749e0bbSAdrian Hunter 			}
7473749e0bbSAdrian Hunter 		}
7483749e0bbSAdrian Hunter 	}
7493749e0bbSAdrian Hunter 
7503749e0bbSAdrian Hunter 	dst->nr = nr;
7513749e0bbSAdrian Hunter }
7523749e0bbSAdrian Hunter 
75392a9e4f7SAdrian Hunter struct call_return_processor *
call_return_processor__new(int (* process)(struct call_return * cr,u64 * parent_db_id,void * data),void * data)754f435887eSAdrian Hunter call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
75592a9e4f7SAdrian Hunter 			   void *data)
75692a9e4f7SAdrian Hunter {
75792a9e4f7SAdrian Hunter 	struct call_return_processor *crp;
75892a9e4f7SAdrian Hunter 
75992a9e4f7SAdrian Hunter 	crp = zalloc(sizeof(struct call_return_processor));
76092a9e4f7SAdrian Hunter 	if (!crp)
76192a9e4f7SAdrian Hunter 		return NULL;
76292a9e4f7SAdrian Hunter 	crp->cpr = call_path_root__new();
76392a9e4f7SAdrian Hunter 	if (!crp->cpr)
76492a9e4f7SAdrian Hunter 		goto out_free;
76592a9e4f7SAdrian Hunter 	crp->process = process;
76692a9e4f7SAdrian Hunter 	crp->data = data;
76792a9e4f7SAdrian Hunter 	return crp;
76892a9e4f7SAdrian Hunter 
76992a9e4f7SAdrian Hunter out_free:
77092a9e4f7SAdrian Hunter 	free(crp);
77192a9e4f7SAdrian Hunter 	return NULL;
77292a9e4f7SAdrian Hunter }
77392a9e4f7SAdrian Hunter 
call_return_processor__free(struct call_return_processor * crp)77492a9e4f7SAdrian Hunter void call_return_processor__free(struct call_return_processor *crp)
77592a9e4f7SAdrian Hunter {
77692a9e4f7SAdrian Hunter 	if (crp) {
77792a9e4f7SAdrian Hunter 		call_path_root__free(crp->cpr);
77892a9e4f7SAdrian Hunter 		free(crp);
77992a9e4f7SAdrian Hunter 	}
78092a9e4f7SAdrian Hunter }
78192a9e4f7SAdrian Hunter 
/*
 * Push a new entry, carrying call path @cp, onto the thread stack.
 *
 * The entry records @ret_addr, @timestamp, @ref, @no_call and @trace_end, plus
 * a snapshot of the stack's current branch, instruction and cycle counts.
 * non_call is cleared and db_id is zeroed.
 *
 * Returns -ENOMEM if @cp is NULL, the error from thread_stack__grow() if the
 * stack is full and cannot be grown, otherwise 0.
 */
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *entry;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		int err = thread_stack__grow(ts);

		if (err)
			return err;
	}

	/* Take the slot only after any growth has happened */
	entry = &ts->stack[ts->cnt];
	ts->cnt += 1;

	entry->ret_addr = ret_addr;
	entry->timestamp = timestamp;
	entry->ref = ref;
	entry->branch_count = ts->branch_count;
	entry->insn_count = ts->insn_count;
	entry->cyc_count = ts->cyc_count;
	entry->cp = cp;
	entry->no_call = no_call;
	entry->trace_end = trace_end;
	entry->non_call = false;
	entry->db_id = 0;

	return 0;
}
81392a9e4f7SAdrian Hunter 
/*
 * Pop the stack for a 'return' to @ret_addr.
 *
 * - Empty stack: nothing to pop, return 1.
 * - Single entry whose call path symbol is @sym: pop it.
 * - Top entry matches @ret_addr and is not a non_call entry: pop it.
 * - Otherwise search downwards for such a matching entry; every entry above
 *   it is popped with the 'no return' flag set, then the match is popped.
 *
 * Returns 1 when no matching entry was found, otherwise the result of
 * thread_stack__call_return().
 */
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	size_t top, i;
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1 && ts->stack[0].cp->sym == sym)
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);

	top = ts->cnt - 1;

	if (ts->stack[top].ret_addr == ret_addr && !ts->stack[top].non_call)
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);

	/* Look below the top entry; 'i' is one past the candidate index */
	for (i = top; i > 0; i--) {
		struct thread_stack_entry *tse = &ts->stack[i - 1];

		if (tse->ret_addr != ret_addr || tse->non_call)
			continue;

		/* Everything above the match never returned */
		while (ts->cnt > i) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							timestamp, ref, true);
			if (err)
				return err;
		}

		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	}

	return 1;
}
85892a9e4f7SAdrian Hunter 
/*
 * Push the first entry onto the stack, directly under the call path root.
 *
 * Uses the sample's 'from' ip and symbol when the ip is non-zero, otherwise
 * the 'to' address and symbol. If both addresses are zero nothing is pushed
 * and 0 is returned. The entry is pushed with no_call set.
 */
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (!sample->ip && !sample->addr)
		return 0;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else {
		ip = sample->addr;
		sym = to_al->sym;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}
88592a9e4f7SAdrian Hunter 
/*
 * Return to userspace, so pop all kernel addresses.
 *
 * Entries are popped, flagged as 'no return', for as long as
 * thread_stack__in_kernel() holds. Stops at the first error and returns it,
 * otherwise returns 0.
 */
static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;

	for (;;) {
		int err;

		if (!thread_stack__in_kernel(ts))
			break;

		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}
90297860b48SAdrian Hunter 
/*
 * Handle a 'return' for which no matching 'call' was found on the stack.
 *
 * Kernel entries are popped first if the sample returns to userspace. Then,
 * with 'parent' being the call path at the top of the stack (or the root if
 * the stack is empty):
 *  - parent's symbol differs from the 'from' symbol: assume 'parent' has not
 *    yet returned, so push 'to', then push and pop 'from';
 *  - otherwise, at the bottom of the stack, assume the missing 'call' was
 *    before the trace started: pop the current symbol and push 'to';
 *  - otherwise assume the 'return' is being used as a jump (e.g. retpoline)
 *    and push 'to' as a non_call entry.
 */
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	parent = ts->cnt ? ts->stack[ts->cnt - 1].cp : root;

	if (parent->sym != fsym) {
		/*
		 * Assume 'parent' has not yet returned, so push 'to', and then
		 * push and pop 'from'.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
		if (err)
			return err;

		cp = call_path__findnew(cpr, cp, fsym, ip, ks);

		err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
		if (err)
			return err;

		return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref,
						 false);
	}

	/*
	 * At the bottom of the stack, assume the missing 'call' was before the
	 * trace started. So, pop the current symbol and push the 'to' symbol.
	 */
	if (ts->cnt == 1) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, false);
		if (err)
			return err;
	}

	if (!ts->cnt) {
		cp = call_path__findnew(cpr, root, tsym, addr, ks);

		return thread_stack__push_cp(ts, addr, tm, ref, cp,
					     true, false);
	}

	/*
	 * Otherwise assume the 'return' is being used as a jump (e.g.
	 * retpoline) and just push the 'to' symbol.
	 */
	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
	if (err)
		return err;

	ts->stack[ts->cnt - 1].non_call = true;

	return 0;
}
99692a9e4f7SAdrian Hunter 
/*
 * Handle the start of a trace: if the top of the stack is a 'trace end' entry,
 * pop it. Returns 0 when there is nothing to pop, otherwise the result of
 * thread_stack__call_return().
 */
static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	if (!ts->cnt)
		return 0;

	if (!ts->stack[ts->cnt - 1].trace_end)
		return 0;

	/* Pop trace end */
	return thread_stack__call_return(thread, ts, --ts->cnt,
					 timestamp, ref, false);
}
101892a9e4f7SAdrian Hunter 
/*
 * Handle the end of a trace by pushing a 'trace end' entry.
 *
 * The entry's call path is a child of the current top of stack, looked up with
 * a NULL symbol and an ip of 0. Its return address is the address following
 * the sampled instruction. Nothing is pushed when the stack is empty, or when
 * it holds a single entry with the same @ref.
 */
static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *parent, *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt)
		return 0;
	if (ts->cnt == 1 && ts->stack[0].ref == ref)
		return 0;

	parent = ts->stack[ts->cnt - 1].cp;
	cp = call_path__findnew(cpr, parent, NULL, 0, ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}
103892a9e4f7SAdrian Hunter 
/*
 * Return true if the symbol name @name begins with the x86 retpoline thunk
 * prefix "__x86_indirect_thunk_". @name must be a NUL-terminated string.
 */
static bool is_x86_retpoline(const char *name)
{
	static const char prefix[] = "__x86_indirect_thunk_";

	/*
	 * Bounded prefix compare: unlike strstr(), this never scans beyond
	 * the prefix length when the name does not match.
	 */
	return !strncmp(name, prefix, sizeof(prefix) - 1);
}
10433c0cd952SAdrian Hunter 
10443c0cd952SAdrian Hunter /*
10453c0cd952SAdrian Hunter  * x86 retpoline functions pollute the call graph. This function removes them.
10463c0cd952SAdrian Hunter  * This does not handle function return thunks, nor is there any improvement
10473c0cd952SAdrian Hunter  * for the handling of inline thunks or extern thunks.
10483c0cd952SAdrian Hunter  */
thread_stack__x86_retpoline(struct thread_stack * ts,struct perf_sample * sample,struct addr_location * to_al)10493c0cd952SAdrian Hunter static int thread_stack__x86_retpoline(struct thread_stack *ts,
10503c0cd952SAdrian Hunter 				       struct perf_sample *sample,
10513c0cd952SAdrian Hunter 				       struct addr_location *to_al)
10523c0cd952SAdrian Hunter {
10533c0cd952SAdrian Hunter 	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
10543c0cd952SAdrian Hunter 	struct call_path_root *cpr = ts->crp->cpr;
10553c0cd952SAdrian Hunter 	struct symbol *sym = tse->cp->sym;
10563c0cd952SAdrian Hunter 	struct symbol *tsym = to_al->sym;
10573c0cd952SAdrian Hunter 	struct call_path *cp;
10583c0cd952SAdrian Hunter 
10593c0cd952SAdrian Hunter 	if (sym && is_x86_retpoline(sym->name)) {
10603c0cd952SAdrian Hunter 		/*
10613c0cd952SAdrian Hunter 		 * This is a x86 retpoline fn. It pollutes the call graph by
10623c0cd952SAdrian Hunter 		 * showing up everywhere there is an indirect branch, but does
10633c0cd952SAdrian Hunter 		 * not itself mean anything. Here the top-of-stack is removed,
10643c0cd952SAdrian Hunter 		 * by decrementing the stack count, and then further down, the
10653c0cd952SAdrian Hunter 		 * resulting top-of-stack is replaced with the actual target.
10663c0cd952SAdrian Hunter 		 * The result is that the retpoline functions will no longer
10673c0cd952SAdrian Hunter 		 * appear in the call graph. Note this only affects the call
10683c0cd952SAdrian Hunter 		 * graph, since all the original branches are left unchanged.
10693c0cd952SAdrian Hunter 		 */
10703c0cd952SAdrian Hunter 		ts->cnt -= 1;
10713c0cd952SAdrian Hunter 		sym = ts->stack[ts->cnt - 2].cp->sym;
10723c0cd952SAdrian Hunter 		if (sym && sym == tsym && to_al->addr != tsym->start) {
10733c0cd952SAdrian Hunter 			/*
10743c0cd952SAdrian Hunter 			 * Target is back to the middle of the symbol we came
10753c0cd952SAdrian Hunter 			 * from so assume it is an indirect jmp and forget it
10763c0cd952SAdrian Hunter 			 * altogether.
10773c0cd952SAdrian Hunter 			 */
10783c0cd952SAdrian Hunter 			ts->cnt -= 1;
10793c0cd952SAdrian Hunter 			return 0;
10803c0cd952SAdrian Hunter 		}
10813c0cd952SAdrian Hunter 	} else if (sym && sym == tsym) {
10823c0cd952SAdrian Hunter 		/*
10833c0cd952SAdrian Hunter 		 * Target is back to the symbol we came from so assume it is an
10843c0cd952SAdrian Hunter 		 * indirect jmp and forget it altogether.
10853c0cd952SAdrian Hunter 		 */
10863c0cd952SAdrian Hunter 		ts->cnt -= 1;
10873c0cd952SAdrian Hunter 		return 0;
10883c0cd952SAdrian Hunter 	}
10893c0cd952SAdrian Hunter 
10903c0cd952SAdrian Hunter 	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
10913c0cd952SAdrian Hunter 				sample->addr, ts->kernel_start);
10923c0cd952SAdrian Hunter 	if (!cp)
10933c0cd952SAdrian Hunter 		return -ENOMEM;
10943c0cd952SAdrian Hunter 
10953c0cd952SAdrian Hunter 	/* Replace the top-of-stack with the actual target */
10963c0cd952SAdrian Hunter 	ts->stack[ts->cnt - 1].cp = cp;
10973c0cd952SAdrian Hunter 
10983c0cd952SAdrian Hunter 	return 0;
10993c0cd952SAdrian Hunter }
11003c0cd952SAdrian Hunter 
/*
 * Update the thread stack for one branch sample and report call/return pairs
 * through @crp.
 *
 * @thread:  thread the sample belongs to
 * @comm:    current comm; a change of comm on a thread whose pid equals its
 *           tid causes the stack to be flushed
 * @sample:  the branch sample (ip, addr, flags, time, counts)
 * @from_al: resolved location of sample->ip
 * @to_al:   resolved location of sample->addr
 * @ref:     reference value recorded with pushed and popped entries
 * @crp:     processor used when a new stack has to be created
 *
 * Dispatches on sample->flags: call, return, trace begin, trace end, or a
 * plain branch to the start of a different symbol. Returns 0 on success or a
 * negative error code.
 */
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t prev_rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	/* Remember the incoming state; 'detected' is dropped back to 'possible' */
	prev_rstate = ts->rstate;
	if (prev_rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		u64 ret_addr = sample->ip + sample->insn_len;
		struct call_path *parent, *cp;

		if (!sample->ip || !sample->addr)
			return 0;

		/* Zero-length calls are excluded */
		if (ret_addr == sample->addr)
			return 0;

		parent = ts->stack[ts->cnt - 1].cp;
		cp = call_path__findnew(ts->crp->cpr, parent, to_al->sym,
					sample->addr, ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);
		if (err)
			return err;

		/*
		 * A call to the same symbol but not the start of the symbol,
		 * may be the start of a x86 retpoline.
		 */
		if (prev_rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

		return 0;
	}

	if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Pop kernel stack */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (prev_rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		/* A positive result means no matching 'call' was found */
		if (err > 0)
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		return err;
	}

	if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN)
		return thread_stack__trace_begin(thread, ts, sample->time, ref);

	if (sample->flags & PERF_IP_FLAG_TRACE_END)
		return thread_stack__trace_end(ts, sample, ref);

	if (sample->flags & PERF_IP_FLAG_BRANCH &&
	    from_al->sym != to_al->sym && to_al->sym &&
	    to_al->addr == to_al->sym->start) {
		struct call_path *parent = ts->stack[ts->cnt - 1].cp;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(ts->crp->cpr, parent, to_al->sym,
					sample->addr, ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}
1231e216708dSAdrian Hunter 
thread_stack__depth(struct thread * thread,int cpu)1232256d92bcSAdrian Hunter size_t thread_stack__depth(struct thread *thread, int cpu)
1233e216708dSAdrian Hunter {
1234256d92bcSAdrian Hunter 	struct thread_stack *ts = thread__stack(thread, cpu);
1235bd8e68acSAdrian Hunter 
1236bd8e68acSAdrian Hunter 	if (!ts)
1237e216708dSAdrian Hunter 		return 0;
1238bd8e68acSAdrian Hunter 	return ts->cnt;
1239e216708dSAdrian Hunter }
1240