xref: /openbmc/linux/arch/x86/kernel/ftrace.c (revision a26a2a27396c0a0877aa701f8f92d08ba550a6c9)
13d083395SSteven Rostedt /*
23d083395SSteven Rostedt  * Code for replacing ftrace calls with jumps.
33d083395SSteven Rostedt  *
43d083395SSteven Rostedt  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
53d083395SSteven Rostedt  *
63d083395SSteven Rostedt  * Thanks goes to Ingo Molnar, for suggesting the idea.
73d083395SSteven Rostedt  * Mathieu Desnoyers, for suggesting postponing the modifications.
83d083395SSteven Rostedt  * Arjan van de Ven, for keeping me straight, and explaining to me
93d083395SSteven Rostedt  * the dangers of modifying code on the run.
103d083395SSteven Rostedt  */
113d083395SSteven Rostedt 
123d083395SSteven Rostedt #include <linux/spinlock.h>
133d083395SSteven Rostedt #include <linux/hardirq.h>
146f93fc07SSteven Rostedt #include <linux/uaccess.h>
153d083395SSteven Rostedt #include <linux/ftrace.h>
163d083395SSteven Rostedt #include <linux/percpu.h>
173d083395SSteven Rostedt #include <linux/init.h>
183d083395SSteven Rostedt #include <linux/list.h>
193d083395SSteven Rostedt 
20395a59d0SAbhishek Sagar #include <asm/ftrace.h>
21732f3ca7SSteven Rostedt #include <asm/nops.h>
22dfa60abaSSteven Rostedt 
233d083395SSteven Rostedt 
/* The 5-byte nop chosen at boot (see ftrace_dyn_arch_init) used to patch out mcount call sites. */
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

/*
 * Overlay describing the 5-byte x86 call instruction: "e8" is the
 * relative-call opcode and "offset" the signed 32-bit displacement
 * that follows it.  "packed" keeps the struct exactly 5 bytes so it
 * aliases code[] byte for byte.
 */
union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};
333d083395SSteven Rostedt 
34395a59d0SAbhishek Sagar 
/*
 * Compute the signed displacement from @ip to @addr, truncated to the
 * 32 bits that fit in a call instruction's offset field.
 */
static int ftrace_calc_offset(long ip, long addr)
{
	long displacement = addr - ip;

	return (int)displacement;
}
393d083395SSteven Rostedt 
/*
 * Return the buffer holding the nop that replaces an mcount call.
 * The buffer contents are selected at boot by ftrace_dyn_arch_init().
 */
unsigned char *ftrace_nop_replace(void)
{
	return ftrace_nop;
}
443c1720f0SSteven Rostedt 
4515adc048SSteven Rostedt unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
463c1720f0SSteven Rostedt {
473c1720f0SSteven Rostedt 	static union ftrace_code_union calc;
483c1720f0SSteven Rostedt 
493c1720f0SSteven Rostedt 	calc.e8		= 0xe8;
50395a59d0SAbhishek Sagar 	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
513c1720f0SSteven Rostedt 
523c1720f0SSteven Rostedt 	/*
533c1720f0SSteven Rostedt 	 * No locking needed, this must be called via kstop_machine
543c1720f0SSteven Rostedt 	 * which in essence is like running on a uniprocessor machine.
553c1720f0SSteven Rostedt 	 */
563c1720f0SSteven Rostedt 	return calc.code;
573c1720f0SSteven Rostedt }
583c1720f0SSteven Rostedt 
5917666f02SSteven Rostedt /*
6017666f02SSteven Rostedt  * Modifying code must take extra care. On an SMP machine, if
6117666f02SSteven Rostedt  * the code being modified is also being executed on another CPU
6217666f02SSteven Rostedt  * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
6417666f02SSteven Rostedt  * But this does not stop NMIs from happening. We still need
6517666f02SSteven Rostedt  * to protect against that. We separate out the modification of
6617666f02SSteven Rostedt  * the code to take care of this.
6717666f02SSteven Rostedt  *
6817666f02SSteven Rostedt  * Two buffers are added: An IP buffer and a "code" buffer.
6917666f02SSteven Rostedt  *
70*a26a2a27SSteven Rostedt  * 1) Put the instruction pointer into the IP buffer
7117666f02SSteven Rostedt  *    and the new code into the "code" buffer.
7217666f02SSteven Rostedt  * 2) Set a flag that says we are modifying code
7317666f02SSteven Rostedt  * 3) Wait for any running NMIs to finish.
7417666f02SSteven Rostedt  * 4) Write the code
7517666f02SSteven Rostedt  * 5) clear the flag.
7617666f02SSteven Rostedt  * 6) Wait for any running NMIs to finish.
7717666f02SSteven Rostedt  *
7817666f02SSteven Rostedt  * If an NMI is executed, the first thing it does is to call
7917666f02SSteven Rostedt  * "ftrace_nmi_enter". This will check if the flag is set to write
8017666f02SSteven Rostedt  * and if it is, it will write what is in the IP and "code" buffers.
8117666f02SSteven Rostedt  *
8217666f02SSteven Rostedt  * The trick is, it does not matter if everyone is writing the same
8317666f02SSteven Rostedt  * content to the code location. Also, if a CPU is executing code
8417666f02SSteven Rostedt  * it is OK to write to that code location if the contents being written
8517666f02SSteven Rostedt  * are the same as what exists.
8617666f02SSteven Rostedt  */
8717666f02SSteven Rostedt 
static atomic_t in_nmi = ATOMIC_INIT(0);	/* number of NMIs currently executing */
static int mod_code_status;		/* holds return value of text write */
static int mod_code_write;		/* set when NMI should do the write */
static void *mod_code_ip;		/* holds the IP to write to */
static void *mod_code_newcode;		/* holds the text to write to the IP */

/* stats: how often the modifier had to wait on NMIs, and how often
 * an NMI performed the text write itself (see ftrace_nmi_enter) */
static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);
96b807c3d0SSteven Rostedt 
97b807c3d0SSteven Rostedt int ftrace_arch_read_dyn_info(char *buf, int size)
98b807c3d0SSteven Rostedt {
99b807c3d0SSteven Rostedt 	int r;
100b807c3d0SSteven Rostedt 
101b807c3d0SSteven Rostedt 	r = snprintf(buf, size, "%u %u",
102b807c3d0SSteven Rostedt 		     nmi_wait_count,
103b807c3d0SSteven Rostedt 		     atomic_read(&nmi_update_count));
104b807c3d0SSteven Rostedt 	return r;
105b807c3d0SSteven Rostedt }
106b807c3d0SSteven Rostedt 
/*
 * Perform the text write staged in mod_code_ip/mod_code_newcode and
 * record the result in mod_code_status.  Called from
 * do_ftrace_mod_code() and, concurrently, from NMI context via
 * ftrace_nmi_enter().
 */
static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU process can be writing to mod_code_status.
	 *    (and the code itself)
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

}
11917666f02SSteven Rostedt 
/*
 * NMI entry hook.  If a code modification is in flight, do the write
 * ourselves so this NMI never executes half-modified text; writing
 * the same bytes from several CPUs is harmless (see comment above).
 */
void ftrace_nmi_enter(void)
{
	atomic_inc(&in_nmi);
	/* Must have in_nmi seen before reading write flag */
	smp_mb();
	if (mod_code_write) {
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
}
13017666f02SSteven Rostedt 
/* NMI exit hook: drop our in_nmi count so wait_for_nmi() can proceed. */
void ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing in_nmi */
	smp_wmb();
	atomic_dec(&in_nmi);
}
13717666f02SSteven Rostedt 
13817666f02SSteven Rostedt static void wait_for_nmi(void)
13917666f02SSteven Rostedt {
140b807c3d0SSteven Rostedt 	int waited = 0;
141b807c3d0SSteven Rostedt 
142b807c3d0SSteven Rostedt 	while (atomic_read(&in_nmi)) {
143b807c3d0SSteven Rostedt 		waited = 1;
14417666f02SSteven Rostedt 		cpu_relax();
14517666f02SSteven Rostedt 	}
14617666f02SSteven Rostedt 
147b807c3d0SSteven Rostedt 	if (waited)
148b807c3d0SSteven Rostedt 		nmi_wait_count++;
149b807c3d0SSteven Rostedt }
150b807c3d0SSteven Rostedt 
/*
 * Write @new_code at @ip while coordinating with NMIs, following the
 * six-step protocol documented above: stage the buffers, raise the
 * write flag, drain NMIs, write, clear the flag, drain NMIs again.
 * Returns the status of the text write (0 on success).
 */
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_wmb();

	mod_code_write = 1;

	/* Make sure write bit is visible before we wait on NMIs */
	smp_mb();

	wait_for_nmi();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_wmb();

	mod_code_write = 0;

	/* make sure NMIs see the cleared bit */
	smp_mb();

	wait_for_nmi();

	return mod_code_status;
}
18417666f02SSteven Rostedt 
18517666f02SSteven Rostedt 
18615adc048SSteven Rostedt int
1873d083395SSteven Rostedt ftrace_modify_code(unsigned long ip, unsigned char *old_code,
1883d083395SSteven Rostedt 		   unsigned char *new_code)
1893d083395SSteven Rostedt {
1906f93fc07SSteven Rostedt 	unsigned char replaced[MCOUNT_INSN_SIZE];
1913d083395SSteven Rostedt 
1923d083395SSteven Rostedt 	/*
1933d083395SSteven Rostedt 	 * Note: Due to modules and __init, code can
1943d083395SSteven Rostedt 	 *  disappear and change, we need to protect against faulting
19576aefee5SSteven Rostedt 	 *  as well as code changing. We do this by using the
196ab9a0918SSteven Rostedt 	 *  probe_kernel_* functions.
1973d083395SSteven Rostedt 	 *
1983d083395SSteven Rostedt 	 * No real locking needed, this code is run through
1996f93fc07SSteven Rostedt 	 * kstop_machine, or before SMP starts.
2003d083395SSteven Rostedt 	 */
20176aefee5SSteven Rostedt 
20276aefee5SSteven Rostedt 	/* read the text we want to modify */
203ab9a0918SSteven Rostedt 	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
204593eb8a2SSteven Rostedt 		return -EFAULT;
2056f93fc07SSteven Rostedt 
20676aefee5SSteven Rostedt 	/* Make sure it is what we expect it to be */
2076f93fc07SSteven Rostedt 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
208593eb8a2SSteven Rostedt 		return -EINVAL;
2096f93fc07SSteven Rostedt 
21076aefee5SSteven Rostedt 	/* replace the text with the new text */
21117666f02SSteven Rostedt 	if (do_ftrace_mod_code(ip, new_code))
212593eb8a2SSteven Rostedt 		return -EPERM;
2136f93fc07SSteven Rostedt 
2143d083395SSteven Rostedt 	sync_core();
2153d083395SSteven Rostedt 
2166f93fc07SSteven Rostedt 	return 0;
2173d083395SSteven Rostedt }
2183d083395SSteven Rostedt 
21915adc048SSteven Rostedt int ftrace_update_ftrace_func(ftrace_func_t func)
220d61f82d0SSteven Rostedt {
221d61f82d0SSteven Rostedt 	unsigned long ip = (unsigned long)(&ftrace_call);
222395a59d0SAbhishek Sagar 	unsigned char old[MCOUNT_INSN_SIZE], *new;
223d61f82d0SSteven Rostedt 	int ret;
224d61f82d0SSteven Rostedt 
225395a59d0SAbhishek Sagar 	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
226d61f82d0SSteven Rostedt 	new = ftrace_call_replace(ip, (unsigned long)func);
227d61f82d0SSteven Rostedt 	ret = ftrace_modify_code(ip, old, new);
228d61f82d0SSteven Rostedt 
229d61f82d0SSteven Rostedt 	return ret;
230d61f82d0SSteven Rostedt }
231d61f82d0SSteven Rostedt 
/*
 * Boot-time selection of the nop used to patch out mcount calls.
 * Executes candidate 5-byte nops and, via the exception-fixup table,
 * falls back to a lesser one if the CPU faults on it; the winner is
 * copied into ftrace_nop[].  The caller's return code is stored
 * through @data; the function itself always returns 0.
 */
int __init ftrace_dyn_arch_init(void *data)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;	/* 0: P6 nop ok, 1: 5-byte nop ok, 2: use jmp */

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * go to a lesser efficient 5 byte nop. If that fails
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we can not use a multi part nop
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing then, it might
	 * cause a system crash.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n"  /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2:	movl $1, %0\n"	/* P6 nop faulted */
		"	jmp ftrace_test_nop5\n"
		"3:	movl $2, %0\n"	/* 5-byte nop faulted too */
		"	jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
		break;
	case 1:
		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
		break;
	case 2:
		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
		break;
	}

	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
295