/*
 * Copyright (C) 2008 Matt Fleming <matt@console-pimps.org>
 * Copyright (C) 2008 Paul Mundt <lethal@linux-sh.org>
 *
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <asm/ftrace.h>
#include <asm/cacheflush.h>
#include <asm/unistd.h>
#include <trace/syscall.h>

#ifdef CONFIG_DYNAMIC_FTRACE
static unsigned char ftrace_replaced_code[MCOUNT_INSN_SIZE];

static unsigned char ftrace_nop[4];
/*
 * If we're trying to nop out a call to a function, we instead
 * make the call site branch to the address after the memory table.
 *
 * 8c011060 <a>:
 * 8c011060:       02 d1           mov.l   8c01106c <a+0xc>,r1
 * 8c011062:       22 4f           sts.l   pr,@-r15
 * 8c011064:       02 c7           mova    8c011070 <a+0x10>,r0
 * 8c011066:       2b 41           jmp     @r1
 * 8c011068:       2a 40           lds     r0,pr
 * 8c01106a:       09 00           nop
 * 8c01106c:       68 24           .word 0x2468     <--- ip
 * 8c01106e:       1d 8c           .word 0x8c1d
 * 8c011070:       26 4f           lds.l   @r15+,pr <--- ip + MCOUNT_INSN_SIZE
 *
 * We write 0x8c011070 to 0x8c01106c so that on entry to a() we branch
 * past the _mcount call and continue executing code like normal.
 */
static unsigned char *ftrace_nop_replace(unsigned long ip)
{
	__raw_writel(ip + MCOUNT_INSN_SIZE, ftrace_nop);
	return ftrace_nop;
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	/* Place the address in the memory table. */
	__raw_writel(addr, ftrace_replaced_code);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return ftrace_replaced_code;
}

/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU,
 * that CPU will have undefined results and possibly take a fault.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: An IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done with an atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is that it does not matter if more than one CPU writes
 * to the code location, as long as they all write the same content.
 * Also, if a CPU is executing code, it is OK to write to that code
 * location if the contents being written are the same as what exists.
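 *
 * Note that nmi_running below does double duty: the low bits count
 * CPUs currently inside an NMI handler, while MOD_CODE_WRITE_FLAG in
 * the top bit tells those handlers that a write is pending.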
 */
#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;		/* holds return value of text write */
static void *mod_code_ip;		/* holds the IP to write to */
static void *mod_code_newcode;		/* holds the text to write to the IP */

static void clear_mod_flag(void)
{
	int old = atomic_read(&nmi_running);

	for (;;) {
		int new = old & ~MOD_CODE_WRITE_FLAG;

		if (old == new)
			break;

		old = atomic_cmpxchg(&nmi_running, old, new);
	}
}

static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and the code itself).
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

	/* if we fail, then kill any new writers */
	if (mod_code_status)
		clear_mod_flag();
}

void arch_ftrace_nmi_enter(void)
{
	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
		smp_rmb();
		ftrace_mod_code();
	}
	/* Must have previous changes seen before executions */
	smp_mb();
}

void arch_ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing nmi_running */
	smp_mb();
	atomic_dec(&nmi_running);
}

static void wait_for_nmi_and_set_mod_flag(void)
{
	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
		return;

	do {
		cpu_relax();
	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
}

static void wait_for_nmi(void)
{
	if (!atomic_read(&nmi_running))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&nmi_running));
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_mb();

	wait_for_nmi_and_set_mod_flag();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_mb();

	clear_mod_flag();
	wait_for_nmi();

	return mod_code_status;
}

static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
			      unsigned char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug were to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read and modify the code with probe_kernel_*(), and make
	 * sure what we read is what we expected it to be before modifying it.
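	 *
	 * Each failure mode maps to its own error code: -EFAULT if the
	 * text cannot be read, -EINVAL if it is not the code we expect,
	 * and -EPERM if the protected write itself fails.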
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call) + MCOUNT_INSN_OFFSET;
	unsigned char old[MCOUNT_INSN_SIZE], *new;

	memcpy(old, (unsigned char *)ip, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);

	return ftrace_modify_code(ip, old, new);
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace(ip);

	return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace(ip);
	new = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod(unsigned long ip, unsigned long old_addr,
		      unsigned long new_addr)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (old_addr != __raw_readl((unsigned long *)code))
		return -EINVAL;

	__raw_writel(new_addr, ip);
	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip, old_addr, new_addr;

	ip = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;
	old_addr = (unsigned long)(&skip_trace);
	new_addr = (unsigned long)(&ftrace_graph_caller);

	return ftrace_mod(ip, old_addr, new_addr);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip, old_addr, new_addr;

	ip = (unsigned long)(&ftrace_graph_call) + GRAPH_INSN_OFFSET;
	old_addr = (unsigned long)(&ftrace_graph_caller);
	new_addr = (unsigned long)(&skip_trace);

	return ftrace_mod(ip, old_addr, new_addr);
}
#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 *
 * This is the main routine for the function graph tracer. The function
 * graph tracer essentially works like this:
 *
 * parent is the stack address containing self_addr's return address.
 * We pull the real return address out of parent and store it in
 * current's ret_stack. Then, we replace the return address on the stack
 * with the address of return_to_handler. self_addr is the function that
 * called mcount.
 *
 * When self_addr returns, it will jump to return_to_handler which calls
 * ftrace_return_to_handler. ftrace_return_to_handler will pull the real
 * return address off of current's ret_stack and jump to it.
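 *
 * If the return address cannot be pushed onto current's ret_stack
 * (-EBUSY), or the entry handler declines to trace this call, the
 * original return address is written back to parent and the function
 * runs untraced.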
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
	unsigned long old;
	int faulted, err;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * go without such protection.
	 */
	__asm__ __volatile__(
		"1:						\n\t"
		"mov.l	@%2, %0					\n\t"
		"2:						\n\t"
		"mov.l	%3, @%2					\n\t"
		"mov	#0, %1					\n\t"
		"3:						\n\t"
		".section .fixup, \"ax\"			\n\t"
		"4:						\n\t"
		"mov.l	5f, %0					\n\t"
		"jmp	@%0					\n\t"
		" mov	#1, %1					\n\t"
		".balign 4					\n\t"
		"5:	.long 3b				\n\t"
		".previous					\n\t"
		".section __ex_table,\"a\"			\n\t"
		".long 1b, 4b					\n\t"
		".long 2b, 4b					\n\t"
		".previous					\n\t"
		: "=&r" (old), "=r" (faulted)
		: "r" (parent), "r" (return_hooker)
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
	if (err == -EBUSY) {
		__raw_writel(old, parent);
		return;
	}

	trace.func = self_addr;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		current->curr_ret_stack--;
		__raw_writel(old, parent);
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */