/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>


#ifdef CONFIG_DYNAMIC_FTRACE

/*
 * modifying_code is set to notify NMIs that they need to use
 * memory barriers when entering or exiting. But we don't want
 * to burden NMIs with unnecessary memory barriers when code
 * modification is not being done (which is most of the time).
 *
 * A mutex is already held when ftrace_arch_code_modify_prepare
 * and post_process are called. No locks need to be taken here.
 *
 * Stop machine will make sure currently running NMIs are done
 * and new NMIs will see the updated variable before we need
 * to worry about NMIs doing memory barriers.
 */
static int modifying_code __read_mostly;
static DEFINE_PER_CPU(int, save_modifying_code);

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	modifying_code = 1;
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	modifying_code = 0;
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8		= 0xe8;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}

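/*
 * Illustrative example (the addresses below are made up, not real
 * kernel symbols): if the mcount call site sits at
 * ip = 0xffffffff81000010 and the tracer entry point is at
 * addr = 0xffffffff81000100, then ftrace_call_replace() computes
 *
 *	offset = addr - (ip + MCOUNT_INSN_SIZE)
 *	       = 0xffffffff81000100 - 0xffffffff81000015
 *	       = 0xeb
 *
 * and returns the five bytes
 *
 *	e8 eb 00 00 00		call 0xffffffff81000100
 *
 * i.e. the 0xe8 near-call opcode followed by the signed 32-bit
 * displacement stored little-endian, exactly matching the layout of
 * union ftrace_code_union above.
 */
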
/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done in one atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is that it does not matter which CPU performs the write,
 * as long as everyone writes the same content to the code location.
 * Also, if a CPU is executing code it is OK to write to that code
 * location if the contents being written are the same as what exists.
 */

#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;		/* holds return value of text write */
static void *mod_code_ip;		/* holds the IP to write to */
static const void *mod_code_newcode;	/* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	r = snprintf(buf, size, "%u %u",
		     nmi_wait_count,
		     atomic_read(&nmi_update_count));
	return r;
}

static void clear_mod_flag(void)
{
	int old = atomic_read(&nmi_running);

	for (;;) {
		int new = old & ~MOD_CODE_WRITE_FLAG;

		if (old == new)
			break;

		old = atomic_cmpxchg(&nmi_running, old, new);
	}
}

static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and to the code itself).
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

	/* if we fail, then kill any new writers */
	if (mod_code_status)
		clear_mod_flag();
}

void ftrace_nmi_enter(void)
{
	__this_cpu_write(save_modifying_code, modifying_code);

	if (!__this_cpu_read(save_modifying_code))
		return;

	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
		smp_rmb();
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
	/* Must have previous changes seen before executions */
	smp_mb();
}

void ftrace_nmi_exit(void)
{
	if (!__this_cpu_read(save_modifying_code))
		return;

	/* Finish all executions before clearing nmi_running */
	smp_mb();
	atomic_dec(&nmi_running);
}

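/*
 * Illustrative sketch of how nmi_running is packed (the value below is
 * an example, not something read from a running system): bit 31 is
 * MOD_CODE_WRITE_FLAG, "a code update is pending, the NMI must write
 * it", and the low bits count NMIs currently between ftrace_nmi_enter()
 * and ftrace_nmi_exit().  A value of
 *
 *	0x80000002 == MOD_CODE_WRITE_FLAG | 2
 *
 * means an update is pending and two CPUs are handling an NMI right
 * now.  The single atomic_inc_return() in ftrace_nmi_enter() therefore
 * both registers the NMI and tells it, via the flag bit, whether it has
 * to perform the write itself before it can safely run traced code.
 */
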
static void wait_for_nmi_and_set_mod_flag(void)
{
	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
		return;

	do {
		cpu_relax();
	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

	nmi_wait_count++;
}

static void wait_for_nmi(void)
{
	if (!atomic_read(&nmi_running))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&nmi_running));

	nmi_wait_count++;
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, const void *new_code)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_mb();

	wait_for_nmi_and_set_mod_flag();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_mb();

	clear_mod_flag();
	wait_for_nmi();

	return mod_code_status;
}

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 * disappear and change; we need to protect against faulting
	 * as well as code changing. We do this by using the
	 * probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = ftrace_modify_code(ip, old, new);

	return ret;
}

int __init ftrace_dyn_arch_init(void *data)
{
	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
		return -EINVAL;

	*(int *)(&code[1]) = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}

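/*
 * Illustrative example (the displacement bytes are placeholders, not
 * values from a real build): ftrace_graph_call labels a 5-byte
 * "jmp rel32" instruction, so toggling the graph caller only rewrites
 * the four displacement bytes in place:
 *
 *	e9 <rel32 to ftrace_stub>		graph tracing disabled
 *	e9 <rel32 to ftrace_graph_caller>	graph tracing enabled
 *
 * where each rel32 is target - (ip + MCOUNT_INSN_SIZE), computed the
 * same way as the call offsets above.  The 0xe9 opcode byte is never
 * touched, and ftrace_mod_jmp() refuses to patch a site that does not
 * already contain a jmp with the expected old displacement.
 */
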
int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in the current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	trace.func = self_addr;
	trace.depth = current->curr_ret_stack + 1;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		*parent = old;
		return;
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
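
/*
 * For reference, the fault-protected pointer swap in
 * prepare_ftrace_return() above is roughly equivalent to the following
 * C (a sketch only; the exception-table entries are what let the real
 * version survive a bad 'parent' pointer instead of oopsing):
 *
 *	old = *parent;			// read the saved return address
 *	*parent = return_hooker;	// redirect it to return_to_handler
 *	faulted = 0;
 *
 * If either access faults, the fixup at label 4 runs instead and simply
 * sets faulted = 1, which the C code turns into ftrace_graph_stop().
 */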