/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>

#include <trace/syscall.h>

#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>


#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8		= 0xe8;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}
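
/*
 * A worked example of the encoding above, with hypothetical addresses:
 * for a call site at ip = 0xffffffff81000100 calling
 * addr = 0xffffffff81000600,
 *
 *	offset = addr - (ip + MCOUNT_INSN_SIZE)
 *	       = 0xffffffff81000600 - 0xffffffff81000105
 *	       = 0x4fb
 *
 * so the five bytes returned are
 *
 *	e8 fb 04 00 00		call	0xffffffff81000600
 *
 * i.e. the e8 opcode followed by the little-endian rel32 displacement,
 * measured from the instruction that follows the call.
 */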
/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU,
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Wait for any running NMIs to finish and set a flag that says
 *    we are modifying code; this is done in an atomic operation.
 * 3) Write the code.
 * 4) Clear the flag.
 * 5) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write,
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code,
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status;		/* holds return value of text write */
static void *mod_code_ip;		/* holds the IP to write to */
static void *mod_code_newcode;		/* holds the text to write to the IP */

static unsigned nmi_wait_count;
static atomic_t nmi_update_count = ATOMIC_INIT(0);

int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	r = snprintf(buf, size, "%u %u",
		     nmi_wait_count,
		     atomic_read(&nmi_update_count));
	return r;
}

static void clear_mod_flag(void)
{
	int old = atomic_read(&nmi_running);

	for (;;) {
		int new = old & ~MOD_CODE_WRITE_FLAG;

		if (old == new)
			break;

		old = atomic_cmpxchg(&nmi_running, old, new);
	}
}

static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and the code itself).
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);

	/* if we fail, then kill any new writers */
	if (mod_code_status)
		clear_mod_flag();
}

void ftrace_nmi_enter(void)
{
	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
		smp_rmb();
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
	/* Must have previous changes seen before executions */
	smp_mb();
}

void ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing nmi_running */
	smp_mb();
	atomic_dec(&nmi_running);
}

static void wait_for_nmi_and_set_mod_flag(void)
{
	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
		return;

	do {
		cpu_relax();
	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));

	nmi_wait_count++;
}

static void wait_for_nmi(void)
{
	if (!atomic_read(&nmi_running))
		return;

	do {
		cpu_relax();
	} while (atomic_read(&nmi_running));

	nmi_wait_count++;
}
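
/*
 * An illustrative interleaving of the pieces above, as driven by
 * do_ftrace_mod_code() below (the CPU numbers are arbitrary):
 *
 *	CPU 0 (modifier)		CPU 1 (takes an NMI)
 *	----------------		--------------------
 *	fills mod_code_ip/newcode
 *	smp_mb()
 *	wait_for_nmi_and_set_mod_flag()
 *					ftrace_nmi_enter()
 *					  sees MOD_CODE_WRITE_FLAG set
 *					  ftrace_mod_code()
 *	ftrace_mod_code()
 *	clear_mod_flag()
 *	wait_for_nmi()
 *					ftrace_nmi_exit()
 *
 * Both CPUs may end up writing, but they write identical bytes to the
 * same location, so the duplicate write is harmless.
 */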
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only with
	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
	 * of the kernel text mapping to modify the kernel text.
	 *
	 * For 32-bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa(ip));

	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_mb();

	wait_for_nmi_and_set_mod_flag();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_mb();

	clear_mod_flag();
	wait_for_nmi();

	return mod_code_status;
}


static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

static unsigned char *ftrace_nop_replace(void)
{
	return ftrace_nop;
}

static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
		   unsigned char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 * disappear and change; we need to protect against faulting
	 * as well as code changing. We do this by using the
	 * probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	return ftrace_modify_code(rec->ip, old, new);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	return ftrace_modify_code(rec->ip, old, new);
}
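
/*
 * For illustration only: assuming ftrace_dyn_arch_init() below selects
 * the P6 nop, ftrace_make_call() rewrites the five bytes at rec->ip from
 *
 *	0f 1f 44 00 00		(the P6 5-byte nop)
 *
 * to
 *
 *	e8 xx xx xx xx		(call, rel32 displacement to addr)
 *
 * and ftrace_make_nop() does the reverse. In both cases
 * ftrace_modify_code() refuses to touch the site (-EINVAL) if the bytes
 * found there do not match the expected old code.
 */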
int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = ftrace_modify_code(ip, old, new);

	return ret;
}

int __init ftrace_dyn_arch_init(void *data)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * fall back to a less efficient 5-byte nop. If that fails,
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we cannot use a multi-part nop,
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing at that point, it
	 * might cause a system crash.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n"  /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2:	movl $1, %0\n"
		"	jmp ftrace_test_nop5\n"
		"3:	movl $2, %0\n"
		"	jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
		break;
	case 1:
		pr_info("converting mcount calls to 66 66 66 66 90\n");
		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
		break;
	case 2:
		pr_info("converting mcount calls to jmp . + 5\n");
		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
		break;
	}

	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
#endif

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
		return -EINVAL;

	*(int *)(&code[1]) = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);
	int old_offset, new_offset;

	old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
	new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);

	return ftrace_mod_jmp(ip, old_offset, new_offset);
}
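
/*
 * A worked example of the jmp patching above, with hypothetical
 * addresses: if ftrace_graph_call sits at 0xffffffff81000200 and
 * ftrace_graph_caller at 0xffffffff81000900, then enabling the graph
 * caller computes
 *
 *	new_offset = 0xffffffff81000900 - (0xffffffff81000200 + 5)
 *		   = 0x6fb
 *
 * and ftrace_mod_jmp() rewrites the site to
 *
 *	e9 fb 06 00 00		jmp	ftrace_graph_caller
 *
 * after first checking that the e9 opcode and the old displacement
 * (the one pointing at ftrace_stub) are what it expects.
 */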

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it on the stack of return addresses
 * in the current thread info.
 */
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	struct ftrace_graph_ent trace;
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * skip such a protection.
	 */
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
		*parent = old;
		return;
	}

	trace.func = self_addr;

	/* Only trace if the calling function expects to */
	if (!ftrace_graph_entry(&trace)) {
		current->curr_ret_stack--;
		*parent = old;
	}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

#ifdef CONFIG_FTRACE_SYSCALLS

extern unsigned long *sys_call_table;

unsigned long __init arch_syscall_addr(int nr)
{
	return (unsigned long)(&sys_call_table)[nr];
}
#endif