1 /* 2 * Code for replacing ftrace calls with jumps. 3 * 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 5 * 6 * Thanks goes to Ingo Molnar, for suggesting the idea. 7 * Mathieu Desnoyers, for suggesting postponing the modifications. 8 * Arjan van de Ven, for keeping me straight, and explaining to me 9 * the dangers of modifying code on the run. 10 */ 11 12 #include <linux/spinlock.h> 13 #include <linux/hardirq.h> 14 #include <linux/uaccess.h> 15 #include <linux/ftrace.h> 16 #include <linux/percpu.h> 17 #include <linux/sched.h> 18 #include <linux/init.h> 19 #include <linux/list.h> 20 21 #include <asm/ftrace.h> 22 #include <linux/ftrace.h> 23 #include <asm/nops.h> 24 #include <asm/nmi.h> 25 26 27 #ifdef CONFIG_DYNAMIC_FTRACE 28 29 union ftrace_code_union { 30 char code[MCOUNT_INSN_SIZE]; 31 struct { 32 char e8; 33 int offset; 34 } __attribute__((packed)); 35 }; 36 37 static int ftrace_calc_offset(long ip, long addr) 38 { 39 return (int)(addr - ip); 40 } 41 42 static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) 43 { 44 static union ftrace_code_union calc; 45 46 calc.e8 = 0xe8; 47 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); 48 49 /* 50 * No locking needed, this must be called via kstop_machine 51 * which in essence is like running on a uniprocessor machine. 52 */ 53 return calc.code; 54 } 55 56 /* 57 * Modifying code must take extra care. On an SMP machine, if 58 * the code being modified is also being executed on another CPU 59 * that CPU will have undefined results and possibly take a GPF. 60 * We use kstop_machine to stop other CPUS from exectuing code. 61 * But this does not stop NMIs from happening. We still need 62 * to protect against that. We separate out the modification of 63 * the code to take care of this. 64 * 65 * Two buffers are added: An IP buffer and a "code" buffer. 66 * 67 * 1) Put the instruction pointer into the IP buffer 68 * and the new code into the "code" buffer. 69 * 2) Set a flag that says we are modifying code 70 * 3) Wait for any running NMIs to finish. 71 * 4) Write the code 72 * 5) clear the flag. 73 * 6) Wait for any running NMIs to finish. 74 * 75 * If an NMI is executed, the first thing it does is to call 76 * "ftrace_nmi_enter". This will check if the flag is set to write 77 * and if it is, it will write what is in the IP and "code" buffers. 78 * 79 * The trick is, it does not matter if everyone is writing the same 80 * content to the code location. Also, if a CPU is executing code 81 * it is OK to write to that code location if the contents being written 82 * are the same as what exists. 83 */ 84 85 static atomic_t in_nmi = ATOMIC_INIT(0); 86 static int mod_code_status; /* holds return value of text write */ 87 static int mod_code_write; /* set when NMI should do the write */ 88 static void *mod_code_ip; /* holds the IP to write to */ 89 static void *mod_code_newcode; /* holds the text to write to the IP */ 90 91 static unsigned nmi_wait_count; 92 static atomic_t nmi_update_count = ATOMIC_INIT(0); 93 94 int ftrace_arch_read_dyn_info(char *buf, int size) 95 { 96 int r; 97 98 r = snprintf(buf, size, "%u %u", 99 nmi_wait_count, 100 atomic_read(&nmi_update_count)); 101 return r; 102 } 103 104 static void ftrace_mod_code(void) 105 { 106 /* 107 * Yes, more than one CPU process can be writing to mod_code_status. 108 * (and the code itself) 109 * But if one were to fail, then they all should, and if one were 110 * to succeed, then they all should. 111 */ 112 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, 113 MCOUNT_INSN_SIZE); 114 } 115 116 void ftrace_nmi_enter(void) 117 { 118 atomic_inc(&in_nmi); 119 /* Must have in_nmi seen before reading write flag */ 120 smp_mb(); 121 if (mod_code_write) { 122 ftrace_mod_code(); 123 atomic_inc(&nmi_update_count); 124 } 125 } 126 127 void ftrace_nmi_exit(void) 128 { 129 /* Finish all executions before clearing in_nmi */ 130 smp_wmb(); 131 atomic_dec(&in_nmi); 132 } 133 134 static void wait_for_nmi(void) 135 { 136 int waited = 0; 137 138 while (atomic_read(&in_nmi)) { 139 waited = 1; 140 cpu_relax(); 141 } 142 143 if (waited) 144 nmi_wait_count++; 145 } 146 147 static int 148 do_ftrace_mod_code(unsigned long ip, void *new_code) 149 { 150 mod_code_ip = (void *)ip; 151 mod_code_newcode = new_code; 152 153 /* The buffers need to be visible before we let NMIs write them */ 154 smp_wmb(); 155 156 mod_code_write = 1; 157 158 /* Make sure write bit is visible before we wait on NMIs */ 159 smp_mb(); 160 161 wait_for_nmi(); 162 163 /* Make sure all running NMIs have finished before we write the code */ 164 smp_mb(); 165 166 ftrace_mod_code(); 167 168 /* Make sure the write happens before clearing the bit */ 169 smp_wmb(); 170 171 mod_code_write = 0; 172 173 /* make sure NMIs see the cleared bit */ 174 smp_mb(); 175 176 wait_for_nmi(); 177 178 return mod_code_status; 179 } 180 181 182 183 184 static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; 185 186 static unsigned char *ftrace_nop_replace(void) 187 { 188 return ftrace_nop; 189 } 190 191 static int 192 ftrace_modify_code(unsigned long ip, unsigned char *old_code, 193 unsigned char *new_code) 194 { 195 unsigned char replaced[MCOUNT_INSN_SIZE]; 196 197 /* 198 * Note: Due to modules and __init, code can 199 * disappear and change, we need to protect against faulting 200 * as well as code changing. We do this by using the 201 * probe_kernel_* functions. 202 * 203 * No real locking needed, this code is run through 204 * kstop_machine, or before SMP starts. 205 */ 206 207 /* read the text we want to modify */ 208 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) 209 return -EFAULT; 210 211 /* Make sure it is what we expect it to be */ 212 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) 213 return -EINVAL; 214 215 /* replace the text with the new text */ 216 if (do_ftrace_mod_code(ip, new_code)) 217 return -EPERM; 218 219 sync_core(); 220 221 return 0; 222 } 223 224 int ftrace_make_nop(struct module *mod, 225 struct dyn_ftrace *rec, unsigned long addr) 226 { 227 unsigned char *new, *old; 228 unsigned long ip = rec->ip; 229 230 old = ftrace_call_replace(ip, addr); 231 new = ftrace_nop_replace(); 232 233 return ftrace_modify_code(rec->ip, old, new); 234 } 235 236 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 237 { 238 unsigned char *new, *old; 239 unsigned long ip = rec->ip; 240 241 old = ftrace_nop_replace(); 242 new = ftrace_call_replace(ip, addr); 243 244 return ftrace_modify_code(rec->ip, old, new); 245 } 246 247 int ftrace_update_ftrace_func(ftrace_func_t func) 248 { 249 unsigned long ip = (unsigned long)(&ftrace_call); 250 unsigned char old[MCOUNT_INSN_SIZE], *new; 251 int ret; 252 253 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); 254 new = ftrace_call_replace(ip, (unsigned long)func); 255 ret = ftrace_modify_code(ip, old, new); 256 257 return ret; 258 } 259 260 int __init ftrace_dyn_arch_init(void *data) 261 { 262 extern const unsigned char ftrace_test_p6nop[]; 263 extern const unsigned char ftrace_test_nop5[]; 264 extern const unsigned char ftrace_test_jmp[]; 265 int faulted = 0; 266 267 /* 268 * There is no good nop for all x86 archs. 269 * We will default to using the P6_NOP5, but first we 270 * will test to make sure that the nop will actually 271 * work on this CPU. If it faults, we will then 272 * go to a lesser efficient 5 byte nop. If that fails 273 * we then just use a jmp as our nop. This isn't the most 274 * efficient nop, but we can not use a multi part nop 275 * since we would then risk being preempted in the middle 276 * of that nop, and if we enabled tracing then, it might 277 * cause a system crash. 278 * 279 * TODO: check the cpuid to determine the best nop. 280 */ 281 asm volatile ( 282 "ftrace_test_jmp:" 283 "jmp ftrace_test_p6nop\n" 284 "nop\n" 285 "nop\n" 286 "nop\n" /* 2 byte jmp + 3 bytes */ 287 "ftrace_test_p6nop:" 288 P6_NOP5 289 "jmp 1f\n" 290 "ftrace_test_nop5:" 291 ".byte 0x66,0x66,0x66,0x66,0x90\n" 292 "1:" 293 ".section .fixup, \"ax\"\n" 294 "2: movl $1, %0\n" 295 " jmp ftrace_test_nop5\n" 296 "3: movl $2, %0\n" 297 " jmp 1b\n" 298 ".previous\n" 299 _ASM_EXTABLE(ftrace_test_p6nop, 2b) 300 _ASM_EXTABLE(ftrace_test_nop5, 3b) 301 : "=r"(faulted) : "0" (faulted)); 302 303 switch (faulted) { 304 case 0: 305 pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); 306 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); 307 break; 308 case 1: 309 pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); 310 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); 311 break; 312 case 2: 313 pr_info("ftrace: converting mcount calls to jmp . + 5\n"); 314 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); 315 break; 316 } 317 318 /* The return code is retured via data */ 319 *(unsigned long *)data = 0; 320 321 return 0; 322 } 323 #endif 324 325 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 326 327 #ifdef CONFIG_DYNAMIC_FTRACE 328 extern void ftrace_graph_call(void); 329 330 static int ftrace_mod_jmp(unsigned long ip, 331 int old_offset, int new_offset) 332 { 333 unsigned char code[MCOUNT_INSN_SIZE]; 334 335 if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) 336 return -EFAULT; 337 338 if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) 339 return -EINVAL; 340 341 *(int *)(&code[1]) = new_offset; 342 343 if (do_ftrace_mod_code(ip, &code)) 344 return -EPERM; 345 346 return 0; 347 } 348 349 int ftrace_enable_ftrace_graph_caller(void) 350 { 351 unsigned long ip = (unsigned long)(&ftrace_graph_call); 352 int old_offset, new_offset; 353 354 old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); 355 new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); 356 357 return ftrace_mod_jmp(ip, old_offset, new_offset); 358 } 359 360 int ftrace_disable_ftrace_graph_caller(void) 361 { 362 unsigned long ip = (unsigned long)(&ftrace_graph_call); 363 int old_offset, new_offset; 364 365 old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); 366 new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); 367 368 return ftrace_mod_jmp(ip, old_offset, new_offset); 369 } 370 371 #else /* CONFIG_DYNAMIC_FTRACE */ 372 373 /* 374 * These functions are picked from those used on 375 * this page for dynamic ftrace. They have been 376 * simplified to ignore all traces in NMI context. 377 */ 378 static atomic_t in_nmi; 379 380 void ftrace_nmi_enter(void) 381 { 382 atomic_inc(&in_nmi); 383 } 384 385 void ftrace_nmi_exit(void) 386 { 387 atomic_dec(&in_nmi); 388 } 389 390 #endif /* !CONFIG_DYNAMIC_FTRACE */ 391 392 /* 393 * Hook the return address and push it in the stack of return addrs 394 * in current thread info. 395 */ 396 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) 397 { 398 unsigned long old; 399 unsigned long long calltime; 400 int faulted; 401 struct ftrace_graph_ent trace; 402 unsigned long return_hooker = (unsigned long) 403 &return_to_handler; 404 405 /* Nmi's are currently unsupported */ 406 if (unlikely(atomic_read(&in_nmi))) 407 return; 408 409 if (unlikely(atomic_read(¤t->tracing_graph_pause))) 410 return; 411 412 /* 413 * Protect against fault, even if it shouldn't 414 * happen. This tool is too much intrusive to 415 * ignore such a protection. 416 */ 417 asm volatile( 418 "1: " _ASM_MOV " (%[parent]), %[old]\n" 419 "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" 420 " movl $0, %[faulted]\n" 421 "3:\n" 422 423 ".section .fixup, \"ax\"\n" 424 "4: movl $1, %[faulted]\n" 425 " jmp 3b\n" 426 ".previous\n" 427 428 _ASM_EXTABLE(1b, 4b) 429 _ASM_EXTABLE(2b, 4b) 430 431 : [old] "=r" (old), [faulted] "=r" (faulted) 432 : [parent] "r" (parent), [return_hooker] "r" (return_hooker) 433 : "memory" 434 ); 435 436 if (unlikely(faulted)) { 437 ftrace_graph_stop(); 438 WARN_ON(1); 439 return; 440 } 441 442 if (unlikely(!__kernel_text_address(old))) { 443 ftrace_graph_stop(); 444 *parent = old; 445 WARN_ON(1); 446 return; 447 } 448 449 calltime = cpu_clock(raw_smp_processor_id()); 450 451 if (ftrace_push_return_trace(old, calltime, 452 self_addr, &trace.depth) == -EBUSY) { 453 *parent = old; 454 return; 455 } 456 457 trace.func = self_addr; 458 459 /* Only trace if the calling function expects to */ 460 if (!ftrace_graph_entry(&trace)) { 461 current->curr_ret_stack--; 462 *parent = old; 463 } 464 } 465 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 466