1 /* 2 * Code for replacing ftrace calls with jumps. 3 * 4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> 5 * 6 * Thanks goes to Ingo Molnar, for suggesting the idea. 7 * Mathieu Desnoyers, for suggesting postponing the modifications. 8 * Arjan van de Ven, for keeping me straight, and explaining to me 9 * the dangers of modifying code on the run. 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/spinlock.h> 15 #include <linux/hardirq.h> 16 #include <linux/uaccess.h> 17 #include <linux/ftrace.h> 18 #include <linux/percpu.h> 19 #include <linux/sched.h> 20 #include <linux/init.h> 21 #include <linux/list.h> 22 #include <linux/module.h> 23 24 #include <trace/syscall.h> 25 26 #include <asm/cacheflush.h> 27 #include <asm/kprobes.h> 28 #include <asm/ftrace.h> 29 #include <asm/nops.h> 30 31 #ifdef CONFIG_DYNAMIC_FTRACE 32 33 int ftrace_arch_code_modify_prepare(void) 34 { 35 set_kernel_text_rw(); 36 set_all_modules_text_rw(); 37 return 0; 38 } 39 40 int ftrace_arch_code_modify_post_process(void) 41 { 42 set_all_modules_text_ro(); 43 set_kernel_text_ro(); 44 return 0; 45 } 46 47 union ftrace_code_union { 48 char code[MCOUNT_INSN_SIZE]; 49 struct { 50 char e8; 51 int offset; 52 } __attribute__((packed)); 53 }; 54 55 static int ftrace_calc_offset(long ip, long addr) 56 { 57 return (int)(addr - ip); 58 } 59 60 static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) 61 { 62 static union ftrace_code_union calc; 63 64 calc.e8 = 0xe8; 65 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); 66 67 /* 68 * No locking needed, this must be called via kstop_machine 69 * which in essence is like running on a uniprocessor machine. 70 */ 71 return calc.code; 72 } 73 74 static inline int 75 within(unsigned long addr, unsigned long start, unsigned long end) 76 { 77 return addr >= start && addr < end; 78 } 79 80 static unsigned long text_ip_addr(unsigned long ip) 81 { 82 /* 83 * On x86_64, kernel text mappings are mapped read-only with 84 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead 85 * of the kernel text mapping to modify the kernel text. 86 * 87 * For 32bit kernels, these mappings are same and we can use 88 * kernel identity mapping to modify code. 89 */ 90 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 ip = (unsigned long)__va(__pa_symbol(ip)); 92 93 return ip; 94 } 95 96 static const unsigned char *ftrace_nop_replace(void) 97 { 98 return ideal_nops[NOP_ATOMIC5]; 99 } 100 101 static int 102 ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, 103 unsigned const char *new_code) 104 { 105 unsigned char replaced[MCOUNT_INSN_SIZE]; 106 107 /* 108 * Note: Due to modules and __init, code can 109 * disappear and change, we need to protect against faulting 110 * as well as code changing. We do this by using the 111 * probe_kernel_* functions. 112 * 113 * No real locking needed, this code is run through 114 * kstop_machine, or before SMP starts. 115 */ 116 117 /* read the text we want to modify */ 118 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) 119 return -EFAULT; 120 121 /* Make sure it is what we expect it to be */ 122 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) 123 return -EINVAL; 124 125 ip = text_ip_addr(ip); 126 127 /* replace the text with the new text */ 128 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 129 return -EPERM; 130 131 sync_core(); 132 133 return 0; 134 } 135 136 int ftrace_make_nop(struct module *mod, 137 struct dyn_ftrace *rec, unsigned long addr) 138 { 139 unsigned const char *new, *old; 140 unsigned long ip = rec->ip; 141 142 old = ftrace_call_replace(ip, addr); 143 new = ftrace_nop_replace(); 144 145 /* 146 * On boot up, and when modules are loaded, the MCOUNT_ADDR 147 * is converted to a nop, and will never become MCOUNT_ADDR 148 * again. This code is either running before SMP (on boot up) 149 * or before the code will ever be executed (module load). 150 * We do not want to use the breakpoint version in this case, 151 * just modify the code directly. 152 */ 153 if (addr == MCOUNT_ADDR) 154 return ftrace_modify_code_direct(rec->ip, old, new); 155 156 /* Normal cases use add_brk_on_nop */ 157 WARN_ONCE(1, "invalid use of ftrace_make_nop"); 158 return -EINVAL; 159 } 160 161 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 162 { 163 unsigned const char *new, *old; 164 unsigned long ip = rec->ip; 165 166 old = ftrace_nop_replace(); 167 new = ftrace_call_replace(ip, addr); 168 169 /* Should only be called when module is loaded */ 170 return ftrace_modify_code_direct(rec->ip, old, new); 171 } 172 173 /* 174 * The modifying_ftrace_code is used to tell the breakpoint 175 * handler to call ftrace_int3_handler(). If it fails to 176 * call this handler for a breakpoint added by ftrace, then 177 * the kernel may crash. 178 * 179 * As atomic_writes on x86 do not need a barrier, we do not 180 * need to add smp_mb()s for this to work. It is also considered 181 * that we can not read the modifying_ftrace_code before 182 * executing the breakpoint. That would be quite remarkable if 183 * it could do that. Here's the flow that is required: 184 * 185 * CPU-0 CPU-1 186 * 187 * atomic_inc(mfc); 188 * write int3s 189 * <trap-int3> // implicit (r)mb 190 * if (atomic_read(mfc)) 191 * call ftrace_int3_handler() 192 * 193 * Then when we are finished: 194 * 195 * atomic_dec(mfc); 196 * 197 * If we hit a breakpoint that was not set by ftrace, it does not 198 * matter if ftrace_int3_handler() is called or not. It will 199 * simply be ignored. But it is crucial that a ftrace nop/caller 200 * breakpoint is handled. No other user should ever place a 201 * breakpoint on an ftrace nop/caller location. It must only 202 * be done by this code. 203 */ 204 atomic_t modifying_ftrace_code __read_mostly; 205 206 static int 207 ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 208 unsigned const char *new_code); 209 210 /* 211 * Should never be called: 212 * As it is only called by __ftrace_replace_code() which is called by 213 * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() 214 * which is called to turn mcount into nops or nops into function calls 215 * but not to convert a function from not using regs to one that uses 216 * regs, which ftrace_modify_call() is for. 217 */ 218 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, 219 unsigned long addr) 220 { 221 WARN_ON(1); 222 return -EINVAL; 223 } 224 225 static unsigned long ftrace_update_func; 226 227 static int update_ftrace_func(unsigned long ip, void *new) 228 { 229 unsigned char old[MCOUNT_INSN_SIZE]; 230 int ret; 231 232 memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); 233 234 ftrace_update_func = ip; 235 /* Make sure the breakpoints see the ftrace_update_func update */ 236 smp_wmb(); 237 238 /* See comment above by declaration of modifying_ftrace_code */ 239 atomic_inc(&modifying_ftrace_code); 240 241 ret = ftrace_modify_code(ip, old, new); 242 243 atomic_dec(&modifying_ftrace_code); 244 245 return ret; 246 } 247 248 int ftrace_update_ftrace_func(ftrace_func_t func) 249 { 250 unsigned long ip = (unsigned long)(&ftrace_call); 251 unsigned char *new; 252 int ret; 253 254 new = ftrace_call_replace(ip, (unsigned long)func); 255 ret = update_ftrace_func(ip, new); 256 257 /* Also update the regs callback function */ 258 if (!ret) { 259 ip = (unsigned long)(&ftrace_regs_call); 260 new = ftrace_call_replace(ip, (unsigned long)func); 261 ret = update_ftrace_func(ip, new); 262 } 263 264 return ret; 265 } 266 267 static int is_ftrace_caller(unsigned long ip) 268 { 269 if (ip == ftrace_update_func) 270 return 1; 271 272 return 0; 273 } 274 275 /* 276 * A breakpoint was added to the code address we are about to 277 * modify, and this is the handle that will just skip over it. 278 * We are either changing a nop into a trace call, or a trace 279 * call to a nop. While the change is taking place, we treat 280 * it just like it was a nop. 281 */ 282 int ftrace_int3_handler(struct pt_regs *regs) 283 { 284 unsigned long ip; 285 286 if (WARN_ON_ONCE(!regs)) 287 return 0; 288 289 ip = regs->ip - 1; 290 if (!ftrace_location(ip) && !is_ftrace_caller(ip)) 291 return 0; 292 293 regs->ip += MCOUNT_INSN_SIZE - 1; 294 295 return 1; 296 } 297 298 static int ftrace_write(unsigned long ip, const char *val, int size) 299 { 300 /* 301 * On x86_64, kernel text mappings are mapped read-only with 302 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead 303 * of the kernel text mapping to modify the kernel text. 304 * 305 * For 32bit kernels, these mappings are same and we can use 306 * kernel identity mapping to modify code. 307 */ 308 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 309 ip = (unsigned long)__va(__pa_symbol(ip)); 310 311 if (probe_kernel_write((void *)ip, val, size)) 312 return -EPERM; 313 314 return 0; 315 } 316 317 static int add_break(unsigned long ip, const char *old) 318 { 319 unsigned char replaced[MCOUNT_INSN_SIZE]; 320 unsigned char brk = BREAKPOINT_INSTRUCTION; 321 322 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) 323 return -EFAULT; 324 325 /* Make sure it is what we expect it to be */ 326 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) 327 return -EINVAL; 328 329 return ftrace_write(ip, &brk, 1); 330 } 331 332 static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) 333 { 334 unsigned const char *old; 335 unsigned long ip = rec->ip; 336 337 old = ftrace_call_replace(ip, addr); 338 339 return add_break(rec->ip, old); 340 } 341 342 343 static int add_brk_on_nop(struct dyn_ftrace *rec) 344 { 345 unsigned const char *old; 346 347 old = ftrace_nop_replace(); 348 349 return add_break(rec->ip, old); 350 } 351 352 /* 353 * If the record has the FTRACE_FL_REGS set, that means that it 354 * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS 355 * is not not set, then it wants to convert to the normal callback. 356 */ 357 static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) 358 { 359 if (rec->flags & FTRACE_FL_REGS) 360 return (unsigned long)FTRACE_REGS_ADDR; 361 else 362 return (unsigned long)FTRACE_ADDR; 363 } 364 365 /* 366 * The FTRACE_FL_REGS_EN is set when the record already points to 367 * a function that saves all the regs. Basically the '_EN' version 368 * represents the current state of the function. 369 */ 370 static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) 371 { 372 if (rec->flags & FTRACE_FL_REGS_EN) 373 return (unsigned long)FTRACE_REGS_ADDR; 374 else 375 return (unsigned long)FTRACE_ADDR; 376 } 377 378 static int add_breakpoints(struct dyn_ftrace *rec, int enable) 379 { 380 unsigned long ftrace_addr; 381 int ret; 382 383 ret = ftrace_test_record(rec, enable); 384 385 ftrace_addr = get_ftrace_addr(rec); 386 387 switch (ret) { 388 case FTRACE_UPDATE_IGNORE: 389 return 0; 390 391 case FTRACE_UPDATE_MAKE_CALL: 392 /* converting nop to call */ 393 return add_brk_on_nop(rec); 394 395 case FTRACE_UPDATE_MODIFY_CALL_REGS: 396 case FTRACE_UPDATE_MODIFY_CALL: 397 ftrace_addr = get_ftrace_old_addr(rec); 398 /* fall through */ 399 case FTRACE_UPDATE_MAKE_NOP: 400 /* converting a call to a nop */ 401 return add_brk_on_call(rec, ftrace_addr); 402 } 403 return 0; 404 } 405 406 /* 407 * On error, we need to remove breakpoints. This needs to 408 * be done caefully. If the address does not currently have a 409 * breakpoint, we know we are done. Otherwise, we look at the 410 * remaining 4 bytes of the instruction. If it matches a nop 411 * we replace the breakpoint with the nop. Otherwise we replace 412 * it with the call instruction. 413 */ 414 static int remove_breakpoint(struct dyn_ftrace *rec) 415 { 416 unsigned char ins[MCOUNT_INSN_SIZE]; 417 unsigned char brk = BREAKPOINT_INSTRUCTION; 418 const unsigned char *nop; 419 unsigned long ftrace_addr; 420 unsigned long ip = rec->ip; 421 422 /* If we fail the read, just give up */ 423 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) 424 return -EFAULT; 425 426 /* If this does not have a breakpoint, we are done */ 427 if (ins[0] != brk) 428 return -1; 429 430 nop = ftrace_nop_replace(); 431 432 /* 433 * If the last 4 bytes of the instruction do not match 434 * a nop, then we assume that this is a call to ftrace_addr. 435 */ 436 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { 437 /* 438 * For extra paranoidism, we check if the breakpoint is on 439 * a call that would actually jump to the ftrace_addr. 440 * If not, don't touch the breakpoint, we make just create 441 * a disaster. 442 */ 443 ftrace_addr = get_ftrace_addr(rec); 444 nop = ftrace_call_replace(ip, ftrace_addr); 445 446 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) 447 goto update; 448 449 /* Check both ftrace_addr and ftrace_old_addr */ 450 ftrace_addr = get_ftrace_old_addr(rec); 451 nop = ftrace_call_replace(ip, ftrace_addr); 452 453 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) 454 return -EINVAL; 455 } 456 457 update: 458 return ftrace_write(ip, nop, 1); 459 } 460 461 static int add_update_code(unsigned long ip, unsigned const char *new) 462 { 463 /* skip breakpoint */ 464 ip++; 465 new++; 466 return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); 467 } 468 469 static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) 470 { 471 unsigned long ip = rec->ip; 472 unsigned const char *new; 473 474 new = ftrace_call_replace(ip, addr); 475 return add_update_code(ip, new); 476 } 477 478 static int add_update_nop(struct dyn_ftrace *rec) 479 { 480 unsigned long ip = rec->ip; 481 unsigned const char *new; 482 483 new = ftrace_nop_replace(); 484 return add_update_code(ip, new); 485 } 486 487 static int add_update(struct dyn_ftrace *rec, int enable) 488 { 489 unsigned long ftrace_addr; 490 int ret; 491 492 ret = ftrace_test_record(rec, enable); 493 494 ftrace_addr = get_ftrace_addr(rec); 495 496 switch (ret) { 497 case FTRACE_UPDATE_IGNORE: 498 return 0; 499 500 case FTRACE_UPDATE_MODIFY_CALL_REGS: 501 case FTRACE_UPDATE_MODIFY_CALL: 502 case FTRACE_UPDATE_MAKE_CALL: 503 /* converting nop to call */ 504 return add_update_call(rec, ftrace_addr); 505 506 case FTRACE_UPDATE_MAKE_NOP: 507 /* converting a call to a nop */ 508 return add_update_nop(rec); 509 } 510 511 return 0; 512 } 513 514 static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) 515 { 516 unsigned long ip = rec->ip; 517 unsigned const char *new; 518 519 new = ftrace_call_replace(ip, addr); 520 521 return ftrace_write(ip, new, 1); 522 } 523 524 static int finish_update_nop(struct dyn_ftrace *rec) 525 { 526 unsigned long ip = rec->ip; 527 unsigned const char *new; 528 529 new = ftrace_nop_replace(); 530 531 return ftrace_write(ip, new, 1); 532 } 533 534 static int finish_update(struct dyn_ftrace *rec, int enable) 535 { 536 unsigned long ftrace_addr; 537 int ret; 538 539 ret = ftrace_update_record(rec, enable); 540 541 ftrace_addr = get_ftrace_addr(rec); 542 543 switch (ret) { 544 case FTRACE_UPDATE_IGNORE: 545 return 0; 546 547 case FTRACE_UPDATE_MODIFY_CALL_REGS: 548 case FTRACE_UPDATE_MODIFY_CALL: 549 case FTRACE_UPDATE_MAKE_CALL: 550 /* converting nop to call */ 551 return finish_update_call(rec, ftrace_addr); 552 553 case FTRACE_UPDATE_MAKE_NOP: 554 /* converting a call to a nop */ 555 return finish_update_nop(rec); 556 } 557 558 return 0; 559 } 560 561 static void do_sync_core(void *data) 562 { 563 sync_core(); 564 } 565 566 static void run_sync(void) 567 { 568 int enable_irqs = irqs_disabled(); 569 570 /* We may be called with interrupts disbled (on bootup). */ 571 if (enable_irqs) 572 local_irq_enable(); 573 on_each_cpu(do_sync_core, NULL, 1); 574 if (enable_irqs) 575 local_irq_disable(); 576 } 577 578 void ftrace_replace_code(int enable) 579 { 580 struct ftrace_rec_iter *iter; 581 struct dyn_ftrace *rec; 582 const char *report = "adding breakpoints"; 583 int count = 0; 584 int ret; 585 586 for_ftrace_rec_iter(iter) { 587 rec = ftrace_rec_iter_record(iter); 588 589 ret = add_breakpoints(rec, enable); 590 if (ret) 591 goto remove_breakpoints; 592 count++; 593 } 594 595 run_sync(); 596 597 report = "updating code"; 598 599 for_ftrace_rec_iter(iter) { 600 rec = ftrace_rec_iter_record(iter); 601 602 ret = add_update(rec, enable); 603 if (ret) 604 goto remove_breakpoints; 605 } 606 607 run_sync(); 608 609 report = "removing breakpoints"; 610 611 for_ftrace_rec_iter(iter) { 612 rec = ftrace_rec_iter_record(iter); 613 614 ret = finish_update(rec, enable); 615 if (ret) 616 goto remove_breakpoints; 617 } 618 619 run_sync(); 620 621 return; 622 623 remove_breakpoints: 624 ftrace_bug(ret, rec ? rec->ip : 0); 625 printk(KERN_WARNING "Failed on %s (%d):\n", report, count); 626 for_ftrace_rec_iter(iter) { 627 rec = ftrace_rec_iter_record(iter); 628 remove_breakpoint(rec); 629 } 630 run_sync(); 631 } 632 633 static int 634 ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 635 unsigned const char *new_code) 636 { 637 int ret; 638 639 ret = add_break(ip, old_code); 640 if (ret) 641 goto out; 642 643 run_sync(); 644 645 ret = add_update_code(ip, new_code); 646 if (ret) 647 goto fail_update; 648 649 run_sync(); 650 651 ret = ftrace_write(ip, new_code, 1); 652 out: 653 run_sync(); 654 return ret; 655 656 fail_update: 657 ftrace_write(ip, old_code, 1); 658 goto out; 659 } 660 661 void arch_ftrace_update_code(int command) 662 { 663 /* See comment above by declaration of modifying_ftrace_code */ 664 atomic_inc(&modifying_ftrace_code); 665 666 ftrace_modify_all_code(command); 667 668 atomic_dec(&modifying_ftrace_code); 669 } 670 671 int __init ftrace_dyn_arch_init(void *data) 672 { 673 return 0; 674 } 675 #endif 676 677 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 678 679 #ifdef CONFIG_DYNAMIC_FTRACE 680 extern void ftrace_graph_call(void); 681 682 static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) 683 { 684 static union ftrace_code_union calc; 685 686 /* Jmp not a call (ignore the .e8) */ 687 calc.e8 = 0xe9; 688 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); 689 690 /* 691 * ftrace external locks synchronize the access to the static variable. 692 */ 693 return calc.code; 694 } 695 696 static int ftrace_mod_jmp(unsigned long ip, void *func) 697 { 698 unsigned char *new; 699 700 new = ftrace_jmp_replace(ip, (unsigned long)func); 701 702 return update_ftrace_func(ip, new); 703 } 704 705 int ftrace_enable_ftrace_graph_caller(void) 706 { 707 unsigned long ip = (unsigned long)(&ftrace_graph_call); 708 709 return ftrace_mod_jmp(ip, &ftrace_graph_caller); 710 } 711 712 int ftrace_disable_ftrace_graph_caller(void) 713 { 714 unsigned long ip = (unsigned long)(&ftrace_graph_call); 715 716 return ftrace_mod_jmp(ip, &ftrace_stub); 717 } 718 719 #endif /* !CONFIG_DYNAMIC_FTRACE */ 720 721 /* 722 * Hook the return address and push it in the stack of return addrs 723 * in current thread info. 724 */ 725 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, 726 unsigned long frame_pointer) 727 { 728 unsigned long old; 729 int faulted; 730 struct ftrace_graph_ent trace; 731 unsigned long return_hooker = (unsigned long) 732 &return_to_handler; 733 734 if (unlikely(atomic_read(¤t->tracing_graph_pause))) 735 return; 736 737 /* 738 * Protect against fault, even if it shouldn't 739 * happen. This tool is too much intrusive to 740 * ignore such a protection. 741 */ 742 asm volatile( 743 "1: " _ASM_MOV " (%[parent]), %[old]\n" 744 "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" 745 " movl $0, %[faulted]\n" 746 "3:\n" 747 748 ".section .fixup, \"ax\"\n" 749 "4: movl $1, %[faulted]\n" 750 " jmp 3b\n" 751 ".previous\n" 752 753 _ASM_EXTABLE(1b, 4b) 754 _ASM_EXTABLE(2b, 4b) 755 756 : [old] "=&r" (old), [faulted] "=r" (faulted) 757 : [parent] "r" (parent), [return_hooker] "r" (return_hooker) 758 : "memory" 759 ); 760 761 if (unlikely(faulted)) { 762 ftrace_graph_stop(); 763 WARN_ON(1); 764 return; 765 } 766 767 trace.func = self_addr; 768 trace.depth = current->curr_ret_stack + 1; 769 770 /* Only trace if the calling function expects to */ 771 if (!ftrace_graph_entry(&trace)) { 772 *parent = old; 773 return; 774 } 775 776 if (ftrace_push_return_trace(old, self_addr, &trace.depth, 777 frame_pointer) == -EBUSY) { 778 *parent = old; 779 return; 780 } 781 } 782 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 783