// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/memory.h>

#include <trace/syscall.h>

#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/text-patching.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
    __acquires(&text_mutex)
{
	/*
	 * Need to grab text_mutex to prevent module loading and live
	 * kernel patching from changing the text permissions while
	 * ftrace has it set to "read/write".
	 */
	mutex_lock(&text_mutex);
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
    __releases(&text_mutex)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	mutex_unlock(&text_mutex);
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		unsigned char op;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *
ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.op		= op;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	return calc.code;
}

static unsigned char *
ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe8, ip, addr);
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static unsigned long text_ip_addr(unsigned long ip)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only, so we use
	 * the kernel identity mapping instead of the kernel text mapping
	 * to modify the kernel text.
	 *
	 * For 32-bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa_symbol(ip));

	return ip;
}

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
			  unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	ftrace_expected = old_code;

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug were to happen, it
	 * could cause us to read or write to someplace that could cause harm.
	 * Carefully read and modify the code with probe_kernel_*(), and make
	 * sure what we read is what we expected it to be before modifying it.
	 */
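	/*
	 * For illustration: old_code and new_code are each MCOUNT_INSN_SIZE
	 * (5) bytes, either "e8 <rel32>" (a near call, with rel32 taken
	 * relative to the end of the instruction) or the 5-byte atomic nop,
	 * so a mismatch below means someone else has modified this location.
	 */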

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	ip = text_ip_addr(ip);

	/* replace the text with the new text */
	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	ftrace_expected = NULL;

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. We also assume that
 * a CPU cannot read modifying_ftrace_code before it executes
 * the breakpoint; it would be quite remarkable if it could.
 * Here's the flow that is required:
 *
 *	CPU-0				CPU-1
 *
 *	atomic_inc(mfc);
 *	write int3s
 *				<trap-int3> // implicit (r)mb
 *				if (atomic_read(mfc))
 *					call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 *	atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that an ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

/*
 * Should never be called:
 *  It is only called by __ftrace_replace_code(), which is called by
 *  ftrace_replace_code() (which x86 overrides), and by ftrace_update_code(),
 *  which is called to turn mcount calls into nops or nops into function
 *  calls, but never to convert a function from not using regs to one that
 *  uses regs, which is what ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
		       unsigned long addr)
{
	WARN_ON(1);
	ftrace_expected = NULL;
	return -EINVAL;
}

static unsigned long ftrace_update_func;
static unsigned long ftrace_update_func_call;

static int update_ftrace_func(unsigned long ip, void *new)
{
	unsigned char old[MCOUNT_INSN_SIZE];
	int ret;

	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

	ftrace_update_func = ip;
	/* Make sure the breakpoints see the ftrace_update_func update */
	smp_wmb();

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char *new;
	int ret;

	ftrace_update_func_call = (unsigned long)func;

	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);

	/* Also update the regs callback function */
	if (!ret) {
		ip = (unsigned long)(&ftrace_regs_call);
		new = ftrace_call_replace(ip, (unsigned long)func);
		ret = update_ftrace_func(ip, new);
	}

	return ret;
}

static nokprobe_inline int is_ftrace_caller(unsigned long ip)
{
	if (ip == ftrace_update_func)
		return 1;

	return 0;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
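/*
 * For background: a live 5-byte site is patched in three steps, with the
 * other CPUs synchronized in between. First the opcode byte is replaced
 * with an int3, then the remaining four bytes are rewritten, and finally
 * the first byte is replaced with the new opcode. This handler covers any
 * CPU that executes the site while the int3 is still present.
 */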
int ftrace_int3_handler(struct pt_regs *regs)
{
	unsigned long ip;

	if (WARN_ON_ONCE(!regs))
		return 0;

	ip = regs->ip - INT3_INSN_SIZE;

	if (ftrace_location(ip)) {
		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
		return 1;
	} else if (is_ftrace_caller(ip)) {
		if (!ftrace_update_func_call) {
			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
			return 1;
		}
		int3_emulate_call(regs, ftrace_update_func_call);
		return 1;
	}

	return 0;
}
NOKPROBE_SYMBOL(ftrace_int3_handler);

static int ftrace_write(unsigned long ip, const char *val, int size)
{
	ip = text_ip_addr(ip);

	if (probe_kernel_write((void *)ip, val, size))
		return -EPERM;

	return 0;
}

static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	ftrace_expected = old;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	return ftrace_write(ip, &brk, 1);
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}


static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
{
	unsigned long ftrace_addr;
	int ret;

	ftrace_addr = ftrace_get_addr_curr(rec);

	ret = ftrace_test_record(rec, enable);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If it matches a nop,
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return 0;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * Out of extra paranoia, we check whether the breakpoint
		 * is on a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint, or we may just
		 * create a disaster.
		 */
		ftrace_addr = ftrace_get_addr_new(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = ftrace_get_addr_curr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		ftrace_expected = nop;

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	return ftrace_write(ip, nop, 1);
}

static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, bool enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	return ftrace_write(ip, new, 1);
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	return ftrace_write(ip, new, 1);
}

static int finish_update(struct dyn_ftrace *rec, bool enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}

static void do_sync_core(void *data)
{
	sync_core();
}

static void run_sync(void)
{
	int enable_irqs;

	/* No need to sync if there's only one CPU */
	if (num_online_cpus() == 1)
		return;

	enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup). */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}

void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "removing breakpoints";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	return;

 remove_breakpoints:
	pr_warn("Failed on %s (%d):\n", report, count);
	ftrace_bug(ret, rec);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		/*
		 * Breakpoints are handled only when this function is in
		 * progress. The system could not work with them.
		 */
		if (remove_breakpoint(rec))
			BUG();
	}
	run_sync();
}

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	/*
	 * The breakpoint is handled only when this function is in progress.
	 * The system could not work if we could not remove it.
	 */
	BUG_ON(ret);
 out:
	run_sync();
	return ret;

 fail_update:
	/* Also here the system could not work with the breakpoint */
	if (ftrace_write(ip, old_code, 1))
		BUG();
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
	return module_alloc(size);
}
static inline void tramp_free(void *tramp)
{
	module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
	return NULL;
}
static inline void tramp_free(void *tramp) { }
#endif

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-ftrace_trace_op (4 bytes)> */
#define OP_REF_SIZE	7

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};

#define RET_SIZE	1

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long offset;
	unsigned long npages;
	unsigned long size;
	unsigned long retq;
	unsigned long *ptr;
	void *trampoline;
	void *ip;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_epilogue;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough size to store the ftrace_caller code,
	 * the return instruction, as well as the address of the
	 * ftrace_ops this trampoline is used for.
	 */
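	/*
	 * For illustration, the resulting layout is roughly:
	 *
	 *   trampoline + 0:               copy of ftrace_caller/ftrace_regs_caller
	 *   trampoline + size:            the ret instruction (RET_SIZE bytes)
	 *   trampoline + size + RET_SIZE: the ftrace_ops pointer (sizeof(void *))
	 */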
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);
	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

	/* Copy ftrace_caller onto the trampoline memory */
	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;

	/* The trampoline ends with ret(q) */
	retq = (unsigned long)ftrace_stub;
	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
	if (WARN_ON(ret < 0))
		goto fail;

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* The ALLOC_TRAMP flag lets us know we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	set_vm_flush_reset_perms(trampoline);

	/*
	 * Module allocation needs to be completed by making the page
	 * executable. The page is still writable, which is a security hazard,
	 * but anyhow ftrace breaks W^X completely.
	 */
	set_memory_x((unsigned long)trampoline, npages);
	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline);
	return 0;
}

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned char *new;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	int ret, npages;

	if (ops->trampoline) {
		/*
		 * The ftrace_ops caller may set up its own trampoline.
		 * In such a case, this code must not modify it.
		 */
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			return;
		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
		set_memory_rw(ops->trampoline, npages);
	} else {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	}

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;

	func = ftrace_ops_get_func(ops);

	ftrace_update_func_call = (unsigned long)func;

	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);
	set_memory_ro(ops->trampoline, npages);

	/* The update should never fail */
	WARN_ON(ret);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union ftrace_code_union calc;
	int ret;

	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(calc.op != 0xe8)) {
		pr_warn("Expected e8, got %x\n", calc.op);
		return NULL;
	}

	return ptr + MCOUNT_INSN_SIZE + calc.offset;
}

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer);

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		/*
		 * The function graph tracer is the only one we know of
		 * that sets a static trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe9, ip, addr);
}

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	unsigned char *new;

	ftrace_update_func_call = 0UL;
	new = ftrace_jmp_replace(ip, (unsigned long)func);

	return update_ftrace_func(ip, new);
}

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it in the stack of return addrs
 * in current thread info.
 */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	unsigned long return_hooker = (unsigned long)&return_to_handler;

	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably. Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if (unlikely((long)__builtin_frame_address(0) >= 0))
		return;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * If the return location is actually pointing directly to
	 * the start of a direct trampoline (if we trace the trampoline
	 * it will still be offset by MCOUNT_INSN_SIZE), then the
	 * return address is actually off by one word, and we
	 * need to adjust for that.
	 */
	if (ftrace_direct_func_count) {
		if (ftrace_find_direct_func(self_addr + MCOUNT_INSN_SIZE)) {
			self_addr = *parent;
			parent++;
		}
	}

	/*
	 * Protect against fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
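	/*
	 * Roughly the C equivalent of the asm below is:
	 *
	 *	old = *parent;
	 *	*parent = return_hooker;
	 *	faulted = 0;
	 *
	 * except that either access may fault, in which case the exception
	 * fixup sets "faulted" and we bail out below instead of oopsing.
	 */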
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (function_graph_enter(old, self_addr, frame_pointer, parent))
		*parent = old;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */