/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks goes to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the run.
 */

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>

#include <asm/ftrace.h>
#include <asm/nops.h>

static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

unsigned char *ftrace_nop_replace(void)
{
	return ftrace_nop;
}

unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8 = 0xe8;
	calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}
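/*
 * Worked example (illustrative values only): with MCOUNT_INSN_SIZE == 5,
 * patching a call site at ip == 0xc0100000 to call addr == 0xc0200000
 * computes the displacement relative to the instruction *after* the call:
 *
 *	offset = 0xc0200000 - (0xc0100000 + 5) = 0x000ffffb
 *
 * so ftrace_call_replace() returns the bytes e8 fb ff 0f 00, i.e.
 * "call 0xc0200000" encoded as a rel32 near call.
 */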
/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Set a flag that says we are modifying code.
 * 3) Wait for any running NMIs to finish.
 * 4) Write the code.
 * 5) Clear the flag.
 * 6) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

static atomic_t in_nmi;
static int mod_code_status;
static int mod_code_write;
static void *mod_code_ip;
static void *mod_code_newcode;
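/*
 * Illustrative interleaving (a sketch of the scheme described above,
 * derived from the functions that follow):
 *
 *	modifying CPU                      CPU taking an NMI
 *	-------------                      -----------------
 *	fill mod_code_ip/newcode
 *	smp_wmb()
 *	mod_code_write = 1                 ftrace_nmi_enter():
 *	smp_mb()                             atomic_inc(&in_nmi)
 *	wait_for_nmi() spins                 smp_mb()
 *	                                     sees mod_code_write set,
 *	                                     writes the buffers itself
 *	                                   ftrace_nmi_exit():
 *	                                     atomic_dec(&in_nmi)
 *	ftrace_mod_code() writes again
 *	mod_code_write = 0; wait_for_nmi()
 *
 * Every writer stores the same bytes to the same location, so the
 * duplicate write is harmless; what matters is that no CPU executes
 * the instruction while it is half-written.
 */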
static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU process can be writing to mod_code_status.
	 * (and the code itself)
	 * But if one were to fail, then they all should, and if one were
	 * to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);
}

void ftrace_nmi_enter(void)
{
	atomic_inc(&in_nmi);
	/* Must have in_nmi seen before reading write flag */
	smp_mb();
	if (mod_code_write)
		ftrace_mod_code();
}

void ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing in_nmi */
	smp_wmb();
	atomic_dec(&in_nmi);
}

static void wait_for_nmi(void)
{
	while (atomic_read(&in_nmi))
		cpu_relax();
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write them */
	smp_wmb();

	mod_code_write = 1;

	/* Make sure write bit is visible before we wait on NMIs */
	smp_mb();

	wait_for_nmi();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_wmb();

	mod_code_write = 0;

	/* make sure NMIs see the cleared bit */
	smp_mb();

	wait_for_nmi();

	return mod_code_status;
}
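/*
 * Hypothetical usage sketch (for illustration only; the function name
 * is made up, and the real callers live in kernel/trace/ftrace.c):
 * turning a traced mcount call site back into a nop goes through
 * ftrace_modify_code() below, which verifies the old bytes first.
 */
#if 0
static int example_make_nop(unsigned long ip, unsigned long addr)
{
	unsigned char *old, *new;

	old = ftrace_call_replace(ip, addr);	/* bytes we expect: call addr */
	new = ftrace_nop_replace();		/* bytes to install: 5-byte nop */

	return ftrace_modify_code(ip, old, new);
}
#endif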
int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
		   unsigned char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 * disappear and change, so we need to protect against faulting
	 * as well as code changing. We do this by using the
	 * probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = ftrace_modify_code(ip, old, new);

	return ret;
}
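/*
 * The three candidate 5-byte nops probed below, with the byte values
 * taken from the pr_info messages in ftrace_dyn_arch_init():
 *
 *	P6_NOP5:       0f 1f 44 00 00   nopl 0x0(%eax,%eax,1)
 *	prefixed nop:  66 66 66 66 90   operand-size prefixes + nop
 *	jmp fallback:  a 2-byte jmp over 3 filler bytes ("jmp . + 5")
 */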
int __init ftrace_dyn_arch_init(void *data)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * go to a less efficient 5 byte nop. If that fails
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we can not use a multi part nop
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing then, it might
	 * cause a system crash.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n"  /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2:	movl $1, %0\n"
		"	jmp ftrace_test_nop5\n"
		"3:	movl $2, %0\n"
		"	jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
		break;
	case 1:
		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
		break;
	case 2:
		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
		break;
	}

	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
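/*
 * Illustration of the multi-part nop hazard described in the comment
 * in ftrace_dyn_arch_init() above (a sketch with made-up numbers): if
 * the 5 patched bytes were two instructions, say a 2-byte nop followed
 * by a 3-byte nop, then:
 *
 *	1) a task executes the 2-byte nop and is preempted at ip + 2
 *	2) tracing is enabled: the 5 bytes become e8 xx xx xx xx
 *	3) the task resumes at ip + 2, in the middle of the call's
 *	   rel32 operand, and executes garbage
 *
 * A single 5-byte instruction can only be interrupted at its start or
 * after its end, which is why the probes above insist on one-piece
 * nops (or a jmp).
 */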