/*
 * Code for replacing ftrace calls with jumps.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea;
 * Mathieu Desnoyers, for suggesting postponing the modifications;
 * and Arjan van de Ven, for keeping me straight and explaining to me
 * the dangers of modifying code on the run.
 */

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/list.h>

#include <asm/ftrace.h>
#include <asm/nops.h>


static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		char e8;
		int offset;
	} __attribute__((packed));
};


static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

unsigned char *ftrace_nop_replace(void)
{
	return ftrace_nop;
}

unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.e8 = 0xe8;
	calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	/*
	 * No locking needed, this must be called via kstop_machine
	 * which in essence is like running on a uniprocessor machine.
	 */
	return calc.code;
}
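/*
 * Illustrative sketch, not part of the original file: a worked example
 * of the encoding ftrace_call_replace() produces. The function name and
 * the ip/addr values below are hypothetical. A call site at
 * ip = 0xc0100000 redirected to addr = 0xc0200000 becomes the 5-byte
 * instruction "e8 fb ff 0f 00": the 0xe8 call opcode followed by the
 * little-endian rel32 displacement addr - (ip + MCOUNT_INSN_SIZE)
 * = 0xc0200000 - 0xc0100005 = 0x000ffffb.
 */
static void __maybe_unused ftrace_call_replace_example(void)
{
	unsigned long ip = 0xc0100000UL;	/* hypothetical call site */
	unsigned long addr = 0xc0200000UL;	/* hypothetical target */
	unsigned char *code = ftrace_call_replace(ip, addr);

	/* code[0] is 0xe8; code[1..4] hold fb ff 0f 00 (0x000ffffb) */
	(void)code;
}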
/*
 * Modifying code must take extra care. On an SMP machine, if
 * the code being modified is also being executed on another CPU,
 * that CPU will have undefined results and possibly take a GPF.
 * We use kstop_machine to stop other CPUs from executing code.
 * But this does not stop NMIs from happening. We still need
 * to protect against that. We separate out the modification of
 * the code to take care of this.
 *
 * Two buffers are added: an IP buffer and a "code" buffer.
 *
 * 1) Put the instruction pointer into the IP buffer
 *    and the new code into the "code" buffer.
 * 2) Set a flag that says we are modifying code.
 * 3) Wait for any running NMIs to finish.
 * 4) Write the code.
 * 5) Clear the flag.
 * 6) Wait for any running NMIs to finish.
 *
 * If an NMI is executed, the first thing it does is to call
 * "ftrace_nmi_enter". This will check if the flag is set to write,
 * and if it is, it will write what is in the IP and "code" buffers.
 *
 * The trick is, it does not matter if everyone is writing the same
 * content to the code location. Also, if a CPU is executing code,
 * it is OK to write to that code location if the contents being written
 * are the same as what exists.
 */

static atomic_t in_nmi;
static int mod_code_status;
static int mod_code_write;
static void *mod_code_ip;
static void *mod_code_newcode;

static int nmi_wait_count;
static atomic_t nmi_update_count;

int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	r = snprintf(buf, size, "%u %u",
		     nmi_wait_count,
		     atomic_read(&nmi_update_count));
	return r;
}
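/*
 * Worked example of the format above (hypothetical numbers): if three
 * code updates had to wait on in-flight NMIs and NMI handlers performed
 * twelve updates themselves, the buffer would read "3 12".
 */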
static void ftrace_mod_code(void)
{
	/*
	 * Yes, more than one CPU can be writing to mod_code_status
	 * (and to the code itself).
	 * But if one write were to fail, then they all should, and if one
	 * were to succeed, then they all should.
	 */
	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
					     MCOUNT_INSN_SIZE);
}

void ftrace_nmi_enter(void)
{
	atomic_inc(&in_nmi);
	/* Must have in_nmi seen before reading the write flag */
	smp_mb();
	if (mod_code_write) {
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
}

void ftrace_nmi_exit(void)
{
	/* Finish all executions before clearing in_nmi */
	smp_wmb();
	atomic_dec(&in_nmi);
}

static void wait_for_nmi(void)
{
	int waited = 0;

	while (atomic_read(&in_nmi)) {
		waited = 1;
		cpu_relax();
	}

	if (waited)
		nmi_wait_count++;
}

static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
	mod_code_ip = (void *)ip;
	mod_code_newcode = new_code;

	/* The buffers need to be visible before we let NMIs write the code */
	smp_wmb();

	mod_code_write = 1;

	/* Make sure the write bit is visible before we wait on NMIs */
	smp_mb();

	wait_for_nmi();

	/* Make sure all running NMIs have finished before we write the code */
	smp_mb();

	ftrace_mod_code();

	/* Make sure the write happens before clearing the bit */
	smp_wmb();

	mod_code_write = 0;

	/* Make sure NMIs see the cleared bit */
	smp_mb();

	wait_for_nmi();

	return mod_code_status;
}
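/*
 * A worked interleaving of the protocol above, with CPU A in
 * do_ftrace_mod_code() and an NMI arriving on CPU B:
 *
 *	CPU A					CPU B (NMI)
 *	fill mod_code_ip/mod_code_newcode
 *	smp_wmb()
 *	mod_code_write = 1
 *	smp_mb()				atomic_inc(&in_nmi)
 *	wait_for_nmi() spins ...		smp_mb()
 *						sees mod_code_write set,
 *						ftrace_mod_code()
 *						smp_wmb()
 *						atomic_dec(&in_nmi)
 *	... sees in_nmi drop to zero
 *	ftrace_mod_code()
 *
 * Both sides store identical bytes to the same location, which, as the
 * comment above the buffers explains, is safe even while another CPU
 * is executing those bytes.
 */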
int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
		   unsigned char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	/*
	 * Note: Due to modules and __init, code can
	 * disappear and change; we need to protect against faulting
	 * as well as code changing. We do this by using the
	 * probe_kernel_* functions.
	 *
	 * No real locking needed, this code is run through
	 * kstop_machine, or before SMP starts.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	/* replace the text with the new text */
	if (do_ftrace_mod_code(ip, new_code))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char old[MCOUNT_INSN_SIZE], *new;
	int ret;

	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = ftrace_modify_code(ip, old, new);

	return ret;
}
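/*
 * The three 5-byte nop candidates probed below, as byte sequences:
 *
 *	P6_NOP5:	0f 1f 44 00 00	(single instruction, preferred)
 *	nop5:		66 66 66 66 90	(0x66-prefixed 1-byte nop)
 *	jmp + nops:	eb 03 90 90 90	(short jmp over three 1-byte nops;
 *					 inferred from the asm below, the
 *					 assembler's encoding may differ)
 */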
int __init ftrace_dyn_arch_init(void *data)
{
	extern const unsigned char ftrace_test_p6nop[];
	extern const unsigned char ftrace_test_nop5[];
	extern const unsigned char ftrace_test_jmp[];
	int faulted = 0;

	/*
	 * There is no good nop for all x86 archs.
	 * We will default to using the P6_NOP5, but first we
	 * will test to make sure that the nop will actually
	 * work on this CPU. If it faults, we will then
	 * go to a less efficient 5-byte nop. If that fails
	 * we then just use a jmp as our nop. This isn't the most
	 * efficient nop, but we cannot use a multi-part nop,
	 * since we would then risk being preempted in the middle
	 * of that nop, and if we enabled tracing then, it might
	 * cause a system crash.
	 *
	 * TODO: check the cpuid to determine the best nop.
	 */
	asm volatile (
		"ftrace_test_jmp:"
		"jmp ftrace_test_p6nop\n"
		"nop\n"
		"nop\n"
		"nop\n"  /* 2 byte jmp + 3 bytes */
		"ftrace_test_p6nop:"
		P6_NOP5
		"jmp 1f\n"
		"ftrace_test_nop5:"
		".byte 0x66,0x66,0x66,0x66,0x90\n"
		"1:"
		".section .fixup, \"ax\"\n"
		"2:	movl $1, %0\n"
		"	jmp ftrace_test_nop5\n"
		"3:	movl $2, %0\n"
		"	jmp 1b\n"
		".previous\n"
		_ASM_EXTABLE(ftrace_test_p6nop, 2b)
		_ASM_EXTABLE(ftrace_test_nop5, 3b)
		: "=r"(faulted) : "0" (faulted));

	switch (faulted) {
	case 0:
		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
		break;
	case 1:
		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
		break;
	case 2:
		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
		break;
	}

	/* The return code is returned via data */
	*(unsigned long *)data = 0;

	return 0;
}
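/*
 * Illustrative sketch, not part of the original file: how the generic
 * ftrace core is expected to drive the arch hooks above when enabling
 * tracing of one call site. "ip" and "FTRACE_ADDR" are stand-ins for
 * values supplied by kernel/trace/ftrace.c.
 *
 *	unsigned char *old, *new;
 *	int ret;
 *
 *	old = ftrace_nop_replace();                  (site currently a nop)
 *	new = ftrace_call_replace(ip, FTRACE_ADDR);  (build the call insn)
 *	ret = ftrace_modify_code(ip, old, new);      (verify old bytes, patch)
 *
 * Disabling a site is symmetric: old and new swap roles.
 */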