/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is still being optimized */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe is optimized, the original bytes have been
	 * overwritten by the jump destination address. In that case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (probe_kernel_read(buf, (void *)addr,
			      MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs,
		       RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}
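/*
 * Recovery sketch (added for illustration; not part of the original
 * source): once a probe at kp->addr is jump-optimized, memory holds
 *
 *	kp->addr + 0:		0xe9 (RELATIVEJUMP_OPCODE; the original
 *				first byte is preserved in kp->opcode)
 *	kp->addr + 1..4:	rel32 to the out-of-line buffer (the
 *				original RELATIVE_ADDR_SIZE bytes are
 *				preserved in op->optinsn.copied_insn)
 *
 * so the function above rebuilds the original byte stream by splicing
 * kp->opcode and copied_insn back over the jump before decoding.
 */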
/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;		/* REX.W prefix */
	*addr++ = 0xbf;		/* movabs $imm64, %rdi */
#else
	*addr++ = 0xb8;		/* movl $imm32, %eax */
#endif
	*(unsigned long *)addr = val;
}

asm (
	".global optprobe_template_entry\n"
	"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
	/* We don't bother saving the ss register */
	"	pushq %rsp\n"
	"	pushfq\n"
	SAVE_REGS_STRING
	"	movq %rsp, %rsi\n"
	".global optprobe_template_val\n"
	"optprobe_template_val:\n"
	ASM_NOP5
	ASM_NOP5
	".global optprobe_template_call\n"
	"optprobe_template_call:\n"
	ASM_NOP5
	/* Move flags to rsp */
	"	movq 144(%rsp), %rdx\n"
	"	movq %rdx, 152(%rsp)\n"
	RESTORE_REGS_STRING
	/* Skip flags entry */
	"	addq $8, %rsp\n"
	"	popfq\n"
#else /* CONFIG_X86_32 */
	"	pushf\n"
	SAVE_REGS_STRING
	"	movl %esp, %edx\n"
	".global optprobe_template_val\n"
	"optprobe_template_val:\n"
	ASM_NOP5
	".global optprobe_template_call\n"
	"optprobe_template_call:\n"
	ASM_NOP5
	RESTORE_REGS_STRING
	"	addl $4, %esp\n"	/* skip cs */
	"	popf\n"
#endif
	".global optprobe_template_end\n"
	"optprobe_template_end:\n");

#define TMPL_MOVE_IDX \
	((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
	((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from optinsn */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	unsigned long flags;

	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	local_irq_save(flags);
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		/* Save skipped registers */
#ifdef CONFIG_X86_64
		regs->cs = __KERNEL_CS;
#else
		regs->cs = __KERNEL_CS | get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback);

static int copy_optimized_instructions(u8 *dest, u8 *src)
{
	struct insn insn;
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/* Check whether insn is an indirect jump */
static int insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}
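/*
 * Worked example (added for illustration; not part of the original
 * source): "jmp *%rax" encodes as 0xff 0xe0, where the ModRM reg field
 * is 4 (near indirect jump); a far indirect jump uses reg field 5.
 * Masking the reg field with 6 therefore matches both 4 and 5 while
 * rejecting the indirect calls (reg 2/3) and push (reg 6) that share
 * the 0xff opcode.
 */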
/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start,
				int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling.
	 */
	if ((paddr >= (unsigned long)__entry_text_start) &&
	    (paddr <  (unsigned long)__entry_text_end))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code may jump into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check this instruction doesn't jump into the target range */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}
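/*
 * Layout sketch of the out-of-line buffer built by
 * arch_prepare_optimized_kprobe() below (added for illustration; not
 * part of the original source):
 *
 *	buf + 0 .. TMPL_END_IDX - 1:	template copy (save regs, set
 *					arg1 = op at TMPL_MOVE_IDX, call
 *					optimized_callback at TMPL_CALL_IDX,
 *					restore regs)
 *	buf + TMPL_END_IDX ..:		op->optinsn.size bytes of copied
 *					original instructions
 *	tail:				reljump back to
 *					op->kp.addr + op->optinsn.size
 */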
/*
 * Copy the replaced target instructions.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggr(opt)probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf;
	int ret;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	op->optinsn.insn = get_optinsn_slot();
	if (!op->optinsn.insn)
		return -ENOMEM;

	/*
	 * Verify that the address gap is within the 2GB range, because
	 * this uses a relative jump.
	 */
	rel = (long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE);
	if (abs(rel) > 0x7fffffff) {
		__arch_remove_optimized_kprobe(op, 0);
		return -ERANGE;
	}

	buf = (u8 *)op->optinsn.insn;
	set_memory_rw((unsigned long)buf & PAGE_MASK, 1);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
	if (ret < 0) {
		__arch_remove_optimized_kprobe(op, 0);
		return ret;
	}
	op->optinsn.size = ret;

	/* Copy arch-dep-instance from template */
	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
			   (u8 *)op->kp.addr + op->optinsn.size);

	set_memory_ro((unsigned long)buf & PAGE_MASK, 1);

	flush_icache_range((unsigned long)buf,
			   (unsigned long)buf + TMPL_END_IDX +
			   op->optinsn.size + RELATIVEJUMP_SIZE);
	return 0;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buf[RELATIVEJUMP_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
		       RELATIVE_ADDR_SIZE);

		insn_buf[0] = RELATIVEJUMP_OPCODE;
		*(s32 *)(&insn_buf[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
			     op->optinsn.insn);

		list_del_init(&op->list);
	}
}

/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 insn_buf[RELATIVEJUMP_SIZE];

	/* Set int3 as the first byte for kprobes */
	insn_buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
		     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}
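/*
 * Note (added for illustration; not part of the original source): if an
 * int3 hits a probe flagged KPROBE_FLAG_OPTIMIZED (e.g. while the jump
 * is being installed or removed), the function below detours execution
 * to the copied instructions at op->optinsn.insn + TMPL_END_IDX instead
 * of single-stepping the original instruction.
 */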
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);
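/*
 * Usage sketch (added for illustration; not part of the original
 * source): a kprobe registered on an optimizable function is converted
 * transparently by the optimizer, e.g.:
 *
 *	static struct kprobe kp = { .symbol_name = "do_sys_open" };
 *	register_kprobe(&kp);
 *
 * and later shows up with the [OPTIMIZED] tag in
 * /sys/kernel/debug/kprobes/list.
 */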