/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>

#include "common.h"

unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobes */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is being (un)optimized */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe has been optimized, the original bytes have been
	 * overwritten by the jump destination address.  In that case, the
	 * original bytes must be recovered from the op->optinsn.copied_insn
	 * buffer.
	 */
	if (probe_kernel_read(buf, (void *)addr,
			      MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	} else {
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
	}

	return (unsigned long)buf;
}
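
/*
 * Worked example (illustrative only, numbers not from this file): suppose
 * the probed instruction at kp->addr is 2 bytes long, so the 5-byte
 * relative jump also overwrites the 3-byte instruction that follows it.
 * Recovering from addr == kp->addr + 2 gives offs = 1, and
 * RELATIVE_ADDR_SIZE - offs = 3 bytes are copied from copied_insn + 1,
 * which is exactly the displaced second instruction.
 */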

/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;		/* REX.W prefix */
	*addr++ = 0xbf;		/* movabs $val, %rdi */
#else
	*addr++ = 0xb8;		/* movl $val, %eax */
#endif
	*(unsigned long *)addr = val;
}

asm (
	"optprobe_template_func:\n"
	".global optprobe_template_entry\n"
	"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
	/* We don't bother saving the ss register */
	"	pushq %rsp\n"
	"	pushfq\n"
	SAVE_REGS_STRING
	"	movq %rsp, %rsi\n"
	".global optprobe_template_val\n"
	"optprobe_template_val:\n"
	ASM_NOP5
	ASM_NOP5
	".global optprobe_template_call\n"
	"optprobe_template_call:\n"
	ASM_NOP5
	/* Move flags into the saved-rsp slot so popfq below restores them */
	"	movq 144(%rsp), %rdx\n"
	"	movq %rdx, 152(%rsp)\n"
	RESTORE_REGS_STRING
	/* Skip flags entry */
	"	addq $8, %rsp\n"
	"	popfq\n"
#else /* CONFIG_X86_32 */
	"	pushf\n"
	SAVE_REGS_STRING
	"	movl %esp, %edx\n"
	".global optprobe_template_val\n"
	"optprobe_template_val:\n"
	ASM_NOP5
	".global optprobe_template_call\n"
	"optprobe_template_call:\n"
	ASM_NOP5
	RESTORE_REGS_STRING
	"	addl $4, %esp\n"	/* skip cs */
	"	popf\n"
#endif
	".global optprobe_template_end\n"
	"optprobe_template_end:\n"
	".type optprobe_template_func, @function\n"
	".size optprobe_template_func, .-optprobe_template_func\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

#define TMPL_MOVE_IDX \
	((long)&optprobe_template_val - (long)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)&optprobe_template_call - (long)&optprobe_template_entry)
#define TMPL_END_IDX \
	((long)&optprobe_template_end - (long)&optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

/* Optimized kprobe callback function: called from the optinsn detour */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	unsigned long flags;

	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	local_irq_save(flags);
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		/* Save skipped registers */
#ifdef CONFIG_X86_64
		regs->cs = __KERNEL_CS;
#else
		regs->cs = __KERNEL_CS | get_kernel_rpl();
		regs->gs = 0;
#endif
		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback);

static int copy_optimized_instructions(u8 *dest, u8 *src)
{
	struct insn insn;
	int len = 0, ret;

	while (len < RELATIVEJUMP_SIZE) {
		ret = __copy_instruction(dest + len, src + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}
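
/*
 * Sketch of the out-of-line buffer assembled later by
 * arch_prepare_optimized_kprobe() (illustrative; real offsets come from
 * the template labels above):
 *
 *   buf + 0              copy of the template (save regs ... restore regs)
 *   buf + TMPL_MOVE_IDX  mov $op, %rdi/%eax   (patched by synthesize_set_arg1)
 *   buf + TMPL_CALL_IDX  call optimized_callback (patched by synthesize_relcall)
 *   buf + TMPL_END_IDX   copy of the displaced instructions
 *   buf + TMPL_END_IDX + op->optinsn.size
 *                        jmp back to op->kp.addr + op->optinsn.size
 */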

/* Check whether insn is an indirect jump */
static int insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Look up the symbol containing paddr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and register setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr < (unsigned long)__entry_text_end)) ||
	    ((paddr >= (unsigned long)__irqentry_text_start) &&
	     (paddr < (unsigned long)__irqentry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < RELATIVEJUMP_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code may jump into this function,
			 * we can't optimize a kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem has put a breakpoint here */
		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
			return 0;
		/* Recover address */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check that the instruction doesn't jump into the target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
					 RELATIVE_ADDR_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/* Check whether optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}
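
/*
 * Example (illustrative): a two-byte "jmp -4" (eb fc) at address A decodes
 * with next_byte == A + 2 and immediate.value == -4, so its target is A - 2.
 * can_optimize() above rejects the probe if any such target lands inside
 * [paddr + INT3_SIZE, paddr + INT3_SIZE + RELATIVE_ADDR_SIZE], i.e. inside
 * the bytes that the 5-byte relative jump would overwrite.
 */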

/* Check whether addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that the jump will replace.
 * The target instructions MUST be relocatable (checked inside).
 * This is called when a new aggregated (optimized) probe is allocated or
 * reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
				  struct kprobe *__unused)
{
	u8 *buf;
	int ret;
	long rel;

	if (!can_optimize((unsigned long)op->kp.addr))
		return -EILSEQ;

	op->optinsn.insn = get_optinsn_slot();
	if (!op->optinsn.insn)
		return -ENOMEM;

	/*
	 * Verify that the address gap is within the +/-2GB range, because
	 * the detour uses a 32-bit relative jump.
	 */
	rel = (long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE);
	if (abs(rel) > 0x7fffffff) {
		__arch_remove_optimized_kprobe(op, 0);
		return -ERANGE;
	}

	buf = (u8 *)op->optinsn.insn;
	set_memory_rw((unsigned long)buf & PAGE_MASK, 1);

	/* Copy instructions into the out-of-line buffer */
	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
	if (ret < 0) {
		__arch_remove_optimized_kprobe(op, 0);
		return ret;
	}
	op->optinsn.size = ret;

	/* Copy arch-dep-instance from template */
	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);

	/* Set probe information */
	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

	/* Set probe function call */
	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);

	/* Set returning jmp instruction at the tail of out-of-line buffer */
	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
			   (u8 *)op->kp.addr + op->optinsn.size);

	set_memory_ro((unsigned long)buf & PAGE_MASK, 1);

	flush_icache_range((unsigned long)buf,
			   (unsigned long)buf + TMPL_END_IDX +
			   op->optinsn.size + RELATIVEJUMP_SIZE);
	return 0;
}

/*
 * Replace breakpoints (int3) with relative jumps.
 * Caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;
	u8 insn_buf[RELATIVEJUMP_SIZE];

	list_for_each_entry_safe(op, tmp, oplist, list) {
		s32 rel = (s32)((long)op->optinsn.insn -
			((long)op->kp.addr + RELATIVEJUMP_SIZE));

		WARN_ON(kprobe_disabled(&op->kp));

		/* Backup instructions which will be replaced by jump address */
		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
		       RELATIVE_ADDR_SIZE);

		insn_buf[0] = RELATIVEJUMP_OPCODE;
		*(s32 *)(&insn_buf[1]) = rel;

		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
			     op->optinsn.insn);

		list_del_init(&op->list);
	}
}
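
/*
 * Example displacement (illustrative addresses): with
 * op->kp.addr == 0xffffffff81000000 and
 * op->optinsn.insn == 0xffffffffa0002000, the jump above encodes
 * rel = 0xffffffffa0002000 - (0xffffffff81000000 + 5) = 0x1f001ffb,
 * i.e. the 5-byte instruction is e9 fb 1f 00 1f (little-endian imm32).
 */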

/* Replace a relative jump with a breakpoint (int3). */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	u8 insn_buf[RELATIVEJUMP_SIZE];

	/* Set int3 to the first byte for kprobes */
	insn_buf[0] = BREAKPOINT_INSTRUCTION;
	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
		     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
				    struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
	struct optimized_kprobe *op;

	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
		/* This kprobe is really able to run the optimized path. */
		op = container_of(p, struct optimized_kprobe, kp);
		/* Detour through the copied instructions */
		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
		if (!reenter)
			reset_current_kprobe();
		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);
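
/*
 * Illustrative usage (not part of this file): a minimal module whose probe
 * the kprobes core may later jump-optimize via the arch_* hooks above.
 * The probed symbol "do_sys_open" is only an example; any probe point that
 * satisfies can_optimize() would do.
 */
#if 0
#include <linux/module.h>
#include <linux/kprobes.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("optprobe example: hit %pS\n", p->addr);
	return 0;
}

static struct kprobe example_kp = {
	.symbol_name	= "do_sys_open",	/* example probe point */
	.pre_handler	= example_pre,
};

static int __init optprobe_example_init(void)
{
	/* The core optimizes the probe asynchronously after registration */
	return register_kprobe(&example_kp);
}

static void __exit optprobe_example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(optprobe_example_init);
module_exit(optprobe_example_exit);
MODULE_LICENSE("GPL");
#endif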