1 /* 2 * Kernel Probes Jump Optimization (Optprobes) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004 19 * Copyright (C) Hitachi Ltd., 2012 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/ptrace.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/hardirq.h> 26 #include <linux/preempt.h> 27 #include <linux/extable.h> 28 #include <linux/kdebug.h> 29 #include <linux/kallsyms.h> 30 #include <linux/ftrace.h> 31 #include <linux/frame.h> 32 33 #include <asm/text-patching.h> 34 #include <asm/cacheflush.h> 35 #include <asm/desc.h> 36 #include <asm/pgtable.h> 37 #include <linux/uaccess.h> 38 #include <asm/alternative.h> 39 #include <asm/insn.h> 40 #include <asm/debugreg.h> 41 #include <asm/set_memory.h> 42 #include <asm/sections.h> 43 44 #include "common.h" 45 46 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 47 { 48 struct optimized_kprobe *op; 49 struct kprobe *kp; 50 long offs; 51 int i; 52 53 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 54 kp = get_kprobe((void *)addr - i); 55 /* This function only handles jump-optimized kprobe */ 56 if (kp && kprobe_optimized(kp)) { 57 op = container_of(kp, struct optimized_kprobe, kp); 58 /* If op->list is not empty, op is under optimizing */ 59 if (list_empty(&op->list)) 60 goto found; 61 } 62 } 63 64 return addr; 65 found: 66 /* 67 * If the kprobe can be optimized, original bytes which can be 68 * overwritten by jump destination address. In this case, original 69 * bytes must be recovered from op->optinsn.copied_insn buffer. 70 */ 71 if (probe_kernel_read(buf, (void *)addr, 72 MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 73 return 0UL; 74 75 if (addr == (unsigned long)kp->addr) { 76 buf[0] = kp->opcode; 77 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 78 } else { 79 offs = addr - (unsigned long)kp->addr - 1; 80 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 81 } 82 83 return (unsigned long)buf; 84 } 85 86 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 87 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 88 { 89 #ifdef CONFIG_X86_64 90 *addr++ = 0x48; 91 *addr++ = 0xbf; 92 #else 93 *addr++ = 0xb8; 94 #endif 95 *(unsigned long *)addr = val; 96 } 97 98 asm ( 99 "optprobe_template_func:\n" 100 ".global optprobe_template_entry\n" 101 "optprobe_template_entry:\n" 102 #ifdef CONFIG_X86_64 103 /* We don't bother saving the ss register */ 104 " pushq %rsp\n" 105 " pushfq\n" 106 SAVE_REGS_STRING 107 " movq %rsp, %rsi\n" 108 ".global optprobe_template_val\n" 109 "optprobe_template_val:\n" 110 ASM_NOP5 111 ASM_NOP5 112 ".global optprobe_template_call\n" 113 "optprobe_template_call:\n" 114 ASM_NOP5 115 /* Move flags to rsp */ 116 " movq 144(%rsp), %rdx\n" 117 " movq %rdx, 152(%rsp)\n" 118 RESTORE_REGS_STRING 119 /* Skip flags entry */ 120 " addq $8, %rsp\n" 121 " popfq\n" 122 #else /* CONFIG_X86_32 */ 123 " pushf\n" 124 SAVE_REGS_STRING 125 " movl %esp, %edx\n" 126 ".global optprobe_template_val\n" 127 "optprobe_template_val:\n" 128 ASM_NOP5 129 ".global optprobe_template_call\n" 130 "optprobe_template_call:\n" 131 ASM_NOP5 132 RESTORE_REGS_STRING 133 " addl $4, %esp\n" /* skip cs */ 134 " popf\n" 135 #endif 136 ".global optprobe_template_end\n" 137 "optprobe_template_end:\n" 138 ".type optprobe_template_func, @function\n" 139 ".size optprobe_template_func, .-optprobe_template_func\n"); 140 141 void optprobe_template_func(void); 142 STACK_FRAME_NON_STANDARD(optprobe_template_func); 143 144 #define TMPL_MOVE_IDX \ 145 ((long)optprobe_template_val - (long)optprobe_template_entry) 146 #define TMPL_CALL_IDX \ 147 ((long)optprobe_template_call - (long)optprobe_template_entry) 148 #define TMPL_END_IDX \ 149 ((long)optprobe_template_end - (long)optprobe_template_entry) 150 151 #define INT3_SIZE sizeof(kprobe_opcode_t) 152 153 /* Optimized kprobe call back function: called from optinsn */ 154 static void 155 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 156 { 157 /* This is possible if op is under delayed unoptimizing */ 158 if (kprobe_disabled(&op->kp)) 159 return; 160 161 preempt_disable(); 162 if (kprobe_running()) { 163 kprobes_inc_nmissed_count(&op->kp); 164 } else { 165 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 166 /* Save skipped registers */ 167 #ifdef CONFIG_X86_64 168 regs->cs = __KERNEL_CS; 169 #else 170 regs->cs = __KERNEL_CS | get_kernel_rpl(); 171 regs->gs = 0; 172 #endif 173 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 174 regs->orig_ax = ~0UL; 175 176 __this_cpu_write(current_kprobe, &op->kp); 177 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 178 opt_pre_handler(&op->kp, regs); 179 __this_cpu_write(current_kprobe, NULL); 180 } 181 preempt_enable_no_resched(); 182 } 183 NOKPROBE_SYMBOL(optimized_callback); 184 185 static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) 186 { 187 struct insn insn; 188 int len = 0, ret; 189 190 while (len < RELATIVEJUMP_SIZE) { 191 ret = __copy_instruction(dest + len, src + len, real, &insn); 192 if (!ret || !can_boost(&insn, src + len)) 193 return -EINVAL; 194 len += ret; 195 } 196 /* Check whether the address range is reserved */ 197 if (ftrace_text_reserved(src, src + len - 1) || 198 alternatives_text_reserved(src, src + len - 1) || 199 jump_label_text_reserved(src, src + len - 1)) 200 return -EBUSY; 201 202 return len; 203 } 204 205 /* Check whether insn is indirect jump */ 206 static int insn_is_indirect_jump(struct insn *insn) 207 { 208 return ((insn->opcode.bytes[0] == 0xff && 209 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 210 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 211 } 212 213 /* Check whether insn jumps into specified address range */ 214 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 215 { 216 unsigned long target = 0; 217 218 switch (insn->opcode.bytes[0]) { 219 case 0xe0: /* loopne */ 220 case 0xe1: /* loope */ 221 case 0xe2: /* loop */ 222 case 0xe3: /* jcxz */ 223 case 0xe9: /* near relative jump */ 224 case 0xeb: /* short relative jump */ 225 break; 226 case 0x0f: 227 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 228 break; 229 return 0; 230 default: 231 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 232 break; 233 return 0; 234 } 235 target = (unsigned long)insn->next_byte + insn->immediate.value; 236 237 return (start <= target && target <= start + len); 238 } 239 240 /* Decode whole function to ensure any instructions don't jump into target */ 241 static int can_optimize(unsigned long paddr) 242 { 243 unsigned long addr, size = 0, offset = 0; 244 struct insn insn; 245 kprobe_opcode_t buf[MAX_INSN_SIZE]; 246 247 /* Lookup symbol including addr */ 248 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 249 return 0; 250 251 /* 252 * Do not optimize in the entry code due to the unstable 253 * stack handling and registers setup. 254 */ 255 if (((paddr >= (unsigned long)__entry_text_start) && 256 (paddr < (unsigned long)__entry_text_end)) || 257 ((paddr >= (unsigned long)__irqentry_text_start) && 258 (paddr < (unsigned long)__irqentry_text_end))) 259 return 0; 260 261 /* Check there is enough space for a relative jump. */ 262 if (size - offset < RELATIVEJUMP_SIZE) 263 return 0; 264 265 /* Decode instructions */ 266 addr = paddr - offset; 267 while (addr < paddr - offset + size) { /* Decode until function end */ 268 unsigned long recovered_insn; 269 if (search_exception_tables(addr)) 270 /* 271 * Since some fixup code will jumps into this function, 272 * we can't optimize kprobe in this function. 273 */ 274 return 0; 275 recovered_insn = recover_probed_instruction(buf, addr); 276 if (!recovered_insn) 277 return 0; 278 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 279 insn_get_length(&insn); 280 /* Another subsystem puts a breakpoint */ 281 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 282 return 0; 283 /* Recover address */ 284 insn.kaddr = (void *)addr; 285 insn.next_byte = (void *)(addr + insn.length); 286 /* Check any instructions don't jump into target */ 287 if (insn_is_indirect_jump(&insn) || 288 insn_jump_into_range(&insn, paddr + INT3_SIZE, 289 RELATIVE_ADDR_SIZE)) 290 return 0; 291 addr += insn.length; 292 } 293 294 return 1; 295 } 296 297 /* Check optimized_kprobe can actually be optimized. */ 298 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 299 { 300 int i; 301 struct kprobe *p; 302 303 for (i = 1; i < op->optinsn.size; i++) { 304 p = get_kprobe(op->kp.addr + i); 305 if (p && !kprobe_disabled(p)) 306 return -EEXIST; 307 } 308 309 return 0; 310 } 311 312 /* Check the addr is within the optimized instructions. */ 313 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 314 unsigned long addr) 315 { 316 return ((unsigned long)op->kp.addr <= addr && 317 (unsigned long)op->kp.addr + op->optinsn.size > addr); 318 } 319 320 /* Free optimized instruction slot */ 321 static 322 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 323 { 324 if (op->optinsn.insn) { 325 free_optinsn_slot(op->optinsn.insn, dirty); 326 op->optinsn.insn = NULL; 327 op->optinsn.size = 0; 328 } 329 } 330 331 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 332 { 333 __arch_remove_optimized_kprobe(op, 1); 334 } 335 336 /* 337 * Copy replacing target instructions 338 * Target instructions MUST be relocatable (checked inside) 339 * This is called when new aggr(opt)probe is allocated or reused. 340 */ 341 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 342 struct kprobe *__unused) 343 { 344 u8 *buf = NULL, *slot; 345 int ret, len; 346 long rel; 347 348 if (!can_optimize((unsigned long)op->kp.addr)) 349 return -EILSEQ; 350 351 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); 352 if (!buf) 353 return -ENOMEM; 354 355 op->optinsn.insn = slot = get_optinsn_slot(); 356 if (!slot) { 357 ret = -ENOMEM; 358 goto out; 359 } 360 361 /* 362 * Verify if the address gap is in 2GB range, because this uses 363 * a relative jump. 364 */ 365 rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; 366 if (abs(rel) > 0x7fffffff) { 367 ret = -ERANGE; 368 goto err; 369 } 370 371 /* Copy arch-dep-instance from template */ 372 memcpy(buf, optprobe_template_entry, TMPL_END_IDX); 373 374 /* Copy instructions into the out-of-line buffer */ 375 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, 376 slot + TMPL_END_IDX); 377 if (ret < 0) 378 goto err; 379 op->optinsn.size = ret; 380 len = TMPL_END_IDX + op->optinsn.size; 381 382 /* Set probe information */ 383 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 384 385 /* Set probe function call */ 386 synthesize_relcall(buf + TMPL_CALL_IDX, 387 slot + TMPL_CALL_IDX, optimized_callback); 388 389 /* Set returning jmp instruction at the tail of out-of-line buffer */ 390 synthesize_reljump(buf + len, slot + len, 391 (u8 *)op->kp.addr + op->optinsn.size); 392 len += RELATIVEJUMP_SIZE; 393 394 /* We have to use text_poke for instuction buffer because it is RO */ 395 text_poke(slot, buf, len); 396 ret = 0; 397 out: 398 kfree(buf); 399 return ret; 400 401 err: 402 __arch_remove_optimized_kprobe(op, 0); 403 goto out; 404 } 405 406 /* 407 * Replace breakpoints (int3) with relative jumps. 408 * Caller must call with locking kprobe_mutex and text_mutex. 409 */ 410 void arch_optimize_kprobes(struct list_head *oplist) 411 { 412 struct optimized_kprobe *op, *tmp; 413 u8 insn_buf[RELATIVEJUMP_SIZE]; 414 415 list_for_each_entry_safe(op, tmp, oplist, list) { 416 s32 rel = (s32)((long)op->optinsn.insn - 417 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 418 419 WARN_ON(kprobe_disabled(&op->kp)); 420 421 /* Backup instructions which will be replaced by jump address */ 422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 423 RELATIVE_ADDR_SIZE); 424 425 insn_buf[0] = RELATIVEJUMP_OPCODE; 426 *(s32 *)(&insn_buf[1]) = rel; 427 428 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 429 op->optinsn.insn); 430 431 list_del_init(&op->list); 432 } 433 } 434 435 /* Replace a relative jump with a breakpoint (int3). */ 436 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 437 { 438 u8 insn_buf[RELATIVEJUMP_SIZE]; 439 440 /* Set int3 to first byte for kprobes */ 441 insn_buf[0] = BREAKPOINT_INSTRUCTION; 442 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 443 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 444 op->optinsn.insn); 445 } 446 447 /* 448 * Recover original instructions and breakpoints from relative jumps. 449 * Caller must call with locking kprobe_mutex. 450 */ 451 extern void arch_unoptimize_kprobes(struct list_head *oplist, 452 struct list_head *done_list) 453 { 454 struct optimized_kprobe *op, *tmp; 455 456 list_for_each_entry_safe(op, tmp, oplist, list) { 457 arch_unoptimize_kprobe(op); 458 list_move(&op->list, done_list); 459 } 460 } 461 462 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 463 { 464 struct optimized_kprobe *op; 465 466 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 467 /* This kprobe is really able to run optimized path. */ 468 op = container_of(p, struct optimized_kprobe, kp); 469 /* Detour through copied instructions */ 470 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 471 if (!reenter) 472 reset_current_kprobe(); 473 preempt_enable_no_resched(); 474 return 1; 475 } 476 return 0; 477 } 478 NOKPROBE_SYMBOL(setup_detour_execution); 479