1 /* 2 * Kernel Probes Jump Optimization (Optprobes) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004 19 * Copyright (C) Hitachi Ltd., 2012 20 */ 21 #include <linux/kprobes.h> 22 #include <linux/ptrace.h> 23 #include <linux/string.h> 24 #include <linux/slab.h> 25 #include <linux/hardirq.h> 26 #include <linux/preempt.h> 27 #include <linux/extable.h> 28 #include <linux/kdebug.h> 29 #include <linux/kallsyms.h> 30 #include <linux/ftrace.h> 31 #include <linux/frame.h> 32 33 #include <asm/text-patching.h> 34 #include <asm/cacheflush.h> 35 #include <asm/desc.h> 36 #include <asm/pgtable.h> 37 #include <linux/uaccess.h> 38 #include <asm/alternative.h> 39 #include <asm/insn.h> 40 #include <asm/debugreg.h> 41 #include <asm/set_memory.h> 42 #include <asm/sections.h> 43 #include <asm/nospec-branch.h> 44 45 #include "common.h" 46 47 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 48 { 49 struct optimized_kprobe *op; 50 struct kprobe *kp; 51 long offs; 52 int i; 53 54 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 55 kp = get_kprobe((void *)addr - i); 56 /* This function only handles jump-optimized kprobe */ 57 if (kp && kprobe_optimized(kp)) { 58 op = container_of(kp, struct optimized_kprobe, kp); 59 /* If op->list is not empty, op is under optimizing */ 60 if (list_empty(&op->list)) 61 goto found; 62 } 63 } 64 65 return addr; 66 found: 67 /* 68 * If the kprobe can be optimized, original bytes which can be 69 * overwritten by jump destination address. In this case, original 70 * bytes must be recovered from op->optinsn.copied_insn buffer. 71 */ 72 if (probe_kernel_read(buf, (void *)addr, 73 MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 74 return 0UL; 75 76 if (addr == (unsigned long)kp->addr) { 77 buf[0] = kp->opcode; 78 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 79 } else { 80 offs = addr - (unsigned long)kp->addr - 1; 81 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 82 } 83 84 return (unsigned long)buf; 85 } 86 87 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 88 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 89 { 90 #ifdef CONFIG_X86_64 91 *addr++ = 0x48; 92 *addr++ = 0xbf; 93 #else 94 *addr++ = 0xb8; 95 #endif 96 *(unsigned long *)addr = val; 97 } 98 99 asm ( 100 "optprobe_template_func:\n" 101 ".global optprobe_template_entry\n" 102 "optprobe_template_entry:\n" 103 #ifdef CONFIG_X86_64 104 /* We don't bother saving the ss register */ 105 " pushq %rsp\n" 106 " pushfq\n" 107 SAVE_REGS_STRING 108 " movq %rsp, %rsi\n" 109 ".global optprobe_template_val\n" 110 "optprobe_template_val:\n" 111 ASM_NOP5 112 ASM_NOP5 113 ".global optprobe_template_call\n" 114 "optprobe_template_call:\n" 115 ASM_NOP5 116 /* Move flags to rsp */ 117 " movq 144(%rsp), %rdx\n" 118 " movq %rdx, 152(%rsp)\n" 119 RESTORE_REGS_STRING 120 /* Skip flags entry */ 121 " addq $8, %rsp\n" 122 " popfq\n" 123 #else /* CONFIG_X86_32 */ 124 " pushf\n" 125 SAVE_REGS_STRING 126 " movl %esp, %edx\n" 127 ".global optprobe_template_val\n" 128 "optprobe_template_val:\n" 129 ASM_NOP5 130 ".global optprobe_template_call\n" 131 "optprobe_template_call:\n" 132 ASM_NOP5 133 RESTORE_REGS_STRING 134 " addl $4, %esp\n" /* skip cs */ 135 " popf\n" 136 #endif 137 ".global optprobe_template_end\n" 138 "optprobe_template_end:\n" 139 ".type optprobe_template_func, @function\n" 140 ".size optprobe_template_func, .-optprobe_template_func\n"); 141 142 void optprobe_template_func(void); 143 STACK_FRAME_NON_STANDARD(optprobe_template_func); 144 145 #define TMPL_MOVE_IDX \ 146 ((long)optprobe_template_val - (long)optprobe_template_entry) 147 #define TMPL_CALL_IDX \ 148 ((long)optprobe_template_call - (long)optprobe_template_entry) 149 #define TMPL_END_IDX \ 150 ((long)optprobe_template_end - (long)optprobe_template_entry) 151 152 #define INT3_SIZE sizeof(kprobe_opcode_t) 153 154 /* Optimized kprobe call back function: called from optinsn */ 155 static void 156 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 157 { 158 /* This is possible if op is under delayed unoptimizing */ 159 if (kprobe_disabled(&op->kp)) 160 return; 161 162 preempt_disable(); 163 if (kprobe_running()) { 164 kprobes_inc_nmissed_count(&op->kp); 165 } else { 166 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 167 /* Save skipped registers */ 168 #ifdef CONFIG_X86_64 169 regs->cs = __KERNEL_CS; 170 #else 171 regs->cs = __KERNEL_CS | get_kernel_rpl(); 172 regs->gs = 0; 173 #endif 174 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 175 regs->orig_ax = ~0UL; 176 177 __this_cpu_write(current_kprobe, &op->kp); 178 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 179 opt_pre_handler(&op->kp, regs); 180 __this_cpu_write(current_kprobe, NULL); 181 } 182 preempt_enable_no_resched(); 183 } 184 NOKPROBE_SYMBOL(optimized_callback); 185 186 static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) 187 { 188 struct insn insn; 189 int len = 0, ret; 190 191 while (len < RELATIVEJUMP_SIZE) { 192 ret = __copy_instruction(dest + len, src + len, real, &insn); 193 if (!ret || !can_boost(&insn, src + len)) 194 return -EINVAL; 195 len += ret; 196 } 197 /* Check whether the address range is reserved */ 198 if (ftrace_text_reserved(src, src + len - 1) || 199 alternatives_text_reserved(src, src + len - 1) || 200 jump_label_text_reserved(src, src + len - 1)) 201 return -EBUSY; 202 203 return len; 204 } 205 206 /* Check whether insn is indirect jump */ 207 static int __insn_is_indirect_jump(struct insn *insn) 208 { 209 return ((insn->opcode.bytes[0] == 0xff && 210 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 211 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 212 } 213 214 /* Check whether insn jumps into specified address range */ 215 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 216 { 217 unsigned long target = 0; 218 219 switch (insn->opcode.bytes[0]) { 220 case 0xe0: /* loopne */ 221 case 0xe1: /* loope */ 222 case 0xe2: /* loop */ 223 case 0xe3: /* jcxz */ 224 case 0xe9: /* near relative jump */ 225 case 0xeb: /* short relative jump */ 226 break; 227 case 0x0f: 228 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 229 break; 230 return 0; 231 default: 232 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 233 break; 234 return 0; 235 } 236 target = (unsigned long)insn->next_byte + insn->immediate.value; 237 238 return (start <= target && target <= start + len); 239 } 240 241 static int insn_is_indirect_jump(struct insn *insn) 242 { 243 int ret = __insn_is_indirect_jump(insn); 244 245 #ifdef CONFIG_RETPOLINE 246 /* 247 * Jump to x86_indirect_thunk_* is treated as an indirect jump. 248 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with 249 * older gcc may use indirect jump. So we add this check instead of 250 * replace indirect-jump check. 251 */ 252 if (!ret) 253 ret = insn_jump_into_range(insn, 254 (unsigned long)__indirect_thunk_start, 255 (unsigned long)__indirect_thunk_end - 256 (unsigned long)__indirect_thunk_start); 257 #endif 258 return ret; 259 } 260 261 /* Decode whole function to ensure any instructions don't jump into target */ 262 static int can_optimize(unsigned long paddr) 263 { 264 unsigned long addr, size = 0, offset = 0; 265 struct insn insn; 266 kprobe_opcode_t buf[MAX_INSN_SIZE]; 267 268 /* Lookup symbol including addr */ 269 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 270 return 0; 271 272 /* 273 * Do not optimize in the entry code due to the unstable 274 * stack handling and registers setup. 275 */ 276 if (((paddr >= (unsigned long)__entry_text_start) && 277 (paddr < (unsigned long)__entry_text_end)) || 278 ((paddr >= (unsigned long)__irqentry_text_start) && 279 (paddr < (unsigned long)__irqentry_text_end))) 280 return 0; 281 282 /* Check there is enough space for a relative jump. */ 283 if (size - offset < RELATIVEJUMP_SIZE) 284 return 0; 285 286 /* Decode instructions */ 287 addr = paddr - offset; 288 while (addr < paddr - offset + size) { /* Decode until function end */ 289 unsigned long recovered_insn; 290 if (search_exception_tables(addr)) 291 /* 292 * Since some fixup code will jumps into this function, 293 * we can't optimize kprobe in this function. 294 */ 295 return 0; 296 recovered_insn = recover_probed_instruction(buf, addr); 297 if (!recovered_insn) 298 return 0; 299 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 300 insn_get_length(&insn); 301 /* Another subsystem puts a breakpoint */ 302 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 303 return 0; 304 /* Recover address */ 305 insn.kaddr = (void *)addr; 306 insn.next_byte = (void *)(addr + insn.length); 307 /* Check any instructions don't jump into target */ 308 if (insn_is_indirect_jump(&insn) || 309 insn_jump_into_range(&insn, paddr + INT3_SIZE, 310 RELATIVE_ADDR_SIZE)) 311 return 0; 312 addr += insn.length; 313 } 314 315 return 1; 316 } 317 318 /* Check optimized_kprobe can actually be optimized. */ 319 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 320 { 321 int i; 322 struct kprobe *p; 323 324 for (i = 1; i < op->optinsn.size; i++) { 325 p = get_kprobe(op->kp.addr + i); 326 if (p && !kprobe_disabled(p)) 327 return -EEXIST; 328 } 329 330 return 0; 331 } 332 333 /* Check the addr is within the optimized instructions. */ 334 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 335 unsigned long addr) 336 { 337 return ((unsigned long)op->kp.addr <= addr && 338 (unsigned long)op->kp.addr + op->optinsn.size > addr); 339 } 340 341 /* Free optimized instruction slot */ 342 static 343 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 344 { 345 if (op->optinsn.insn) { 346 free_optinsn_slot(op->optinsn.insn, dirty); 347 op->optinsn.insn = NULL; 348 op->optinsn.size = 0; 349 } 350 } 351 352 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 353 { 354 __arch_remove_optimized_kprobe(op, 1); 355 } 356 357 /* 358 * Copy replacing target instructions 359 * Target instructions MUST be relocatable (checked inside) 360 * This is called when new aggr(opt)probe is allocated or reused. 361 */ 362 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 363 struct kprobe *__unused) 364 { 365 u8 *buf = NULL, *slot; 366 int ret, len; 367 long rel; 368 369 if (!can_optimize((unsigned long)op->kp.addr)) 370 return -EILSEQ; 371 372 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); 373 if (!buf) 374 return -ENOMEM; 375 376 op->optinsn.insn = slot = get_optinsn_slot(); 377 if (!slot) { 378 ret = -ENOMEM; 379 goto out; 380 } 381 382 /* 383 * Verify if the address gap is in 2GB range, because this uses 384 * a relative jump. 385 */ 386 rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; 387 if (abs(rel) > 0x7fffffff) { 388 ret = -ERANGE; 389 goto err; 390 } 391 392 /* Copy arch-dep-instance from template */ 393 memcpy(buf, optprobe_template_entry, TMPL_END_IDX); 394 395 /* Copy instructions into the out-of-line buffer */ 396 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, 397 slot + TMPL_END_IDX); 398 if (ret < 0) 399 goto err; 400 op->optinsn.size = ret; 401 len = TMPL_END_IDX + op->optinsn.size; 402 403 /* Set probe information */ 404 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 405 406 /* Set probe function call */ 407 synthesize_relcall(buf + TMPL_CALL_IDX, 408 slot + TMPL_CALL_IDX, optimized_callback); 409 410 /* Set returning jmp instruction at the tail of out-of-line buffer */ 411 synthesize_reljump(buf + len, slot + len, 412 (u8 *)op->kp.addr + op->optinsn.size); 413 len += RELATIVEJUMP_SIZE; 414 415 /* We have to use text_poke for instuction buffer because it is RO */ 416 text_poke(slot, buf, len); 417 ret = 0; 418 out: 419 kfree(buf); 420 return ret; 421 422 err: 423 __arch_remove_optimized_kprobe(op, 0); 424 goto out; 425 } 426 427 /* 428 * Replace breakpoints (int3) with relative jumps. 429 * Caller must call with locking kprobe_mutex and text_mutex. 430 */ 431 void arch_optimize_kprobes(struct list_head *oplist) 432 { 433 struct optimized_kprobe *op, *tmp; 434 u8 insn_buf[RELATIVEJUMP_SIZE]; 435 436 list_for_each_entry_safe(op, tmp, oplist, list) { 437 s32 rel = (s32)((long)op->optinsn.insn - 438 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 439 440 WARN_ON(kprobe_disabled(&op->kp)); 441 442 /* Backup instructions which will be replaced by jump address */ 443 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 444 RELATIVE_ADDR_SIZE); 445 446 insn_buf[0] = RELATIVEJUMP_OPCODE; 447 *(s32 *)(&insn_buf[1]) = rel; 448 449 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 450 op->optinsn.insn); 451 452 list_del_init(&op->list); 453 } 454 } 455 456 /* Replace a relative jump with a breakpoint (int3). */ 457 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 458 { 459 u8 insn_buf[RELATIVEJUMP_SIZE]; 460 461 /* Set int3 to first byte for kprobes */ 462 insn_buf[0] = BREAKPOINT_INSTRUCTION; 463 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 464 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 465 op->optinsn.insn); 466 } 467 468 /* 469 * Recover original instructions and breakpoints from relative jumps. 470 * Caller must call with locking kprobe_mutex. 471 */ 472 extern void arch_unoptimize_kprobes(struct list_head *oplist, 473 struct list_head *done_list) 474 { 475 struct optimized_kprobe *op, *tmp; 476 477 list_for_each_entry_safe(op, tmp, oplist, list) { 478 arch_unoptimize_kprobe(op); 479 list_move(&op->list, done_list); 480 } 481 } 482 483 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 484 { 485 struct optimized_kprobe *op; 486 487 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 488 /* This kprobe is really able to run optimized path. */ 489 op = container_of(p, struct optimized_kprobe, kp); 490 /* Detour through copied instructions */ 491 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 492 if (!reenter) 493 reset_current_kprobe(); 494 preempt_enable_no_resched(); 495 return 1; 496 } 497 return 0; 498 } 499 NOKPROBE_SYMBOL(setup_detour_execution); 500