1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Kernel Probes Jump Optimization (Optprobes) 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004 6 * Copyright (C) Hitachi Ltd., 2012 7 */ 8 #include <linux/kprobes.h> 9 #include <linux/ptrace.h> 10 #include <linux/string.h> 11 #include <linux/slab.h> 12 #include <linux/hardirq.h> 13 #include <linux/preempt.h> 14 #include <linux/extable.h> 15 #include <linux/kdebug.h> 16 #include <linux/kallsyms.h> 17 #include <linux/ftrace.h> 18 #include <linux/frame.h> 19 20 #include <asm/text-patching.h> 21 #include <asm/cacheflush.h> 22 #include <asm/desc.h> 23 #include <asm/pgtable.h> 24 #include <linux/uaccess.h> 25 #include <asm/alternative.h> 26 #include <asm/insn.h> 27 #include <asm/debugreg.h> 28 #include <asm/set_memory.h> 29 #include <asm/sections.h> 30 #include <asm/nospec-branch.h> 31 32 #include "common.h" 33 34 unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) 35 { 36 struct optimized_kprobe *op; 37 struct kprobe *kp; 38 long offs; 39 int i; 40 41 for (i = 0; i < RELATIVEJUMP_SIZE; i++) { 42 kp = get_kprobe((void *)addr - i); 43 /* This function only handles jump-optimized kprobe */ 44 if (kp && kprobe_optimized(kp)) { 45 op = container_of(kp, struct optimized_kprobe, kp); 46 /* If op->list is not empty, op is under optimizing */ 47 if (list_empty(&op->list)) 48 goto found; 49 } 50 } 51 52 return addr; 53 found: 54 /* 55 * If the kprobe can be optimized, original bytes which can be 56 * overwritten by jump destination address. In this case, original 57 * bytes must be recovered from op->optinsn.copied_insn buffer. 58 */ 59 if (probe_kernel_read(buf, (void *)addr, 60 MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) 61 return 0UL; 62 63 if (addr == (unsigned long)kp->addr) { 64 buf[0] = kp->opcode; 65 memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 66 } else { 67 offs = addr - (unsigned long)kp->addr - 1; 68 memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); 69 } 70 71 return (unsigned long)buf; 72 } 73 74 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 75 static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 76 { 77 #ifdef CONFIG_X86_64 78 *addr++ = 0x48; 79 *addr++ = 0xbf; 80 #else 81 *addr++ = 0xb8; 82 #endif 83 *(unsigned long *)addr = val; 84 } 85 86 asm ( 87 ".pushsection .rodata\n" 88 "optprobe_template_func:\n" 89 ".global optprobe_template_entry\n" 90 "optprobe_template_entry:\n" 91 #ifdef CONFIG_X86_64 92 /* We don't bother saving the ss register */ 93 " pushq %rsp\n" 94 " pushfq\n" 95 SAVE_REGS_STRING 96 " movq %rsp, %rsi\n" 97 ".global optprobe_template_val\n" 98 "optprobe_template_val:\n" 99 ASM_NOP5 100 ASM_NOP5 101 ".global optprobe_template_call\n" 102 "optprobe_template_call:\n" 103 ASM_NOP5 104 /* Move flags to rsp */ 105 " movq 144(%rsp), %rdx\n" 106 " movq %rdx, 152(%rsp)\n" 107 RESTORE_REGS_STRING 108 /* Skip flags entry */ 109 " addq $8, %rsp\n" 110 " popfq\n" 111 #else /* CONFIG_X86_32 */ 112 " pushf\n" 113 SAVE_REGS_STRING 114 " movl %esp, %edx\n" 115 ".global optprobe_template_val\n" 116 "optprobe_template_val:\n" 117 ASM_NOP5 118 ".global optprobe_template_call\n" 119 "optprobe_template_call:\n" 120 ASM_NOP5 121 RESTORE_REGS_STRING 122 " addl $4, %esp\n" /* skip cs */ 123 " popf\n" 124 #endif 125 ".global optprobe_template_end\n" 126 "optprobe_template_end:\n" 127 ".popsection\n"); 128 129 void optprobe_template_func(void); 130 STACK_FRAME_NON_STANDARD(optprobe_template_func); 131 132 #define TMPL_MOVE_IDX \ 133 ((long)optprobe_template_val - (long)optprobe_template_entry) 134 #define TMPL_CALL_IDX \ 135 ((long)optprobe_template_call - (long)optprobe_template_entry) 136 #define TMPL_END_IDX \ 137 ((long)optprobe_template_end - (long)optprobe_template_entry) 138 139 #define INT3_SIZE sizeof(kprobe_opcode_t) 140 141 /* Optimized kprobe call back function: called from optinsn */ 142 static void 143 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 144 { 145 /* This is possible if op is under delayed unoptimizing */ 146 if (kprobe_disabled(&op->kp)) 147 return; 148 149 preempt_disable(); 150 if (kprobe_running()) { 151 kprobes_inc_nmissed_count(&op->kp); 152 } else { 153 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 154 /* Save skipped registers */ 155 #ifdef CONFIG_X86_64 156 regs->cs = __KERNEL_CS; 157 #else 158 regs->cs = __KERNEL_CS | get_kernel_rpl(); 159 regs->gs = 0; 160 #endif 161 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 162 regs->orig_ax = ~0UL; 163 164 __this_cpu_write(current_kprobe, &op->kp); 165 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 166 opt_pre_handler(&op->kp, regs); 167 __this_cpu_write(current_kprobe, NULL); 168 } 169 preempt_enable(); 170 } 171 NOKPROBE_SYMBOL(optimized_callback); 172 173 static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) 174 { 175 struct insn insn; 176 int len = 0, ret; 177 178 while (len < RELATIVEJUMP_SIZE) { 179 ret = __copy_instruction(dest + len, src + len, real + len, &insn); 180 if (!ret || !can_boost(&insn, src + len)) 181 return -EINVAL; 182 len += ret; 183 } 184 /* Check whether the address range is reserved */ 185 if (ftrace_text_reserved(src, src + len - 1) || 186 alternatives_text_reserved(src, src + len - 1) || 187 jump_label_text_reserved(src, src + len - 1)) 188 return -EBUSY; 189 190 return len; 191 } 192 193 /* Check whether insn is indirect jump */ 194 static int __insn_is_indirect_jump(struct insn *insn) 195 { 196 return ((insn->opcode.bytes[0] == 0xff && 197 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ 198 insn->opcode.bytes[0] == 0xea); /* Segment based jump */ 199 } 200 201 /* Check whether insn jumps into specified address range */ 202 static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) 203 { 204 unsigned long target = 0; 205 206 switch (insn->opcode.bytes[0]) { 207 case 0xe0: /* loopne */ 208 case 0xe1: /* loope */ 209 case 0xe2: /* loop */ 210 case 0xe3: /* jcxz */ 211 case 0xe9: /* near relative jump */ 212 case 0xeb: /* short relative jump */ 213 break; 214 case 0x0f: 215 if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ 216 break; 217 return 0; 218 default: 219 if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ 220 break; 221 return 0; 222 } 223 target = (unsigned long)insn->next_byte + insn->immediate.value; 224 225 return (start <= target && target <= start + len); 226 } 227 228 static int insn_is_indirect_jump(struct insn *insn) 229 { 230 int ret = __insn_is_indirect_jump(insn); 231 232 #ifdef CONFIG_RETPOLINE 233 /* 234 * Jump to x86_indirect_thunk_* is treated as an indirect jump. 235 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with 236 * older gcc may use indirect jump. So we add this check instead of 237 * replace indirect-jump check. 238 */ 239 if (!ret) 240 ret = insn_jump_into_range(insn, 241 (unsigned long)__indirect_thunk_start, 242 (unsigned long)__indirect_thunk_end - 243 (unsigned long)__indirect_thunk_start); 244 #endif 245 return ret; 246 } 247 248 /* Decode whole function to ensure any instructions don't jump into target */ 249 static int can_optimize(unsigned long paddr) 250 { 251 unsigned long addr, size = 0, offset = 0; 252 struct insn insn; 253 kprobe_opcode_t buf[MAX_INSN_SIZE]; 254 255 /* Lookup symbol including addr */ 256 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 257 return 0; 258 259 /* 260 * Do not optimize in the entry code due to the unstable 261 * stack handling and registers setup. 262 */ 263 if (((paddr >= (unsigned long)__entry_text_start) && 264 (paddr < (unsigned long)__entry_text_end)) || 265 ((paddr >= (unsigned long)__irqentry_text_start) && 266 (paddr < (unsigned long)__irqentry_text_end))) 267 return 0; 268 269 /* Check there is enough space for a relative jump. */ 270 if (size - offset < RELATIVEJUMP_SIZE) 271 return 0; 272 273 /* Decode instructions */ 274 addr = paddr - offset; 275 while (addr < paddr - offset + size) { /* Decode until function end */ 276 unsigned long recovered_insn; 277 if (search_exception_tables(addr)) 278 /* 279 * Since some fixup code will jumps into this function, 280 * we can't optimize kprobe in this function. 281 */ 282 return 0; 283 recovered_insn = recover_probed_instruction(buf, addr); 284 if (!recovered_insn) 285 return 0; 286 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); 287 insn_get_length(&insn); 288 /* Another subsystem puts a breakpoint */ 289 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 290 return 0; 291 /* Recover address */ 292 insn.kaddr = (void *)addr; 293 insn.next_byte = (void *)(addr + insn.length); 294 /* Check any instructions don't jump into target */ 295 if (insn_is_indirect_jump(&insn) || 296 insn_jump_into_range(&insn, paddr + INT3_SIZE, 297 RELATIVE_ADDR_SIZE)) 298 return 0; 299 addr += insn.length; 300 } 301 302 return 1; 303 } 304 305 /* Check optimized_kprobe can actually be optimized. */ 306 int arch_check_optimized_kprobe(struct optimized_kprobe *op) 307 { 308 int i; 309 struct kprobe *p; 310 311 for (i = 1; i < op->optinsn.size; i++) { 312 p = get_kprobe(op->kp.addr + i); 313 if (p && !kprobe_disabled(p)) 314 return -EEXIST; 315 } 316 317 return 0; 318 } 319 320 /* Check the addr is within the optimized instructions. */ 321 int arch_within_optimized_kprobe(struct optimized_kprobe *op, 322 unsigned long addr) 323 { 324 return ((unsigned long)op->kp.addr <= addr && 325 (unsigned long)op->kp.addr + op->optinsn.size > addr); 326 } 327 328 /* Free optimized instruction slot */ 329 static 330 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 331 { 332 if (op->optinsn.insn) { 333 free_optinsn_slot(op->optinsn.insn, dirty); 334 op->optinsn.insn = NULL; 335 op->optinsn.size = 0; 336 } 337 } 338 339 void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 340 { 341 __arch_remove_optimized_kprobe(op, 1); 342 } 343 344 /* 345 * Copy replacing target instructions 346 * Target instructions MUST be relocatable (checked inside) 347 * This is called when new aggr(opt)probe is allocated or reused. 348 */ 349 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 350 struct kprobe *__unused) 351 { 352 u8 *buf = NULL, *slot; 353 int ret, len; 354 long rel; 355 356 if (!can_optimize((unsigned long)op->kp.addr)) 357 return -EILSEQ; 358 359 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); 360 if (!buf) 361 return -ENOMEM; 362 363 op->optinsn.insn = slot = get_optinsn_slot(); 364 if (!slot) { 365 ret = -ENOMEM; 366 goto out; 367 } 368 369 /* 370 * Verify if the address gap is in 2GB range, because this uses 371 * a relative jump. 372 */ 373 rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; 374 if (abs(rel) > 0x7fffffff) { 375 ret = -ERANGE; 376 goto err; 377 } 378 379 /* Copy arch-dep-instance from template */ 380 memcpy(buf, optprobe_template_entry, TMPL_END_IDX); 381 382 /* Copy instructions into the out-of-line buffer */ 383 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, 384 slot + TMPL_END_IDX); 385 if (ret < 0) 386 goto err; 387 op->optinsn.size = ret; 388 len = TMPL_END_IDX + op->optinsn.size; 389 390 /* Set probe information */ 391 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 392 393 /* Set probe function call */ 394 synthesize_relcall(buf + TMPL_CALL_IDX, 395 slot + TMPL_CALL_IDX, optimized_callback); 396 397 /* Set returning jmp instruction at the tail of out-of-line buffer */ 398 synthesize_reljump(buf + len, slot + len, 399 (u8 *)op->kp.addr + op->optinsn.size); 400 len += RELATIVEJUMP_SIZE; 401 402 /* We have to use text_poke for instuction buffer because it is RO */ 403 text_poke(slot, buf, len); 404 ret = 0; 405 out: 406 kfree(buf); 407 return ret; 408 409 err: 410 __arch_remove_optimized_kprobe(op, 0); 411 goto out; 412 } 413 414 /* 415 * Replace breakpoints (int3) with relative jumps. 416 * Caller must call with locking kprobe_mutex and text_mutex. 417 */ 418 void arch_optimize_kprobes(struct list_head *oplist) 419 { 420 struct optimized_kprobe *op, *tmp; 421 u8 insn_buf[RELATIVEJUMP_SIZE]; 422 423 list_for_each_entry_safe(op, tmp, oplist, list) { 424 s32 rel = (s32)((long)op->optinsn.insn - 425 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 426 427 WARN_ON(kprobe_disabled(&op->kp)); 428 429 /* Backup instructions which will be replaced by jump address */ 430 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 431 RELATIVE_ADDR_SIZE); 432 433 insn_buf[0] = RELATIVEJUMP_OPCODE; 434 *(s32 *)(&insn_buf[1]) = rel; 435 436 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 437 op->optinsn.insn); 438 439 list_del_init(&op->list); 440 } 441 } 442 443 /* Replace a relative jump with a breakpoint (int3). */ 444 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 445 { 446 u8 insn_buf[RELATIVEJUMP_SIZE]; 447 448 /* Set int3 to first byte for kprobes */ 449 insn_buf[0] = BREAKPOINT_INSTRUCTION; 450 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); 451 text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, 452 op->optinsn.insn); 453 } 454 455 /* 456 * Recover original instructions and breakpoints from relative jumps. 457 * Caller must call with locking kprobe_mutex. 458 */ 459 extern void arch_unoptimize_kprobes(struct list_head *oplist, 460 struct list_head *done_list) 461 { 462 struct optimized_kprobe *op, *tmp; 463 464 list_for_each_entry_safe(op, tmp, oplist, list) { 465 arch_unoptimize_kprobe(op); 466 list_move(&op->list, done_list); 467 } 468 } 469 470 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 471 { 472 struct optimized_kprobe *op; 473 474 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 475 /* This kprobe is really able to run optimized path. */ 476 op = container_of(p, struct optimized_kprobe, kp); 477 /* Detour through copied instructions */ 478 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 479 if (!reenter) 480 reset_current_kprobe(); 481 return 1; 482 } 483 return 0; 484 } 485 NOKPROBE_SYMBOL(setup_detour_execution); 486