// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>
#include <linux/pgtable.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

/*
 * Recover the original instruction bytes at @addr if they are covered by the
 * jump of an already-optimized kprobe.
 *
 * The JMP32_INSN_SIZE addresses ending at @addr are searched for a kprobe
 * that is optimized and whose op->list is empty.
 *
 * Returns:
 *   @addr itself       - no such kprobe covers @addr; nothing was copied.
 *   0                  - reading kernel text at @addr failed.
 *   (unsigned long)buf - @buf holds MAX_INSN_SIZE opcodes read from @addr,
 *                        with the bytes covered by the jump replaced by the
 *                        saved originals (kp->opcode and
 *                        op->optinsn.copied_insn).
 */
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
	struct optimized_kprobe *op;
	struct kprobe *kp;
	long offs;
	int i;

	for (i = 0; i < JMP32_INSN_SIZE; i++) {
		kp = get_kprobe((void *)addr - i);
		/* This function only handles jump-optimized kprobe */
		if (kp && kprobe_optimized(kp)) {
			op = container_of(kp, struct optimized_kprobe, kp);
			/* If op->list is not empty, op is under optimizing */
			if (list_empty(&op->list))
				goto found;
		}
	}

	return addr;
found:
	/*
	 * If the kprobe can be optimized, original bytes which can be
	 * overwritten by jump destination address. In this case, original
	 * bytes must be recovered from op->optinsn.copied_insn buffer.
	 */
	if (copy_from_kernel_nofault(buf, (void *)addr,
		MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
		return 0UL;

	if (addr == (unsigned long)kp->addr) {
		/* @addr is the probe address: opcode byte + whole displacement */
		buf[0] = kp->opcode;
		memcpy(buf + 1, op->optinsn.copied_insn, DISP32_SIZE);
	} else {
		/*
		 * @addr lies inside the displacement: offs is the index into
		 * copied_insn, and only the remaining saved bytes are restored.
		 */
		offs = addr - (unsigned long)kp->addr - 1;
		memcpy(buf, op->optinsn.copied_insn + offs, DISP32_SIZE - offs);
	}

	return (unsigned long)buf;
}

/*
 * Overwrite the three bytes at @addr with 0f 01 ca (CLAC) when the boot CPU
 * reports X86_FEATURE_SMAP; otherwise leave @addr untouched.
 */
static void synthesize_clac(kprobe_opcode_t *addr)
{
	/*
	 * Can't be static_cpu_has() due to how objtool treats this feature bit.
	 * This isn't a fast path anyway.
	 */
	if (!boot_cpu_has(X86_FEATURE_SMAP))
		return;

	/* Replace the NOP3 with CLAC */
	addr[0] = 0x0f;
	addr[1] = 0x01;
	addr[2] = 0xca;
}

/*
 * Insert a move instruction which sets a pointer to eax/rdi (1st arg).
 *
 * Writes the opcode bytes (0x48 0xbf on x86_64, 0xb8 on 32-bit) at @addr,
 * followed by @val stored as an unsigned long immediate.
 */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
	*addr++ = 0x48;
	*addr++ = 0xbf;
#else
	*addr++ = 0xb8;
#endif
	*(unsigned long *)addr = val;
}

/*
 * Template for the out-of-line detour code, emitted into .rodata.
 *
 * The global labels mark the positions that arch_prepare_optimized_kprobe()
 * patches in its private copy of the template:
 *   optprobe_template_clac - NOP3 placeholder, see synthesize_clac()
 *   optprobe_template_val  - NOP5 placeholder(s), see synthesize_set_arg1();
 *                            the 64-bit variant reserves two of them
 *   optprobe_template_call - NOP5 placeholder for the call to
 *                            optimized_callback()
 *   optprobe_template_end  - end of the template
 */
asm (
			".pushsection .rodata\n"
			"optprobe_template_func:\n"
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
			/* We don't bother saving the ss register */
			" pushq %rsp\n"
			" pushfq\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			" movq %rsp, %rsi\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags to rsp */
			" movq 18*8(%rsp), %rdx\n"
			" movq %rdx, 19*8(%rsp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			" addq $8, %rsp\n"
			" popfq\n"
#else /* CONFIG_X86_32 */
			" pushl %esp\n"
			" pushfl\n"
			".global optprobe_template_clac\n"
			"optprobe_template_clac:\n"
			ASM_NOP3
			SAVE_REGS_STRING
			" movl %esp, %edx\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			ASM_NOP5
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			ASM_NOP5
			/* Move flags into esp */
			" movl 14*4(%esp), %edx\n"
			" movl %edx, 15*4(%esp)\n"
			RESTORE_REGS_STRING
			/* Skip flags entry */
			" addl $4, %esp\n"
			" popfl\n"
#endif
			".global optprobe_template_end\n"
			"optprobe_template_end:\n"
			".popsection\n");

/* C-visible declaration of the template symbol defined in the asm above. */
void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

/* Byte offsets of the patch points, relative to optprobe_template_entry. */
#define TMPL_CLAC_IDX \
	((long)optprobe_template_clac - (long)optprobe_template_entry)
#define TMPL_MOVE_IDX \
	((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
	((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
	((long)optprobe_template_end - (long)optprobe_template_entry)

/*
 * Optimized kprobe call back function: called from optinsn
 *
 * Returns immediately if the probe is disabled. Otherwise, with preemption
 * disabled, either counts a missed hit (another kprobe is already running)
 * or completes @regs and runs opt_pre_handler() with @op->kp installed as
 * the current kprobe for the duration of the handler.
 */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();
	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
		/* Save skipped registers */
		regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
		regs->cs |= get_kernel_rpl();
		regs->gs = 0;
#endif
		/* Report the ip as the address just past an INT3 at the probe */
		regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
		regs->orig_ax = ~0UL;

		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}
	preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

/*
 * Copy whole instructions from @src into @dest until at least
 * JMP32_INSN_SIZE bytes are covered. @real is passed to __copy_instruction()
 * alongside @dest; the caller in this file passes the address inside the
 * instruction slot that the copy will finally occupy.
 *
 * Returns the number of bytes copied, -EINVAL if an instruction cannot be
 * copied or fails can_boost(), or -EBUSY if the copied source range is
 * reserved by ftrace, alternatives or jump labels.
 */
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
	struct insn insn;
	int len = 0, ret;

	while (len < JMP32_INSN_SIZE) {
		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
		if (!ret || !can_boost(&insn, src + len))
			return -EINVAL;
		len += ret;
	}
	/* Check whether the address range is reserved */
	if (ftrace_text_reserved(src, src + len - 1) ||
	    alternatives_text_reserved(src, src + len - 1) ||
	    jump_label_text_reserved(src, src + len - 1))
		return -EBUSY;

	return len;
}

/*
 * Check whether insn is indirect jump
 *
 * True for opcode 0xff with a ModRM reg field of 4 or 5, and for opcode
 * 0xea.
 */
static int __insn_is_indirect_jump(struct insn *insn)
{
	return ((insn->opcode.bytes[0] == 0xff &&
		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
}

/*
 * Check whether insn jumps into specified address range
 *
 * Only the relative jump opcodes listed in the switch are considered; every
 * other instruction yields 0. The target is insn->next_byte plus the
 * immediate, and the range tested is [@start, @start + @len], inclusive at
 * both ends.
 */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
	unsigned long target = 0;

	switch (insn->opcode.bytes[0]) {
	case 0xe0:	/* loopne */
	case 0xe1:	/* loope */
	case 0xe2:	/* loop */
	case 0xe3:	/* jcxz */
	case 0xe9:	/* near relative jump */
	case 0xeb:	/* short relative jump */
		break;
	case 0x0f:
		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
			break;
		return 0;
	default:
		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
			break;
		return 0;
	}
	target = (unsigned long)insn->next_byte + insn->immediate.value;

	return (start <= target && target <= start + len);
}

/*
 * Check whether insn must be treated as an indirect jump: either a real one
 * (__insn_is_indirect_jump()) or, with CONFIG_RETPOLINE, a relative jump
 * into the [__indirect_thunk_start, __indirect_thunk_end] area.
 */
static int insn_is_indirect_jump(struct insn *insn)
{
	int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
	/*
	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
	 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
	 * older gcc may use indirect jump. So we add this check instead of
	 * replace indirect-jump check.
	 */
	if (!ret)
		ret = insn_jump_into_range(insn,
				(unsigned long)__indirect_thunk_start,
				(unsigned long)__indirect_thunk_end -
				(unsigned long)__indirect_thunk_start);
#endif
	return ret;
}

/*
 * Decode whole function to ensure any instructions don't jump into target
 *
 * Returns 1 if the probe at @paddr may be replaced by a jump, 0 otherwise.
 * The probe is rejected when:
 *  - no symbol containing @paddr is found,
 *  - @paddr is inside [__entry_text_start, __entry_text_end),
 *  - fewer than JMP32_INSN_SIZE bytes remain between @paddr and the end of
 *    the symbol,
 *  - any instruction address in the symbol has an exception table entry,
 *  - an instruction cannot be recovered or is an INT3,
 *  - any instruction is an indirect jump, or a relative jump whose target
 *    falls in [@paddr + INT3_INSN_SIZE,
 *              @paddr + INT3_INSN_SIZE + DISP32_SIZE].
 */
static int can_optimize(unsigned long paddr)
{
	unsigned long addr, size = 0, offset = 0;
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* Lookup symbol including addr */
	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
		return 0;

	/*
	 * Do not optimize in the entry code due to the unstable
	 * stack handling and registers setup.
	 */
	if (((paddr >= (unsigned long)__entry_text_start) &&
	     (paddr < (unsigned long)__entry_text_end)))
		return 0;

	/* Check there is enough space for a relative jump. */
	if (size - offset < JMP32_INSN_SIZE)
		return 0;

	/* Decode instructions */
	addr = paddr - offset;
	while (addr < paddr - offset + size) { /* Decode until function end */
		unsigned long recovered_insn;
		if (search_exception_tables(addr))
			/*
			 * Since some fixup code may jump into this function,
			 * we can't optimize kprobe in this function.
			 */
			return 0;
		recovered_insn = recover_probed_instruction(buf, addr);
		if (!recovered_insn)
			return 0;
		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
		insn_get_length(&insn);
		/* Another subsystem puts a breakpoint */
		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
			return 0;
		/*
		 * Recover address: the bytes may have been decoded from buf,
		 * so point the insn back at its real location before the
		 * jump target is computed.
		 */
		insn.kaddr = (void *)addr;
		insn.next_byte = (void *)(addr + insn.length);
		/* Check any instructions don't jump into target */
		if (insn_is_indirect_jump(&insn) ||
		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
					 DISP32_SIZE))
			return 0;
		addr += insn.length;
	}

	return 1;
}

/*
 * Check optimized_kprobe can actually be optimized.
 *
 * Returns -EEXIST if a kprobe that is not disabled sits at any offset from
 * 1 to op->optinsn.size - 1 past op->kp.addr, 0 otherwise.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	int i;
	struct kprobe *p;

	for (i = 1; i < op->optinsn.size; i++) {
		p = get_kprobe(op->kp.addr + i);
		if (p && !kprobe_disabled(p))
			return -EEXIST;
	}

	return 0;
}

/*
 * Check the addr is within the optimized instructions.
 *
 * The range tested is [op->kp.addr, op->kp.addr + op->optinsn.size).
 */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/*
 * Free optimized instruction slot
 *
 * Does nothing if @op has no slot. Otherwise the slot is released with
 * @dirty passed through to free_optinsn_slot(), and the slot pointer and
 * size in @op are reset.
 */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
		op->optinsn.size = 0;
	}
}

/* Release the instruction slot of @op, with the dirty argument set to 1. */
void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy replacing target instructions
 * Target instructions MUST be relocatable (checked inside)
 * This is called when new aggr(opt)probe is allocated or reused.
371 */ 372 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, 373 struct kprobe *__unused) 374 { 375 u8 *buf = NULL, *slot; 376 int ret, len; 377 long rel; 378 379 if (!can_optimize((unsigned long)op->kp.addr)) 380 return -EILSEQ; 381 382 buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); 383 if (!buf) 384 return -ENOMEM; 385 386 op->optinsn.insn = slot = get_optinsn_slot(); 387 if (!slot) { 388 ret = -ENOMEM; 389 goto out; 390 } 391 392 /* 393 * Verify if the address gap is in 2GB range, because this uses 394 * a relative jump. 395 */ 396 rel = (long)slot - (long)op->kp.addr + JMP32_INSN_SIZE; 397 if (abs(rel) > 0x7fffffff) { 398 ret = -ERANGE; 399 goto err; 400 } 401 402 /* Copy arch-dep-instance from template */ 403 memcpy(buf, optprobe_template_entry, TMPL_END_IDX); 404 405 /* Copy instructions into the out-of-line buffer */ 406 ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, 407 slot + TMPL_END_IDX); 408 if (ret < 0) 409 goto err; 410 op->optinsn.size = ret; 411 len = TMPL_END_IDX + op->optinsn.size; 412 413 synthesize_clac(buf + TMPL_CLAC_IDX); 414 415 /* Set probe information */ 416 synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); 417 418 /* Set probe function call */ 419 synthesize_relcall(buf + TMPL_CALL_IDX, 420 slot + TMPL_CALL_IDX, optimized_callback); 421 422 /* Set returning jmp instruction at the tail of out-of-line buffer */ 423 synthesize_reljump(buf + len, slot + len, 424 (u8 *)op->kp.addr + op->optinsn.size); 425 len += JMP32_INSN_SIZE; 426 427 /* We have to use text_poke() for instruction buffer because it is RO */ 428 text_poke(slot, buf, len); 429 ret = 0; 430 out: 431 kfree(buf); 432 return ret; 433 434 err: 435 __arch_remove_optimized_kprobe(op, 0); 436 goto out; 437 } 438 439 /* 440 * Replace breakpoints (INT3) with relative jumps (JMP.d32). 441 * Caller must call with locking kprobe_mutex and text_mutex. 
442 * 443 * The caller will have installed a regular kprobe and after that issued 444 * syncrhonize_rcu_tasks(), this ensures that the instruction(s) that live in 445 * the 4 bytes after the INT3 are unused and can now be overwritten. 446 */ 447 void arch_optimize_kprobes(struct list_head *oplist) 448 { 449 struct optimized_kprobe *op, *tmp; 450 u8 insn_buff[JMP32_INSN_SIZE]; 451 452 list_for_each_entry_safe(op, tmp, oplist, list) { 453 s32 rel = (s32)((long)op->optinsn.insn - 454 ((long)op->kp.addr + JMP32_INSN_SIZE)); 455 456 WARN_ON(kprobe_disabled(&op->kp)); 457 458 /* Backup instructions which will be replaced by jump address */ 459 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_INSN_SIZE, 460 DISP32_SIZE); 461 462 insn_buff[0] = JMP32_INSN_OPCODE; 463 *(s32 *)(&insn_buff[1]) = rel; 464 465 text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL); 466 467 list_del_init(&op->list); 468 } 469 } 470 471 /* 472 * Replace a relative jump (JMP.d32) with a breakpoint (INT3). 473 * 474 * After that, we can restore the 4 bytes after the INT3 to undo what 475 * arch_optimize_kprobes() scribbled. This is safe since those bytes will be 476 * unused once the INT3 lands. 477 */ 478 void arch_unoptimize_kprobe(struct optimized_kprobe *op) 479 { 480 arch_arm_kprobe(&op->kp); 481 text_poke(op->kp.addr + INT3_INSN_SIZE, 482 op->optinsn.copied_insn, DISP32_SIZE); 483 text_poke_sync(); 484 } 485 486 /* 487 * Recover original instructions and breakpoints from relative jumps. 488 * Caller must call with locking kprobe_mutex. 
489 */ 490 extern void arch_unoptimize_kprobes(struct list_head *oplist, 491 struct list_head *done_list) 492 { 493 struct optimized_kprobe *op, *tmp; 494 495 list_for_each_entry_safe(op, tmp, oplist, list) { 496 arch_unoptimize_kprobe(op); 497 list_move(&op->list, done_list); 498 } 499 } 500 501 int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 502 { 503 struct optimized_kprobe *op; 504 505 if (p->flags & KPROBE_FLAG_OPTIMIZED) { 506 /* This kprobe is really able to run optimized path. */ 507 op = container_of(p, struct optimized_kprobe, kp); 508 /* Detour through copied instructions */ 509 regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; 510 if (!reenter) 511 reset_current_kprobe(); 512 return 1; 513 } 514 return 0; 515 } 516 NOKPROBE_SYMBOL(setup_detour_execution); 517