/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 * Copyright (C) Huawei Inc., 2014
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <asm/kprobes.h>
#include <asm/cacheflush.h>
/* for arm_gen_branch */
#include <asm/insn.h>
/* for patch_text */
#include <asm/patch.h>

#include "core.h"

/*
 * See register_usage_flags. If the probed instruction doesn't use PC,
 * we can copy it into the template and have it executed directly without
 * simulation or emulation.
 */
#define ARM_REG_PC	15
#define can_kprobe_direct_exec(m)	(!test_bit(ARM_REG_PC, &(m)))
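
/*
 * Trampoline template that every optimized probe copies into its own
 * instruction slot. It saves r0-r14, the original sp and the cpsr into
 * a struct pt_regs frame on the stack, loads the struct optimized_kprobe
 * pointer and the callback address from the two literal words at the end
 * of the template (optprobe_template_val / optprobe_template_call), calls
 * optimized_callback(), and then restores the registers. On the restore
 * path the probed instruction is either executed in place (direct exec),
 * followed by a branch back to the instruction after the probe point, or
 * the saved pc is reloaded after the callback has single-stepped it.
 */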

/*
 * NOTE: the first sub and add instructions will be modified according
 * to the stack cost of the probed instruction.
 */
asm (
			".global optprobe_template_entry\n"
			"optprobe_template_entry:\n"
			".global optprobe_template_sub_sp\n"
			"optprobe_template_sub_sp:"
			"	sub	sp, sp, #0xff\n"
			"	stmia	sp, {r0 - r14} \n"
			".global optprobe_template_add_sp\n"
			"optprobe_template_add_sp:"
			"	add	r3, sp, #0xff\n"
			"	str	r3, [sp, #52]\n"
			"	mrs	r4, cpsr\n"
			"	str	r4, [sp, #64]\n"
			"	mov	r1, sp\n"
			"	ldr	r0, 1f\n"
			"	ldr	r2, 2f\n"
			/*
			 * AEABI requires an 8-byte aligned stack. If
			 * SP % 8 != 0 (SP % 4 == 0 should be ensured),
			 * alloc more bytes here.
			 */
			"	and	r4, sp, #4\n"
			"	sub	sp, sp, r4\n"
#if __LINUX_ARM_ARCH__ >= 5
			"	blx	r2\n"
#else
			"	mov	lr, pc\n"
			"	mov	pc, r2\n"
#endif
			"	add	sp, sp, r4\n"
			"	ldr	r1, [sp, #64]\n"
			"	tst	r1, #"__stringify(PSR_T_BIT)"\n"
			"	ldrne	r2, [sp, #60]\n"
			"	orrne	r2, #1\n"
			"	strne	r2, [sp, #60] @ set bit0 of PC for thumb\n"
			"	msr	cpsr_cxsf, r1\n"
			".global optprobe_template_restore_begin\n"
			"optprobe_template_restore_begin:\n"
			"	ldmia	sp, {r0 - r15}\n"
			".global optprobe_template_restore_orig_insn\n"
			"optprobe_template_restore_orig_insn:\n"
			"	nop\n"
			".global optprobe_template_restore_end\n"
			"optprobe_template_restore_end:\n"
			"	nop\n"
			".global optprobe_template_val\n"
			"optprobe_template_val:\n"
			"1:	.long 0\n"
			".global optprobe_template_call\n"
			"optprobe_template_call:\n"
			"2:	.long 0\n"
			".global optprobe_template_end\n"
			"optprobe_template_end:\n");

#define TMPL_VAL_IDX \
	((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
#define TMPL_END_IDX \
	((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
#define TMPL_ADD_SP \
	((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
#define TMPL_SUB_SP \
	((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
	((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
	((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_END \
	((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)
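
/*
 * The TMPL_* values are word offsets of the labels above relative to
 * optprobe_template_entry. arch_prepare_optimized_kprobe() uses them to
 * index the per-probe copy of the template, e.g. code[TMPL_VAL_IDX]
 * receives the struct optimized_kprobe pointer and code[TMPL_CALL_IDX]
 * the address of optimized_callback.
 */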

/*
 * In the ARM ISA an instruction can always be optimized, except for
 * instructions like 'str r0, [sp, r1]' which store to the stack and
 * whose stack space consumption cannot be determined statically.
 */
int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * In the ARM ISA, kprobe opt always replaces one instruction (4 bytes
 * aligned and 4 bytes long). It is impossible to encounter another
 * kprobe in the address range, so always return 0.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

/* Caller must ensure addr & 3 == 0 */
static int can_optimize(struct kprobe *kp)
{
	if (kp->ainsn.stack_space < 0)
		return 0;
	/*
	 * 255 is the biggest immediate that can be used in
	 * 'sub r0, r0, #<imm>'. Numbers larger than 255 need
	 * special encoding.
	 */
	if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs))
		return 0;
	return 1;
}

/* Free optimized instruction slot */
static void
__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
	}
}

extern void kprobe_handler(struct pt_regs *regs);

static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	unsigned long flags;
	struct kprobe *p = &op->kp;
	struct kprobe_ctlblk *kcb;

	/* Save skipped registers */
	regs->ARM_pc = (unsigned long)op->kp.addr;
	regs->ARM_ORIG_r0 = ~0UL;

	local_irq_save(flags);
	kcb = get_kprobe_ctlblk();

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	/*
	 * We singlestep the replaced instruction only when it can't be
	 * executed directly during restore.
	 */
	if (!p->ainsn.kprobe_direct_exec)
		op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);

	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(optimized_callback)
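
/*
 * Build the per-probe copy of the template: check that a relative branch
 * from the probe point can reach the slot, copy the template, patch the
 * sub/add sp instructions with the stack space the probed instruction
 * needs, fill in the op pointer and the address of optimized_callback,
 * and, when the instruction does not use PC, arrange for it to be
 * executed directly on the restore path.
 */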
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
{
	kprobe_opcode_t *code;
	unsigned long rel_chk;
	unsigned long val;
	unsigned long stack_protect = sizeof(struct pt_regs);

	if (!can_optimize(orig))
		return -EILSEQ;

	code = get_optinsn_slot();
	if (!code)
		return -ENOMEM;

	/*
	 * Verify that the address gap is within the 32MiB range, because
	 * this uses a relative jump.
	 *
	 * kprobe opt uses a 'b' instruction to branch to optinsn.insn.
	 * According to the ARM manual, the branch instruction is:
	 *
	 *   31  28 27           24 23             0
	 *   +------+---+---+---+---+--------------+
	 *   | cond | 1 | 0 | 1 | 0 |    imm24     |
	 *   +------+---+---+---+---+--------------+
	 *
	 * imm24 is a signed 24-bit integer. The real branch offset is
	 * computed by: imm32 = SignExtend(imm24:'00', 32);
	 *
	 * So the maximum forward branch should be:
	 *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
	 * The maximum backward branch should be:
	 *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
	 *
	 * We can simply check (rel & 0xfe000003):
	 *  if rel is positive, (rel & 0xfe000000) should be 0
	 *  if rel is negative, (rel & 0xfe000000) should be 0xfe000000
	 *  the last '3' is used for alignment checking.
	 */
	rel_chk = (unsigned long)((long)code -
			(long)orig->addr + 8) & 0xfe000003;

	if ((rel_chk != 0) && (rel_chk != 0xfe000000)) {
		/*
		 * Unlike x86, we free the code buffer directly instead of
		 * calling __arch_remove_optimized_kprobe() because
		 * we have not filled in any field of op yet.
		 */
		free_optinsn_slot(code, 0);
		return -ERANGE;
	}

	/* Copy arch-dep-instance from template. */
	memcpy(code, (unsigned long *)&optprobe_template_entry,
			TMPL_END_IDX * sizeof(kprobe_opcode_t));

	/* Adjust buffer according to instruction. */
	BUG_ON(orig->ainsn.stack_space < 0);

	stack_protect += orig->ainsn.stack_space;

	/* Should have been filtered by can_optimize(). */
	BUG_ON(stack_protect > 255);

	/* Create a 'sub sp, sp, #<stack_protect>' */
	code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect);
	/* Create an 'add r3, sp, #<stack_protect>' */
	code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect);

	/* Set probe information */
	val = (unsigned long)op;
	code[TMPL_VAL_IDX] = val;

	/* Set probe function call */
	val = (unsigned long)optimized_callback;
	code[TMPL_CALL_IDX] = val;

	/* If possible, copy insn and have it executed during restore */
	orig->ainsn.kprobe_direct_exec = false;
	if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) {
		kprobe_opcode_t final_branch = arm_gen_branch(
				(unsigned long)(&code[TMPL_RESTORE_END]),
				(unsigned long)(op->kp.addr) + 4);
		if (final_branch != 0) {
			/*
			 * Replace the original 'ldmia sp, {r0 - r15}' with
			 * 'ldmia sp, {r0 - r14}' to restore all registers
			 * except pc.
			 */
			code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff);

			/* The original probed instruction */
			code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode);

			/* Jump back to the next instruction */
			code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch);
			orig->ainsn.kprobe_direct_exec = true;
		}
	}

	flush_icache_range((unsigned long)code,
			(unsigned long)(&code[TMPL_END_IDX]));

	/* A non-NULL op->optinsn.insn means the probe is prepared. */
	op->optinsn.insn = code;
	return 0;
}

void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		unsigned long insn;
		WARN_ON(kprobe_disabled(&op->kp));

		/*
		 * Back up the instructions which will be replaced
		 * by the jump address.
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
				RELATIVEJUMP_SIZE);

		insn = arm_gen_branch((unsigned long)op->kp.addr,
				(unsigned long)op->optinsn.insn);
		BUG_ON(insn == 0);

		/*
		 * Make it a conditional branch if the replaced insn
		 * is conditional.
		 */
		insn = (__mem_to_opcode_arm(
				op->optinsn.copied_insn[0]) & 0xf0000000) |
			(insn & 0x0fffffff);

		/*
		 * Similar to __arch_disarm_kprobe, operations which
		 * remove breakpoints must be wrapped by stop_machine
		 * to avoid racing.
		 */
		kprobes_remove_breakpoint(op->kp.addr, insn);

		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must call this with kprobe_mutex held.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}