/*
 * Kernel Probes Jump Optimization (Optprobes)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 * Copyright (C) Huawei Inc., 2014
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <asm/kprobes.h>
#include <asm/cacheflush.h>
/* for arm_gen_branch */
#include <asm/insn.h>
/* for patch_text */
#include <asm/patch.h>

#include "core.h"

/*
 * See register_usage_flags. If the probed instruction doesn't use PC,
 * we can copy it into the template and have it executed directly
 * without simulation or emulation.
 */
#define ARM_REG_PC	15
#define can_kprobe_direct_exec(m)	(!test_bit(ARM_REG_PC, &(m)))

/*
 * NOTE: the first sub and add instructions will be modified according
 * to the stack cost of the probed instruction.
 */
asm (
	".global optprobe_template_entry\n"
	"optprobe_template_entry:\n"
	".global optprobe_template_sub_sp\n"
	"optprobe_template_sub_sp:"
	"	sub	sp, sp, #0xff\n"
	"	stmia	sp, {r0 - r14}\n"
	".global optprobe_template_add_sp\n"
	"optprobe_template_add_sp:"
	"	add	r3, sp, #0xff\n"
	"	str	r3, [sp, #52]\n"
	"	mrs	r4, cpsr\n"
	"	str	r4, [sp, #64]\n"
	"	mov	r1, sp\n"
	"	ldr	r0, 1f\n"
	"	ldr	r2, 2f\n"
	/*
	 * AEABI requires an 8-byte aligned stack. If
	 * SP % 8 != 0 (SP % 4 == 0 should be ensured),
	 * reserve more bytes here.
	 */
	"	and	r4, sp, #4\n"
	"	sub	sp, sp, r4\n"
#if __LINUX_ARM_ARCH__ >= 5
	"	blx	r2\n"
#else
	"	mov	lr, pc\n"
	"	mov	pc, r2\n"
#endif
	"	add	sp, sp, r4\n"
	"	ldr	r1, [sp, #64]\n"
	"	tst	r1, #"__stringify(PSR_T_BIT)"\n"
	"	ldrne	r2, [sp, #60]\n"
	"	orrne	r2, #1\n"
	"	strne	r2, [sp, #60] @ set bit0 of PC for thumb\n"
	"	msr	cpsr_cxsf, r1\n"
	".global optprobe_template_restore_begin\n"
	"optprobe_template_restore_begin:\n"
	"	ldmia	sp, {r0 - r15}\n"
	".global optprobe_template_restore_orig_insn\n"
	"optprobe_template_restore_orig_insn:\n"
	"	nop\n"
	".global optprobe_template_restore_end\n"
	"optprobe_template_restore_end:\n"
	"	nop\n"
	".global optprobe_template_val\n"
	"optprobe_template_val:\n"
	"1:	.long 0\n"
	".global optprobe_template_call\n"
	"optprobe_template_call:\n"
	"2:	.long 0\n"
	".global optprobe_template_end\n"
	"optprobe_template_end:\n");
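
/*
 * Rough sketch of what the template does at run time (an added note;
 * the byte offsets below assume the usual 18-word ARM struct pt_regs
 * layout, with sp/pc/cpsr at offsets 52/60/64):
 *
 *  - 'sub sp, sp, #<n>' reserves room for a struct pt_regs plus the
 *    probed instruction's own stack use; <n> is patched in
 *    arch_prepare_optimized_kprobe().
 *  - 'stmia sp, {r0 - r14}' plus the following stores build the pt_regs
 *    handed to the callback, which is invoked with r0 = op and r1 = regs.
 *  - the two '.long 0' literals at labels 1: and 2: are patched with the
 *    struct optimized_kprobe pointer and the address of optimized_callback().
 *  - the restore tail either reloads r0-r15 from pt_regs, or, in the
 *    direct-exec case, reloads r0-r14, runs the original instruction in
 *    place and branches back to the instruction after the probe point.
 */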

#define TMPL_VAL_IDX \
	((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
#define TMPL_CALL_IDX \
	((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
#define TMPL_END_IDX \
	((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
#define TMPL_ADD_SP \
	((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
#define TMPL_SUB_SP \
	((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
	((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
	((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
#define TMPL_RESTORE_END \
	((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)

/*
 * ARM can always optimize an instruction when using the ARM ISA, except
 * for instructions like 'str r0, [sp, r1]' which store to the stack but
 * whose stack space consumption cannot be determined statically.
 */
int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * In the ARM ISA, a kprobe opt always replaces one instruction (4 bytes
 * aligned and 4 bytes long). It is impossible to encounter another
 * kprobe in the address range. So always return 0.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

/* Caller must ensure addr & 3 == 0 */
static int can_optimize(struct kprobe *kp)
{
	if (kp->ainsn.stack_space < 0)
		return 0;
	/*
	 * 255 is the biggest immediate that can be used in the template's
	 * 'sub sp, sp, #<imm>'; larger values need special encoding.
	 */
	if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs))
		return 0;
	return 1;
}
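
/*
 * Worked example (added for illustration, not enforced here): with the
 * usual 18-word ARM struct pt_regs, i.e. 72 bytes, can_optimize() accepts
 * a probed instruction only if its own stack use is at most
 * 255 - 72 = 183 bytes, so that a single 8-bit immediate in
 * 'sub sp, sp, #<imm>' can cover both the pt_regs and that stack use.
 */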

/* Free optimized instruction slot */
static void
__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
	if (op->optinsn.insn) {
		free_optinsn_slot(op->optinsn.insn, dirty);
		op->optinsn.insn = NULL;
	}
}

extern void kprobe_handler(struct pt_regs *regs);

static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
	unsigned long flags;
	struct kprobe *p = &op->kp;
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	/* Save skipped registers */
	regs->ARM_pc = (unsigned long)op->kp.addr;
	regs->ARM_ORIG_r0 = ~0UL;

	local_irq_save(flags);

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	/*
	 * We single-step the replaced instruction only when it can't be
	 * executed directly during restore.
	 */
	if (!p->ainsn.kprobe_direct_exec)
		op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs);

	local_irq_restore(flags);
}

int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
{
	kprobe_opcode_t *code;
	unsigned long rel_chk;
	unsigned long val;
	unsigned long stack_protect = sizeof(struct pt_regs);

	if (!can_optimize(orig))
		return -EILSEQ;

	code = get_optinsn_slot();
	if (!code)
		return -ENOMEM;

	/*
	 * Verify that the address gap is within the 32MiB range, because
	 * this uses a relative jump.
	 *
	 * kprobe opt uses a 'b' instruction to branch to optinsn.insn.
	 * According to the ARM manual, the branch instruction is:
	 *
	 *   31  28 27           24 23                    0
	 *  +------+---+---+---+---+----------------------+
	 *  | cond | 1 | 0 | 1 | 0 |         imm24        |
	 *  +------+---+---+---+---+----------------------+
	 *
	 * imm24 is a signed 24-bit integer. The real branch offset is
	 * computed by: imm32 = SignExtend(imm24:'00', 32);
	 *
	 * So the maximum forward branch should be:
	 *   (0x007fffff << 2) = 0x01fffffc
	 * The maximum backward branch should be:
	 *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
	 *
	 * We can simply check (rel & 0xfe000003):
	 * if rel is positive, (rel & 0xfe000000) should be 0;
	 * if rel is negative, (rel & 0xfe000000) should be 0xfe000000;
	 * the last '3' is used for alignment checking.
	 */
	rel_chk = (unsigned long)((long)code -
			(long)orig->addr + 8) & 0xfe000003;

	if ((rel_chk != 0) && (rel_chk != 0xfe000000)) {
		/*
		 * Unlike x86, we free the code buffer directly instead of
		 * calling __arch_remove_optimized_kprobe() because we have
		 * not filled in any field of op yet.
		 */
		free_optinsn_slot(code, 0);
		return -ERANGE;
	}

	/* Copy arch-dep-instance from template. */
	memcpy(code, &optprobe_template_entry,
			TMPL_END_IDX * sizeof(kprobe_opcode_t));

	/* Adjust buffer according to instruction. */
	BUG_ON(orig->ainsn.stack_space < 0);

	stack_protect += orig->ainsn.stack_space;

	/* Should have been filtered by can_optimize(). */
	BUG_ON(stack_protect > 255);

	/* Create a 'sub sp, sp, #<stack_protect>' */
	code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect);
	/* Create an 'add r3, sp, #<stack_protect>' */
	code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect);

	/* Set probe information */
	val = (unsigned long)op;
	code[TMPL_VAL_IDX] = val;

	/* Set probe function call */
	val = (unsigned long)optimized_callback;
	code[TMPL_CALL_IDX] = val;

	/* If possible, copy insn and have it executed during restore */
	orig->ainsn.kprobe_direct_exec = false;
	if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) {
		kprobe_opcode_t final_branch = arm_gen_branch(
				(unsigned long)(&code[TMPL_RESTORE_END]),
				(unsigned long)(op->kp.addr) + 4);
		if (final_branch != 0) {
			/*
			 * Replace the original 'ldmia sp, {r0 - r15}' with
			 * 'ldmia sp, {r0 - r14}': restore all registers
			 * except pc.
			 */
			code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff);

			/* The original probed instruction */
			code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode);

			/* Jump back to the next instruction */
			code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch);
			orig->ainsn.kprobe_direct_exec = true;
		}
	}

	flush_icache_range((unsigned long)code,
			(unsigned long)(&code[TMPL_END_IDX]));

	/* Setting op->optinsn.insn marks this probe as prepared. */
	op->optinsn.insn = code;
	return 0;
}
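
/*
 * For reference, a prepared slot in the direct-exec case ends up roughly
 * like this (a sketch of the patched template above, not literal output):
 *
 *	sub	sp, sp, #<stack_protect>	@ patched immediate
 *	stmia	sp, {r0 - r14}
 *	add	r3, sp, #<stack_protect>	@ patched immediate
 *	...					@ build pt_regs, call optimized_callback()
 *	ldmia	sp, {r0 - r14}			@ patched: restore everything but pc
 *	<original probed instruction>
 *	b	<probed address + 4>		@ branch back past the probe point
 *	.long	<op>				@ optprobe_template_val
 *	.long	optimized_callback		@ optprobe_template_call
 */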

void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		unsigned long insn;

		WARN_ON(kprobe_disabled(&op->kp));

		/*
		 * Back up the instruction which will be replaced
		 * by the jump address.
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
				RELATIVEJUMP_SIZE);

		insn = arm_gen_branch((unsigned long)op->kp.addr,
				(unsigned long)op->optinsn.insn);
		BUG_ON(insn == 0);

		/*
		 * Make it a conditional branch if the replaced insn
		 * is conditional.
		 */
		insn = (__mem_to_opcode_arm(
			op->optinsn.copied_insn[0]) & 0xf0000000) |
			(insn & 0x0fffffff);

		/*
		 * Similar to __arch_disarm_kprobe, operations which
		 * remove the breakpoint must be wrapped by stop_machine
		 * to avoid racing.
		 */
		kprobes_remove_breakpoint(op->kp.addr, insn);

		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * Caller must hold kprobe_mutex.
 */
void arch_unoptimize_kprobes(struct list_head *oplist,
		struct list_head *done_list)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	__arch_remove_optimized_kprobe(op, 1);
}