// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Code for Kernel probes Jump optimization.
 *
 * Copyright 2017, Anju T, IBM Corp.
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
#include <asm/inst.h>

#define TMPL_CALL_HDLR_IDX	\
	(optprobe_template_call_handler - optprobe_template_entry)
#define TMPL_EMULATE_IDX	\
	(optprobe_template_call_emulate - optprobe_template_entry)
#define TMPL_RET_IDX		\
	(optprobe_template_ret - optprobe_template_entry)
#define TMPL_OP_IDX		\
	(optprobe_template_op_address - optprobe_template_entry)
#define TMPL_INSN_IDX		\
	(optprobe_template_insn - optprobe_template_entry)
#define TMPL_END_IDX		\
	(optprobe_template_end - optprobe_template_entry)

DEFINE_INSN_CACHE_OPS(ppc_optinsn);

static bool insn_page_in_use;

static void *__ppc_alloc_insn_page(void)
{
	if (insn_page_in_use)
		return NULL;
	insn_page_in_use = true;
	return &optinsn_slot;
}

static void __ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}

struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
	/* insn_size initialized later */
	.alloc = __ppc_alloc_insn_page,
	.free = __ppc_free_insn_page,
	.nr_garbage = 0,
};

/*
 * Check if we can optimize this probe. Returns NIP post-emulation if this can
 * be optimized and 0 otherwise.
 */
static unsigned long can_optimize(struct kprobe *p)
{
	struct pt_regs regs;
	struct instruction_op op;
	unsigned long nip = 0;

	/*
	 * The kprobe placed for the kretprobe trampoline during boot
	 * has a 'nop' instruction, which can be emulated, so further
	 * checks can be skipped.
	 */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
		return (unsigned long)p->addr + sizeof(kprobe_opcode_t);

	/*
	 * We only support optimizing kernel addresses, but not
	 * module addresses.
	 *
	 * FIXME: Optimize kprobes placed in module addresses.
	 */
	if (!is_kernel_addr((unsigned long)p->addr))
		return 0;

	memset(&regs, 0, sizeof(struct pt_regs));
	regs.nip = (unsigned long)p->addr;
	regs.trap = 0x0;
	regs.msr = MSR_KERNEL;

	/*
	 * Kprobes placed on conditional branch instructions are not
	 * optimized, as we cannot predict the nip ahead of time with a
	 * dummy pt_regs and cannot ensure that the return branch from
	 * the detour buffer falls within the branch range (i.e. 32MB).
	 * A branch back from the trampoline is set up in the detour
	 * buffer to the nip returned by analyse_instr() here.
	 *
	 * Ensure that the instruction is not a conditional branch,
	 * and that it can be emulated.
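	 *
	 * Note: analyse_instr() returns 1 when the instruction can be
	 * emulated just by updating the register image; in that case
	 * emulate_update_regs() applies the update and regs.nip holds
	 * the post-emulation address that is returned below.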
	 */
	if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) &&
	    analyse_instr(&op, &regs,
			  ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) {
		emulate_update_regs(&regs, &op);
		nip = regs.nip;
	}

	return nip;
}

static void optimized_callback(struct optimized_kprobe *op,
			       struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		regs->nip = (unsigned long)op->kp.addr;
		get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	if (op->optinsn.insn) {
		free_ppc_optinsn_slot(op->optinsn.insn, 1);
		op->optinsn.insn = NULL;
	}
}

/*
 * Generate instructions to load the provided immediate 64-bit value
 * into register 'reg' and patch these instructions at 'addr'.
 */
static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
	/* lis reg,(op)@highest */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
				   ((val >> 48) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@higher */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
	addr++;

	/* rldicr reg,reg,32,31 */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
	addr++;

	/* oris reg,reg,(op)@h */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@l */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | (val & 0xffff)));
}

int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	struct ppc_inst branch_op_callback, branch_emulate_step, temp;
	kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
	long b_offset;
	unsigned long nip, size;
	int rc, i;

	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;

	nip = can_optimize(p);
	if (!nip)
		return -EILSEQ;

	/* Allocate instruction slot for detour buffer */
	buff = get_ppc_optinsn_slot();
	if (!buff)
		return -ENOMEM;

	/*
	 * OPTPROBE uses the 'b' instruction to branch to optinsn.insn.
	 *
	 * The target address has to be relatively close, to permit use
	 * of a branch instruction on powerpc, because the address is
	 * specified in an immediate field in the instruction opcode
	 * itself, i.e. 24 bits in the opcode specify the address.
	 * Therefore the address should be within 32MB on either side
	 * of the current instruction.
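	 *
	 * For reference: the I-form branch encodes a 24-bit LI field and
	 * the byte offset is LI << 2, sign-extended, so the reachable
	 * range works out to +/- 2^25 bytes, i.e. 32MB either way.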
	 */
	b_offset = (unsigned long)buff - (unsigned long)p->addr;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Check if the return address is also within 32MB range */
	b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
		   (unsigned long)nip;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Setup template */
	/* We can optimize this via patch_instruction_window later */
	size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
	pr_devel("Copying template to %p, size %lu\n", buff, size);
	for (i = 0; i < size; i++) {
		rc = patch_instruction((struct ppc_inst *)(buff + i),
				       ppc_inst(*(optprobe_template_entry + i)));
		if (rc < 0)
			goto error;
	}

	/*
	 * Fixup the template with instructions to:
	 * 1. load the address of the actual probepoint
	 */
	patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);

	/*
	 * 2. branch to optimized_callback() and emulate_step()
	 */
	op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
	emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
	if (!op_callback_addr || !emulate_step_addr) {
		WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
		goto error;
	}

	rc = create_branch(&branch_op_callback,
			   (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			   (unsigned long)op_callback_addr,
			   BRANCH_SET_LINK);

	rc |= create_branch(&branch_emulate_step,
			    (struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			    (unsigned long)emulate_step_addr,
			    BRANCH_SET_LINK);

	if (rc)
		goto error;

	patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			  branch_op_callback);
	patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			  branch_emulate_step);

	/*
	 * 3. load instruction to be emulated into relevant register, and
	 */
	temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
	patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX);

	/*
	 * 4. branch back from trampoline
	 */
	patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0);

	flush_icache_range((unsigned long)buff,
			   (unsigned long)(&buff[TMPL_END_IDX]));

	op->optinsn.insn = buff;

	return 0;

error:
	free_ppc_optinsn_slot(buff, 0);
	return -ERANGE;

}

int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * On powerpc, Optprobes always replaces one instruction (4 bytes
 * aligned and 4 bytes long). It is impossible to encounter another
 * kprobe in this address range. So always return 0.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

void arch_optimize_kprobes(struct list_head *oplist)
{
	struct ppc_inst instr;
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/*
		 * Backup instructions which will be replaced
		 * by jump address
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
		       RELATIVEJUMP_SIZE);
		create_branch(&instr,
			      (struct ppc_inst *)op->kp.addr,
			      (unsigned long)op->optinsn.insn, 0);
		patch_instruction((struct ppc_inst *)op->kp.addr, instr);
		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}