// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Code for Kernel probes Jump optimization.
 *
 * Copyright 2017, Anju T, IBM Corp.
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
#include <asm/inst.h>

#define TMPL_CALL_HDLR_IDX	\
	(optprobe_template_call_handler - optprobe_template_entry)
#define TMPL_EMULATE_IDX	\
	(optprobe_template_call_emulate - optprobe_template_entry)
#define TMPL_RET_IDX		\
	(optprobe_template_ret - optprobe_template_entry)
#define TMPL_OP_IDX		\
	(optprobe_template_op_address - optprobe_template_entry)
#define TMPL_INSN_IDX		\
	(optprobe_template_insn - optprobe_template_entry)
#define TMPL_END_IDX		\
	(optprobe_template_end - optprobe_template_entry)

DEFINE_INSN_CACHE_OPS(ppc_optinsn);

static bool insn_page_in_use;

static void *__ppc_alloc_insn_page(void)
{
	if (insn_page_in_use)
		return NULL;
	insn_page_in_use = true;
	return &optinsn_slot;
}

static void __ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}

struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
	/* insn_size initialized later */
	.alloc = __ppc_alloc_insn_page,
	.free = __ppc_free_insn_page,
	.nr_garbage = 0,
};

/*
 * Check if we can optimize this probe. Returns the post-emulation NIP
 * if the probe can be optimized and 0 otherwise.
 */
static unsigned long can_optimize(struct kprobe *p)
{
	struct pt_regs regs;
	struct instruction_op op;
	unsigned long nip = 0;

	/*
	 * The kprobe placed on the kretprobe trampoline during boot
	 * sits on a 'nop' instruction, which can always be emulated,
	 * so further checks can be skipped.
	 */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
		return (unsigned long)p->addr + sizeof(kprobe_opcode_t);

	/*
	 * We only support optimizing kernel addresses, not
	 * module addresses.
	 *
	 * FIXME: Optimize kprobes placed in module addresses.
	 */
	if (!is_kernel_addr((unsigned long)p->addr))
		return 0;

	memset(&regs, 0, sizeof(struct pt_regs));
	regs.nip = (unsigned long)p->addr;
	regs.trap = 0x0;
	regs.msr = MSR_KERNEL;

	/*
	 * Kprobes placed on conditional branch instructions are not
	 * optimized, as we can't predict the nip ahead of time with a
	 * dummy pt_regs and can't ensure that the return branch from
	 * the detour buffer falls within the 32MB branch range. The
	 * detour buffer branches back from the trampoline to the nip
	 * returned by analyse_instr() here.
	 *
	 * Ensure that the instruction is not a conditional branch
	 * and that it can be emulated.
	 */
	if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) &&
	    analyse_instr(&op, &regs,
			  ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) {
		emulate_update_regs(&regs, &op);
		nip = regs.nip;
	}

	return nip;
}

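/*
 * Pre-handler invoked from the detour buffer in place of the original
 * trap-based kprobe hit. It runs the kprobe pre-handler on the register
 * state passed in by the optprobe template; the probed instruction itself
 * is emulated by a separate call to emulate_step() from the detour buffer.
 */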
static void optimized_callback(struct optimized_kprobe *op,
			       struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimization */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		regs->nip = (unsigned long)op->kp.addr;
		get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	if (op->optinsn.insn) {
		free_ppc_optinsn_slot(op->optinsn.insn, 1);
		op->optinsn.insn = NULL;
	}
}

/*
 * emulate_step() takes the instruction to be emulated as its
 * second parameter, so load register 'r4' with the instruction.
 */
void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
{
	/* addis r4,0,(insn)@h */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(4) |
				   ((val >> 16) & 0xffff)));
	addr++;

	/* ori r4,r4,(insn)@l */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(4) |
				   ___PPC_RS(4) | (val & 0xffff)));
}

/*
 * Generate instructions to load the provided immediate 64-bit value
 * into register 'reg' and patch these instructions at 'addr'.
 */
void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
	/* lis reg,(op)@highest */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
				   ((val >> 48) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@higher */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
	addr++;

	/* rldicr reg,reg,32,31 */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
	addr++;

	/* oris reg,reg,(op)@h */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@l */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | (val & 0xffff)));
}

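/*
 * Build the detour buffer for this probe: copy the optprobe template into
 * an instruction slot, then patch in the address of the optimized_kprobe,
 * the branches to optimized_callback() and emulate_step(), the instruction
 * to be emulated, and the branch back to the post-emulation nip.
 */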
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	struct ppc_inst branch_op_callback, branch_emulate_step, temp;
	kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
	long b_offset;
	unsigned long nip, size;
	int rc, i;

	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;

	nip = can_optimize(p);
	if (!nip)
		return -EILSEQ;

	/* Allocate instruction slot for detour buffer */
	buff = get_ppc_optinsn_slot();
	if (!buff)
		return -ENOMEM;

	/*
	 * OPTPROBE uses a 'b' instruction to branch to optinsn.insn.
	 *
	 * The target address has to be relatively close by for a powerpc
	 * branch instruction to reach it, because the offset is encoded
	 * in a 24-bit immediate field of the opcode itself. Therefore the
	 * detour buffer must lie within 32MB on either side of the probed
	 * instruction.
	 */
	b_offset = (unsigned long)buff - (unsigned long)p->addr;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Check if the return address is also within 32MB range */
	b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
			(unsigned long)nip;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Setup template */
	/* We can optimize this via patch_instruction_window later */
	size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
	pr_devel("Copying template to %p, size %lu\n", buff, size);
	for (i = 0; i < size; i++) {
		rc = patch_instruction((struct ppc_inst *)(buff + i),
				       ppc_inst(*(optprobe_template_entry + i)));
		if (rc < 0)
			goto error;
	}

	/*
	 * Fixup the template with instructions to:
	 * 1. load the address of the actual probepoint
	 */
	patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);

	/*
	 * 2. branch to optimized_callback() and emulate_step()
	 */
	op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
	emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
	if (!op_callback_addr || !emulate_step_addr) {
		WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
		goto error;
	}

	rc = create_branch(&branch_op_callback,
			   (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			   (unsigned long)op_callback_addr,
			   BRANCH_SET_LINK);

	rc |= create_branch(&branch_emulate_step,
			    (struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			    (unsigned long)emulate_step_addr,
			    BRANCH_SET_LINK);

	if (rc)
		goto error;

	patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			  branch_op_callback);
	patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			  branch_emulate_step);

	/*
	 * 3. load instruction to be emulated into relevant register, and
	 */
	temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
	patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX);

	/*
	 * 4. branch back from trampoline
	 */
	patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0);

	flush_icache_range((unsigned long)buff,
			   (unsigned long)(&buff[TMPL_END_IDX]));

	op->optinsn.insn = buff;

	return 0;

error:
	free_ppc_optinsn_slot(buff, 0);
	return -ERANGE;
}

int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * On powerpc, optprobes always replace a single instruction (4 bytes
 * long and 4-byte aligned), so it is impossible to encounter another
 * kprobe in that address range. Always return 0.
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

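/*
 * Replace the probed instruction at each kprobe address with a branch into
 * its detour buffer, after backing up the original instruction in
 * op->optinsn.copied_insn.
 */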
void arch_optimize_kprobes(struct list_head *oplist)
{
	struct ppc_inst instr;
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/*
		 * Back up the instruction which will be replaced
		 * by the branch to the detour buffer.
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
		       RELATIVEJUMP_SIZE);
		create_branch(&instr,
			      (struct ppc_inst *)op->kp.addr,
			      (unsigned long)op->optinsn.insn, 0);
		patch_instruction((struct ppc_inst *)op->kp.addr, instr);
		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}