// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Code for Kernel probes Jump optimization.
 *
 * Copyright 2017, Anju T, IBM Corp.
 */

#include <linux/kprobes.h>
#include <linux/jump_label.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
#include <asm/sstep.h>
#include <asm/ppc-opcode.h>
#include <asm/inst.h>

#define TMPL_CALL_HDLR_IDX	\
	(optprobe_template_call_handler - optprobe_template_entry)
#define TMPL_EMULATE_IDX	\
	(optprobe_template_call_emulate - optprobe_template_entry)
#define TMPL_RET_IDX		\
	(optprobe_template_ret - optprobe_template_entry)
#define TMPL_OP_IDX		\
	(optprobe_template_op_address - optprobe_template_entry)
#define TMPL_INSN_IDX		\
	(optprobe_template_insn - optprobe_template_entry)
#define TMPL_END_IDX		\
	(optprobe_template_end - optprobe_template_entry)

DEFINE_INSN_CACHE_OPS(ppc_optinsn);

static bool insn_page_in_use;

static void *__ppc_alloc_insn_page(void)
{
	if (insn_page_in_use)
		return NULL;
	insn_page_in_use = true;
	return &optinsn_slot;
}

static void __ppc_free_insn_page(void *page __maybe_unused)
{
	insn_page_in_use = false;
}

struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
	.pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
	/* insn_size initialized later */
	.alloc = __ppc_alloc_insn_page,
	.free = __ppc_free_insn_page,
	.nr_garbage = 0,
};

/*
 * Check if we can optimize this probe. Returns the post-emulation NIP if
 * the probe can be optimized and 0 otherwise.
 */
static unsigned long can_optimize(struct kprobe *p)
{
	struct pt_regs regs;
	struct instruction_op op;
	unsigned long nip = 0;

	/*
	 * The kprobe placed for the kretprobe trampoline at boot time
	 * is a 'nop' instruction, which can be emulated, so further
	 * checks can be skipped.
	 */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
		return (unsigned long)p->addr + sizeof(kprobe_opcode_t);

	/*
	 * We only support optimizing kernel addresses, not module
	 * addresses.
	 *
	 * FIXME: Optimize kprobes placed in module addresses.
	 */
	if (!is_kernel_addr((unsigned long)p->addr))
		return 0;

	memset(&regs, 0, sizeof(struct pt_regs));
	regs.nip = (unsigned long)p->addr;
	regs.trap = 0x0;
	regs.msr = MSR_KERNEL;

	/*
	 * Kprobes placed on conditional branch instructions are not
	 * optimized, as we can't predict the nip ahead of time with a
	 * dummy pt_regs and so can't ensure that the return branch from
	 * the detour buffer falls within branch range (i.e. +/- 32MB).
	 * A branch back from the trampoline to the nip returned by
	 * analyse_instr() is set up in the detour buffer.
	 *
	 * Ensure that the instruction is not a conditional branch and
	 * that it can be emulated.
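	 *
	 * (analyse_instr() returns 1 when the instruction's effect can be
	 * computed purely by updating the dummy pt_regs; emulate_update_regs()
	 * then yields the post-emulation nip that the detour buffer will
	 * branch back to.)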
	 */
	if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) &&
	    analyse_instr(&op, &regs,
			  ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) {
		emulate_update_regs(&regs, &op);
		nip = regs.nip;
	}

	return nip;
}

static void optimized_callback(struct optimized_kprobe *op,
			       struct pt_regs *regs)
{
	/* This is possible if op is under delayed unoptimizing */
	if (kprobe_disabled(&op->kp))
		return;

	preempt_disable();

	if (kprobe_running()) {
		kprobes_inc_nmissed_count(&op->kp);
	} else {
		__this_cpu_write(current_kprobe, &op->kp);
		regs->nip = (unsigned long)op->kp.addr;
		get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
		opt_pre_handler(&op->kp, regs);
		__this_cpu_write(current_kprobe, NULL);
	}

	preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
	if (op->optinsn.insn) {
		free_ppc_optinsn_slot(op->optinsn.insn, 1);
		op->optinsn.insn = NULL;
	}
}

static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
	addr++;

	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
}

/*
 * Generate instructions to load the provided 64-bit immediate value
 * into register 'reg' and patch these instructions at 'addr'.
 */
static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
{
	/* lis reg,(op)@highest */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
				   ((val >> 48) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@higher */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
	addr++;

	/* rldicr reg,reg,32,31 */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
	addr++;

	/* oris reg,reg,(op)@h */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
	addr++;

	/* ori reg,reg,(op)@l */
	patch_instruction((struct ppc_inst *)addr,
			  ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
				   ___PPC_RS(reg) | (val & 0xffff)));
}

static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
{
	if (IS_ENABLED(CONFIG_PPC64))
		patch_imm64_load_insns(val, reg, addr);
	else
		patch_imm32_load_insns(val, reg, addr);
}

int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	struct ppc_inst branch_op_callback, branch_emulate_step, temp;
	kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
	long b_offset;
	unsigned long nip, size;
	int rc, i;

	kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;

	nip = can_optimize(p);
	if (!nip)
		return -EILSEQ;

	/* Allocate an instruction slot for the detour buffer */
	buff = get_ppc_optinsn_slot();
	if (!buff)
		return -ENOMEM;

	/*
	 * OPTPROBE uses a 'b' instruction to branch to optinsn.insn.
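	 * (optinsn.insn is the detour buffer allocated just above from the
	 * out-of-line optinsn_slot area.)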
	 *
	 * The target address has to be relatively close by for a 'b'
	 * instruction to reach it: the displacement is encoded in a 24-bit
	 * immediate field in the opcode itself and is shifted left by two
	 * bits, so the target must lie within 32MB on either side of the
	 * branch.
	 */
	b_offset = (unsigned long)buff - (unsigned long)p->addr;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Check if the return address is also within the 32MB range */
	b_offset = (unsigned long)(buff + TMPL_RET_IDX) -
			(unsigned long)nip;
	if (!is_offset_in_branch_range(b_offset))
		goto error;

	/* Set up the template */
	/* We can optimize this via patch_instruction_window later */
	size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
	pr_devel("Copying template to %p, size %lu\n", buff, size);
	for (i = 0; i < size; i++) {
		rc = patch_instruction((struct ppc_inst *)(buff + i),
				       ppc_inst(*(optprobe_template_entry + i)));
		if (rc < 0)
			goto error;
	}

	/*
	 * Fix up the template with instructions to:
	 * 1. load the address of the actual probepoint
	 */
	patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);

	/*
	 * 2. branch to optimized_callback() and emulate_step()
	 */
	op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback");
	emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step");
	if (!op_callback_addr || !emulate_step_addr) {
		WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
		goto error;
	}

	rc = create_branch(&branch_op_callback,
			   (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			   (unsigned long)op_callback_addr,
			   BRANCH_SET_LINK);

	rc |= create_branch(&branch_emulate_step,
			    (struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			    (unsigned long)emulate_step_addr,
			    BRANCH_SET_LINK);

	if (rc)
		goto error;

	patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX),
			  branch_op_callback);
	patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX),
			  branch_emulate_step);

	/*
	 * 3. load the instruction to be emulated into the relevant register, and
	 */
	temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
	patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);

	/*
	 * 4. branch back from the trampoline
	 */
	patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0);

	flush_icache_range((unsigned long)buff,
			   (unsigned long)(&buff[TMPL_END_IDX]));

	op->optinsn.insn = buff;

	return 0;

error:
	free_ppc_optinsn_slot(buff, 0);
	return -ERANGE;
}

int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
{
	return optinsn->insn != NULL;
}

/*
 * On powerpc, optprobes always replace one instruction (4 bytes
 * aligned and 4 bytes long). It is impossible to encounter another
 * kprobe in this address range. So always return 0.
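 * (RELATIVEJUMP_SIZE is thus a single 4-byte instruction, which is also
 * the window checked by arch_within_optimized_kprobe() below.)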
 */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
	return 0;
}

void arch_optimize_kprobes(struct list_head *oplist)
{
	struct ppc_inst instr;
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		/*
		 * Back up the instruction that will be replaced by the
		 * branch to the detour buffer.
		 */
		memcpy(op->optinsn.copied_insn, op->kp.addr,
		       RELATIVEJUMP_SIZE);
		create_branch(&instr,
			      (struct ppc_inst *)op->kp.addr,
			      (unsigned long)op->optinsn.insn, 0);
		patch_instruction((struct ppc_inst *)op->kp.addr, instr);
		list_del_init(&op->list);
	}
}

void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
	arch_arm_kprobe(&op->kp);
}

void arch_unoptimize_kprobes(struct list_head *oplist,
			     struct list_head *done_list)
{
	struct optimized_kprobe *op;
	struct optimized_kprobe *tmp;

	list_for_each_entry_safe(op, tmp, oplist, list) {
		arch_unoptimize_kprobe(op);
		list_move(&op->list, done_list);
	}
}

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}