// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
}

static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[   stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}
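
/*
 * Worked example for the helpers above (a sketch, assuming the
 * BPF_PPC_STACK_SAVE save area is the 6*8 = 48 bytes shown in the
 * redzone diagram): with no stack frame of our own,
 *
 *	bpf_jit_stack_local()       = -(48 + 16) = -64	-> local_tmp_var
 *	bpf_jit_stack_tailcallcnt() = -64 + 8    = -56	-> tail_call_cnt
 *
 * which matches the -(BPF_PPC_STACK_SAVE + 8) slot the prologue below
 * uses to initialize tail_call_cnt, and leaves r1 - 8 .. r1 - 48 for
 * the non-volatile GPR save area addressed by bpf_jit_stack_offsetof()
 * (r31 at r1 - 8, r30 at r1 - 16, and so on).
 */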

static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		EMIT(PPC_RAW_NOP());
		EMIT(PPC_RAW_NOP());
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size));
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			EMIT(PPC_RAW_MTLR(0));
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));

	EMIT(PPC_RAW_BLR());
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourself.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	EMIT(PPC_RAW_MTLR(12));
#endif
	EMIT(PPC_RAW_BLRL());
}
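
/*
 * Background for the descriptor offsets used above and in
 * bpf_jit_emit_func_call_rel() below (a sketch of the usual ELF ABI v1
 * convention, not something defined in this file): a v1 "function
 * pointer" addresses a function descriptor rather than code, laid out
 * roughly as
 *
 *	struct func_desc {
 *		u64 entry;	// code address     (loaded from offset 0)
 *		u64 toc;	// TOC pointer (r2) (loaded from offset 8)
 *		u64 env;
 *	};
 *
 * which is why the v1 paths branch via the address at offset 0 and
 * refresh r2 from offset 8, while the ELFv2 path can use the function
 * address directly.
 */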

static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but PPC_LI64() may optimize the number
	 * of instructions emitted based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		EMIT(PPC_RAW_NOP());

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourself.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	EMIT(PPC_RAW_MTLR(12));
	EMIT(PPC_RAW_BLRL());
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
#else
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
#endif
	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	EMIT(PPC_RAW_BCTR());
	/* out: */
}
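
/*
 * Note on the BPF_TAILCALL_PROLOGUE_SIZE offset used above: the first
 * two prologue instructions (2 * 4 = 8 bytes) are the ones that either
 * initialize tail_call_cnt in the redzone or stand in as NOPs, so a
 * tail call enters the target program just past them (plus the 16-byte
 * function descriptor on ELF ABI v1). The count is therefore carried
 * over from the calling program instead of being reset to zero on
 * every tail call.
 */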

/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
			else
				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				else
					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
			break;
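		/*
		 * Worked example of the BPF_MOD lowering above: dst %= src is
		 * computed with a divide/multiply/subtract sequence rather
		 * than a single remainder instruction, i.e.
		 * dst - (dst / src) * src. For dst = 23, src = 7:
		 * 23 / 7 = 3, 3 * 7 = 21, 23 - 21 = 2, which is 23 % 7.
		 * The immediate forms below reuse the same shape.
		 */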
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
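		/*
		 * A quick illustration of the IMM_H()/IMM_L() splitting used
		 * for the OR/XOR immediate forms above: ori/oris (and
		 * xori/xoris) each take a 16-bit unsigned immediate, so
		 * e.g. imm = 0x12345678 is applied as
		 *	ori  dst, dst, 0x5678
		 *	oris dst, dst, 0x1234
		 * while a negative imm in the 64-bit case must go through
		 * PPC_LI32() so the upper 32 bits end up sign-extended.
		 */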
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
			/* skip zero extension move, but set address map. */
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			if (imm == 1) {
				/* special mov32 for zext */
				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
				break;
			}
			EMIT(PPC_RAW_MR(dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			else if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
				/* Rotate 8 bits right & insert LSB to reg */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
				/* Move result back to dst_reg */
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
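			/*
			 * Worked example of the 16-bit swap above (rlwinm is
			 * rotate-left-and-mask, rlwimi rotate-and-insert, with
			 * IBM bit numbering where mask 16-23 is 0x0000ff00 and
			 * 24-31 is 0x000000ff): for a low halfword of 0x1234,
			 * the rotate-left-8 + mask step leaves 0x3400 in the
			 * temporary, the rotate-24 + insert step deposits 0x12
			 * into its low byte, and the final mr gives
			 * dst = 0x3412.
			 */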
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
				/* Rotate 24 bits and insert byte 1 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
				/* Rotate 24 bits and insert byte 3 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
				if (insn_is_zext(&insn[i + 1]))
					addrs[++i] = ctx->idx * 4;
				break;
			case 32:
				if (!fp->aux->verifier_zext)
					/* zero-extend 32 bits into 64 bits */
					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			/* add value from src_reg into this */
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			/* store result back */
			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
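		/*
		 * Note on the two XADD sequences above: lwarx/ldarx set a
		 * reservation on the target address, and stwcx./stdcx. only
		 * succeed (CR0[EQ] set) if that reservation is still intact.
		 * The PPC_BCC_SHORT(COND_NE, tmp_idx) therefore loops back to
		 * retry the load-modify-store whenever the reservation was
		 * lost (typically because another CPU wrote the location in
		 * between), giving an atomic add without a lock.
		 */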

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				(((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
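		/*
		 * BPF_JSET ("jump if dst & src/imm is non-zero") has no
		 * direct PPC compare, so the cases below use the dot-form
		 * and./andi./rlwinm. instructions, which set CR0 as a side
		 * effect of the AND, and then branch on COND_NE just like
		 * the JNE cases above.
		 */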
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
							     src_reg));
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
								31));
				}
				break;
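			/*
			 * For the immediate comparisons below, cmpldi/cmplwi
			 * and cmpdi/cmpwi only encode a 16-bit immediate
			 * field, so anything outside that range is first
			 * materialized with PPC_LI32() and compared register
			 * to register. The unsigned forms are additionally
			 * limited to non-negative immediates here because
			 * cmpldi zero-extends its field, which only matches
			 * the intended sign-extended value when imm >= 0.
			 */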
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLW(dst_reg,
								   b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPLD(dst_reg,
								   b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						EMIT(PPC_RAW_CMPW(dst_reg,
								  b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPD(dst_reg,
								  b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
								     tmp_reg));
					} else {
						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
								 tmp_reg));
						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
									0, 0, 31));
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}
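
/*
 * A short note on the addrs[] bookkeeping above: entry i holds the byte
 * offset, from the start of the body code, of the instructions emitted
 * for BPF insn i, and addrs[flen] (the extra entry) is the offset of the
 * epilogue. A branch with offset 'off' at insn i therefore resolves to
 * addrs[i + 1 + off]; for example, a BPF_JA with off = 3 at insn 10
 * branches to the code generated for insn 14. These offsets only settle
 * once the instruction counts stop changing, which is why the first pass
 * is a sizing pass and branch targets are trusted from the second pass
 * onwards.
 */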

/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequence for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}

struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 */
	if (cgctx.seen & SEEN_TAILCALL) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
				       bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
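
	/*
	 * Layout reminder (see the ELF ABI v1 block further down): on ABI v1
	 * the first FUNCTION_DESCR_SIZE bytes of the image hold the function
	 * descriptor (entry address + TOC, 16 bytes) that gets filled in
	 * after code generation, so the actual instructions start at
	 * code_base; elsewhere FUNCTION_DESCR_SIZE is expected to be 0 and
	 * code_base coincides with image.
	 */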
	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output code_base rather than image, since
		 * the opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}