// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}

static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}

/*
 * When not setting up our own stack frame, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}
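/*
 * Layout note (illustrative; the exact byte values come from bpf_jit64.h):
 * with a stack frame, the BPF local/tmp slots live just above the fixed
 * frame header at STACK_FRAME_MIN_SIZE + stack_size, and the non-volatile
 * register save slots sit near the top of our frame, at
 * BPF_PPC_STACKFRAME + stack_size minus 8 bytes per register. Without a
 * frame, both fall back to negative offsets from r1, i.e. into the
 * caller-protected red zone pictured above.
 */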
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		PPC_LI(b2p[TMP_REG_1], 0);
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		PPC_NOP();
		PPC_NOP();
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		PPC_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size);
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			PPC_MTLR(0);
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	PPC_MR(3, b2p[BPF_REG_0]);

	PPC_BLR();
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	PPC_MTLR(b2p[TMP_REG_1]);
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	PPC_MTLR(12);
#endif
	PPC_BLRL();
}
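/*
 * Note: bpf_jit_emit_func_call_hlp() above is used when the callee address
 * is already known and fixed (kernel helpers), so PPC_LI64() is free to emit
 * a shorter immediate-load sequence. bpf_jit_emit_func_call_rel() below is
 * used for bpf-to-bpf calls, whose targets are only known at the extra pass,
 * so it must emit a fixed-length sequence that can be patched in place later
 * (see bpf_jit_fixup_subprog_calls()).
 */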
static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		PPC_NOP();

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	PPC_MTLR(12);
	PPC_BLRL();
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5:
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	PPC_CMPLDI(b2p[TMP_REG_1], 0);
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(b2p[TMP_REG_1]);

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	PPC_BCTR();
	/* out: */
}
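/*
 * Note on the tail call above: the computed target skips the function
 * descriptor (ELFv1 only) plus BPF_TAILCALL_PROLOGUE_SIZE bytes of the
 * callee's prologue, so the callee's tail_call_cnt initialization (or the
 * two NOPs emitted in its place) is not re-run and the count carries over
 * across tail calls.
 */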
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			PPC_ADD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			PPC_SUB(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_MULW(dst_reg, dst_reg, src_reg);
			else
				PPC_MULD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					PPC_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				else
					PPC_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVWU(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVDU(dst_reg, dst_reg, src_reg);
			break;
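		/*
		 * Note: no remainder instruction is assumed here (dedicated
		 * modulo instructions only appear in newer ISA revisions), so
		 * BPF_MOD is open-coded above and below as
		 * dst - (dst / src) * src using the unsigned divide, multiply
		 * and subtract. The BPF_K variants first materialize imm in
		 * TMP_REG_1 and then follow the same pattern.
		 */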
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVDU(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVDU(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			PPC_NEG(dst_reg, dst_reg);
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			PPC_AND(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			PPC_OR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			PPC_XOR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
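		/*
		 * Note for the shift and load cases below: when the verifier
		 * inserts explicit zero-extension instructions (see
		 * bpf_jit_needs_zext() returning true), 32-bit operations
		 * whose PowerPC encoding already clears the upper 32 bits can
		 * consume that zext insn -- insn_is_zext() checks for it, the
		 * loop index is bumped, and its addrs[] slot is still filled
		 * in so branch targets stay correct.
		 */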
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			PPC_SLW(dst_reg, dst_reg, src_reg);
			/* skip zero extension move, but set address map. */
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			PPC_SLD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			PPC_SLWI(dst_reg, dst_reg, imm);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				PPC_SLDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			PPC_SRW(dst_reg, dst_reg, src_reg);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			PPC_SRD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			PPC_SRWI(dst_reg, dst_reg, imm);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				PPC_SRDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			PPC_SRAW(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			PPC_SRAD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			PPC_SRAWI(dst_reg, dst_reg, imm);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				PPC_SRADI(dst_reg, dst_reg, imm);
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			if (imm == 1) {
				/* special mov32 for zext */
				PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
				break;
			}
			PPC_MR(dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			else if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
		break;
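		/*
		 * Note for the endian conversion cases below: 16- and 32-bit
		 * swaps are built purely from rotate-and-insert (rlwinm/
		 * rlwimi) on a temporary, while the 64-bit swap goes through
		 * the JIT's stack slot and a byte-reversed load (ldbrx).
		 * When source and target endianness already match, only the
		 * emit_clear truncation to 16/32 bits is needed.
		 */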
		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
				/* Rotate 8 bits right & insert LSB to reg */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
				/* Move result back to dst_reg */
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
				/* Rotate 24 bits and insert byte 1 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
				/* Rotate 24 bits and insert byte 3 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into the stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
				if (insn_is_zext(&insn[i + 1]))
					addrs[++i] = ctx->idx * 4;
				break;
			case 32:
				if (!fp->aux->verifier_zext)
					/* zero-extend 32 bits into 64 bits */
					PPC_RLDICL(dst_reg, dst_reg, 0, 32);
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STB(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STH(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STW(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			/* add value from src_reg into this */
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			/* store result back */
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
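		/*
		 * Note on the XADD sequences above: lwarx/ldarx takes a
		 * reservation on the target address, the add is done in
		 * TMP_REG_2, and stwcx./stdcx. only succeeds if the
		 * reservation still holds; the conditional branch back to
		 * tmp_idx retries the whole load-add-store if it was lost.
		 */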
		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			PPC_LBZ(dst_reg, src_reg, off);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			PPC_LHZ(dst_reg, src_reg, off);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			PPC_LWZ(dst_reg, src_reg, off);
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			PPC_MR(b2p[BPF_REG_0], 3);
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;
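		/*
		 * Note for the conditional jumps below: the outer case labels
		 * only pick the PowerPC condition (true_cond) for the branch;
		 * the switch under cond_branch then emits the matching
		 * compare (signed, unsigned, or an AND for JSET) to set CR0,
		 * and a single PPC_BCC on true_cond jumps to
		 * addrs[i + 1 + off].
		 */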
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPLW(dst_reg, src_reg);
				else
					PPC_CMPLD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPW(dst_reg, src_reg);
				else
					PPC_CMPD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    src_reg);
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_AND(tmp_reg, dst_reg, src_reg);
					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
						       31);
				}
				break;
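			/*
			 * Note: JSET relies on the record (dot) forms above
			 * and below -- and. or rlwinm. for the 32-bit variant
			 * -- to set CR0, and uses COND_NE, chosen earlier, as
			 * the "a tested bit was set" condition.
			 */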
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPLWI(dst_reg, imm);
					else
						PPC_CMPLDI(dst_reg, imm);
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						PPC_CMPLW(dst_reg,
							  b2p[TMP_REG_1]);
					else
						PPC_CMPLD(dst_reg,
							  b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPWI(dst_reg, imm);
					else
						PPC_CMPDI(dst_reg, imm);
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						PPC_CMPW(dst_reg,
							 b2p[TMP_REG_1]);
					else
						PPC_CMPD(dst_reg,
							 b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						PPC_AND_DOT(tmp_reg, dst_reg,
							    tmp_reg);
					} else {
						PPC_AND(tmp_reg, dst_reg,
							tmp_reg);
						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
							       0, 0, 31);
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}
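/*
 * Note: addrs[] is allocated with fp->len + 1 entries; the extra slot filled
 * in at the end of bpf_jit_build_body() records the offset of the epilogue,
 * which is what exit_addr/BPF_EXIT branches to on the second and later
 * passes.
 */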
/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequence for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}

struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
				       bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}
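	/*
	 * Two real passes are run below: pass 1 emits code while some branch
	 * targets (addrs[] entries and exit_addr) may still be stale from the
	 * scouting pass, and pass 2 re-emits everything with the now-stable
	 * offsets so the final branches are correct. The "shrink" value
	 * printed under bpf_jit_enable > 1 is simply the difference between
	 * the estimated and the emitted size.
	 */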
	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}