// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
}

static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}
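
/*
 * Worked example for bpf_jit_stack_offsetof() above: with a stack frame,
 * a non-volatile register rN (BPF_PPC_NVR_MIN <= N < 32) is saved at
 * (BPF_PPC_STACKFRAME + stack_size) - 8 * (32 - N), i.e. r31 goes 8 bytes
 * below the top of our frame, r30 goes 16 bytes below, and so on. Without
 * a frame, the same registers land in the caller's redzone at
 * -8 * (32 - N) from r1, consistent with the layout pictured above.
 */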

static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		PPC_LI(b2p[TMP_REG_1], 0);
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		PPC_NOP();
		PPC_NOP();
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		PPC_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size);
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			PPC_MTLR(0);
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	PPC_MR(3, b2p[BPF_REG_0]);

	PPC_BLR();
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	PPC_MTLR(b2p[TMP_REG_1]);
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	PPC_MTLR(12);
#endif
	PPC_BLRL();
}
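
/*
 * bpf_jit_emit_func_call_hlp() above is used when the callee address is
 * already final at JIT time (func_addr_fixed, i.e. kernel helpers).
 * bpf_jit_emit_func_call_rel() below handles bpf-to-bpf calls, whose target
 * is only known during the extra pass, so it pads the address load to a
 * fixed length that bpf_jit_fixup_subprog_calls() can patch in place later.
 */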

static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		PPC_NOP();

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	PPC_MTLR(12);
	PPC_BLRL();
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	PPC_CMPLDI(b2p[TMP_REG_1], 0);
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(b2p[TMP_REG_1]);

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	PPC_BCTR();
	/* out: */
}
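
/*
 * The indirect branch above targets bpf_func + BPF_TAILCALL_PROLOGUE_SIZE,
 * i.e. it skips the two instructions emitted at the very top of the prologue
 * (the tail_call_cnt initialization, or the two NOPs), so the counter set up
 * by the original entry point survives across tail calls.
 */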

/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			PPC_ADD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			PPC_SUB(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_MULW(dst_reg, dst_reg, src_reg);
			else
				PPC_MULD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					PPC_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				else
					PPC_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVWU(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVD(dst_reg, dst_reg, src_reg);
			break;
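		/*
		 * BPF_MOD is open-coded above as dst -= (dst / src) * src
		 * (unsigned divide, multiply, subtract) rather than relying
		 * on a hardware remainder instruction. The immediate variants
		 * below follow the same pattern after loading imm into a
		 * temporary register.
		 */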
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVD(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			PPC_NEG(dst_reg, dst_reg);
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			PPC_AND(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			PPC_OR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			PPC_XOR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			PPC_SLW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			PPC_SLD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			PPC_SLWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				PPC_SLDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			PPC_SRW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			PPC_SRD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			PPC_SRWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				PPC_SRDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			PPC_SRAW(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			PPC_SRAD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			PPC_SRAWI(dst_reg, dst_reg, imm);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				PPC_SRADI(dst_reg, dst_reg, imm);
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			PPC_MR(dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU)
			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
				/* Rotate 8 bits right & insert LSB to reg */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
				/* Move result back to dst_reg */
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
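			/*
			 * Example for the 16-bit swap above: with dst_reg
			 * holding 0x1234, the first rotate leaves 0x3400 in
			 * TMP_REG_1 (byte 0x34 moved up), and the rlwimi then
			 * inserts 0x12 into the low byte, giving 0x3412.
			 */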
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
				/* Rotate 24 bits and insert byte 1 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
				/* Rotate 24 bits and insert byte 3 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into the stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
				break;
			case 32:
				/* zero-extend 32 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 32);
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STB(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STH(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STW(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			/* add value from src_reg into this */
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			/* store result back */
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
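
		/*
		 * In both XADD cases above, the store-conditional can fail if
		 * the reservation taken by lwarx/ldarx is lost; the COND_NE
		 * branch then jumps back to tmp_idx (the reservation load) and
		 * the read-modify-write is retried until it succeeds.
		 */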

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			PPC_LBZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			PPC_LHZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			PPC_LWZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;
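		/*
		 * The second bpf_insn of the pair above only carries the
		 * upper 32 bits of the constant in its imm field; it is
		 * consumed here (addrs[++i]) and no separate code is emitted
		 * for it.
		 */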

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			PPC_MR(b2p[BPF_REG_0], 3);
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPLW(dst_reg, src_reg);
				else
					PPC_CMPLD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPW(dst_reg, src_reg);
				else
					PPC_CMPD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    src_reg);
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_AND(tmp_reg, dst_reg, src_reg);
					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
						       31);
				}
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPLWI(dst_reg, imm);
					else
						PPC_CMPLDI(dst_reg, imm);
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						PPC_CMPLW(dst_reg,
							  b2p[TMP_REG_1]);
					else
						PPC_CMPLD(dst_reg,
							  b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPWI(dst_reg, imm);
					else
						PPC_CMPDI(dst_reg, imm);
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						PPC_CMPW(dst_reg,
							 b2p[TMP_REG_1]);
					else
						PPC_CMPD(dst_reg,
							 b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						PPC_AND_DOT(tmp_reg, dst_reg,
							    tmp_reg);
					} else {
						PPC_AND(tmp_reg, dst_reg,
							tmp_reg);
						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
							       0, 0, 31);
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequences for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}
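
/*
 * State carried between bpf_int_jit_compile() invocations: for programs with
 * bpf-to-bpf calls (fp->is_func), the JIT is entered again for an extra pass,
 * and the saved image, header, addrs[] table and codegen context let that
 * pass patch the subprog call sites via bpf_jit_fixup_subprog_calls() without
 * redoing full code generation.
 */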
struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}