/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}

static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers\n");
	BUG();
}

static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
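	 * Either way, this is always exactly two instructions
	 * (BPF_TAILCALL_PROLOGUE_SIZE bytes), which is what lets the tail
	 * call code below branch to bpf_func + BPF_TAILCALL_PROLOGUE_SIZE.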
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		PPC_LI(b2p[TMP_REG_1], 0);
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		PPC_NOP();
		PPC_NOP();
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Set up the frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		PPC_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size);
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			PPC_MTLR(0);
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	PPC_MR(3, b2p[BPF_REG_0]);

	PPC_BLR();
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	PPC_MTLR(b2p[TMP_REG_1]);
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so the caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	PPC_MTLR(12);
#endif
	PPC_BLRL();
}

static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but PPC_LI64() may emit fewer
	 * instructions depending on the value being loaded.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
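	 *
	 * For example, if PPC_LI64() happens to materialize a particular
	 * address in two instructions, we follow it with three NOPs; an
	 * address needing the full five-instruction sequence gets no padding.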
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		PPC_NOP();

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so the caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	PPC_MTLR(12);
	PPC_BLRL();
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	PPC_CMPLDI(b2p[TMP_REG_1], 0);
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
#else
	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
#endif
	PPC_MTCTR(b2p[TMP_REG_1]);

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	PPC_BCTR();
	/* out: */
}

/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			PPC_ADD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			PPC_SUB(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_MULW(dst_reg, dst_reg, src_reg);
			else
				PPC_MULD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					PPC_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				else
					PPC_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVWU(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
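					/* dst %= imm: dst -= (dst / imm) * imm */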
					PPC_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVD(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			PPC_NEG(dst_reg, dst_reg);
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			PPC_AND(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			PPC_OR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			PPC_XOR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			PPC_SLW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			PPC_SLD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			PPC_SLWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				PPC_SLDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			PPC_SRW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			PPC_SRD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			PPC_SRWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				PPC_SRDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			PPC_SRAW(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			PPC_SRAD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			PPC_SRAWI(dst_reg, dst_reg, imm);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				PPC_SRADI(dst_reg, dst_reg, imm);
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			PPC_MR(dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU)
			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
				/* Rotate 8 bits right & insert LSB to reg */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
				/* Move result back to dst_reg */
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
				/* Rotate 24 bits and insert byte 1 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
				/* Rotate 24 bits and insert byte 3 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
				break;
			case 32:
				/* zero-extend 32 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 32);
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STB(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STH(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STW(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			/* add value from src_reg into this */
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			/* store result back */
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* we're done if this succeeded; otherwise retry */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			tmp_idx = ctx->idx * 4;
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			PPC_LBZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			PPC_LHZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			PPC_LWZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			PPC_MR(b2p[BPF_REG_0], 3);
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPLW(dst_reg, src_reg);
				else
					PPC_CMPLD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					PPC_CMPW(dst_reg, src_reg);
				else
					PPC_CMPD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    src_reg);
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_AND(tmp_reg, dst_reg, src_reg);
					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
						       31);
				}
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPLWI(dst_reg, imm);
					else
						PPC_CMPLDI(dst_reg, imm);
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						PPC_CMPLW(dst_reg,
							  b2p[TMP_REG_1]);
					else
						PPC_CMPLD(dst_reg,
							  b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						PPC_CMPWI(dst_reg, imm);
					else
						PPC_CMPDI(dst_reg, imm);
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						PPC_CMPW(dst_reg,
							 b2p[TMP_REG_1]);
					else
						PPC_CMPD(dst_reg,
							 b2p[TMP_REG_1]);
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						PPC_AND_DOT(tmp_reg, dst_reg,
							    tmp_reg);
					} else {
						PPC_AND(tmp_reg, dst_reg,
							tmp_reg);
						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
							       0, 0, 31);
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequences for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}

struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
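	/* e.g. a verifier stack_depth of 40 bytes is rounded up to 48 here. */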
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * Pretend to build the prologue, given the features we've seen. This
	 * will update cgctx.idx as it pretends to output instructions, then
	 * we can calculate the total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
				       bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the actual code at code_base rather
		 * than image, since the opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}