// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
}

static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}

static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}

static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		EMIT(PPC_RAW_NOP());
		EMIT(PPC_RAW_NOP());
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

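	/*
	 * A tail call enters the program at bpf_func +
	 * BPF_TAILCALL_PROLOGUE_SIZE (see bpf_jit_emit_tail_call()), i.e.
	 * just past the two fixed instructions emitted above, so that
	 * tail_call_cnt is not re-initialized on a tail call.
	 */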
	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size));
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			EMIT(PPC_RAW_MTLR(0));
		}
	}
}

static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));

	EMIT(PPC_RAW_BLR());
}

static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	EMIT(PPC_RAW_MTLR(12));
#endif
	EMIT(PPC_RAW_BLRL());
}

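/*
 * bpf_jit_emit_func_call_hlp() above is used when the callee's address is
 * already known at JIT time (kernel helpers). The variant below is used when
 * the address may only be resolved in the final extra pass (bpf-to-bpf
 * calls), so it always emits a fixed-length sequence that can later be
 * patched in place by bpf_jit_fixup_subprog_calls().
 */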
static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but this may optimize the number of
	 * instructions required based on the nature of the address.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		EMIT(PPC_RAW_NOP());

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	EMIT(PPC_RAW_MTLR(12));
	EMIT(PPC_RAW_BLRL());
}

static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
#else
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
#endif
	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	EMIT(PPC_RAW_BCTR());
	/* out: */
}

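/*
 * Note that the body builder below may be invoked with image == NULL:
 * bpf_int_jit_compile() first runs a "scouting" pass with no output buffer,
 * purely to size the program, populate addrs[] and collect ctx->seen flags.
 */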
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
			else
				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				else
					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
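		/*
		 * MOD is open-coded as dst - (dst / src) * src (divide,
		 * multiply, subtract) rather than relying on a hardware
		 * modulo instruction, which is not available on all of the
		 * CPUs this JIT may run on.
		 */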
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			if (imm == 1) {
				if (BPF_OP(code) == BPF_DIV)
					goto bpf_alu32_trunc;
				/* dst %= 1 is always 0 */
				EMIT(PPC_RAW_LI(dst_reg, 0));
				break;
			}

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
							dst_reg,
							b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
							b2p[TMP_REG_1]));
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
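		/*
		 * For the 32-bit shifts below, the instruction used already
		 * zero-extends the result into the full 64-bit register. If
		 * the verifier nevertheless inserted an explicit
		 * zero-extension as the next instruction (insn_is_zext()),
		 * it is skipped, but its address is still recorded in
		 * addrs[] so that branches to it keep working.
		 */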
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
			/* skip zero extension move, but set address map. */
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			if (imm == 1) {
				/* special mov32 for zext */
				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
				break;
			}
			EMIT(PPC_RAW_MR(dst_reg, src_reg));
			goto bpf_alu32_trunc;
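		/*
		 * PPC_LI32() sign-extends the immediate, so a negative 32-bit
		 * mov must fall through to the truncation at bpf_alu32_trunc.
		 * A non-negative immediate already leaves the upper 32 bits
		 * clear, so a following verifier zext can be skipped.
		 */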
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			else if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
				/* Rotate 8 bits right & insert LSB to reg */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
				/* Move result back to dst_reg */
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
				/* Rotate 24 bits and insert byte 1 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
				/* Rotate 24 bits and insert byte 3 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
				if (insn_is_zext(&insn[i + 1]))
					addrs[++i] = ctx->idx * 4;
				break;
			case 32:
				if (!fp->aux->verifier_zext)
					/* zero-extend 32 bits into 64 bits */
					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX ATOMIC (atomic ops)
		 */
		case BPF_STX | BPF_ATOMIC | BPF_W:
			if (imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}

			/* *(u32 *)(dst + off) += src */

			/* Get EA into TMP_REG_1 */
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			/* add value from src_reg into this */
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			/* store result back */
			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		case BPF_STX | BPF_ATOMIC | BPF_DW:
			if (imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}
			/* *(u64 *)(dst + off) += src */

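			/*
			 * Same reserve/update/store-conditional loop as the
			 * 32-bit case above, using ldarx/stdcx. on a
			 * doubleword: retry from tmp_idx until the store
			 * conditional succeeds.
			 */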
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

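		/*
		 * The cases below only select the PowerPC condition to branch
		 * on (true_cond); cond_branch then emits the comparison
		 * itself (signed or unsigned, word or doubleword, depending
		 * on the opcode) followed by a conditional branch to the BPF
		 * target at addrs[i + 1 + off].
		 */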
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
							     src_reg));
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
								31));
				}
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLW(dst_reg,
								   b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPLD(dst_reg,
								   b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						EMIT(PPC_RAW_CMPW(dst_reg,
								  b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPD(dst_reg,
								  b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
								     tmp_reg));
					} else {
						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
								 tmp_reg));
						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
									0, 0, 31));
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}

/* Fix the branch target addresses for subprog calls */
static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
				       struct codegen_context *ctx, u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	bool func_addr_fixed;
	u64 func_addr;
	u32 tmp_idx;
	int i, ret;

	for (i = 0; i < fp->len; i++) {
		/*
		 * During the extra pass, only the branch target addresses for
		 * the subprog calls need to be fixed. All other instructions
		 * can be left untouched.
		 *
		 * The JITed image length does not change because we already
		 * ensure that the JITed instruction sequence for these calls
		 * are of fixed length by padding them with NOPs.
		 */
		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == BPF_PSEUDO_CALL) {
			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
						    &func_addr,
						    &func_addr_fixed);
			if (ret < 0)
				return ret;

			/*
			 * Save ctx->idx as this would currently point to the
			 * end of the JITed image and set it to the offset of
			 * the instruction sequence corresponding to the
			 * subprog call temporarily.
			 */
			tmp_idx = ctx->idx;
			ctx->idx = addrs[i] / 4;
			bpf_jit_emit_func_call_rel(image, ctx, func_addr);

			/*
			 * Restore ctx->idx here. This is safe as the length
			 * of the JITed sequence remains unchanged.
			 */
			ctx->idx = tmp_idx;
		}
	}

	return 0;
}

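/*
 * State cached across bpf_int_jit_compile() invocations via fp->aux->jit_data,
 * so that the extra pass for bpf-to-bpf calls can patch the existing image in
 * place instead of regenerating it.
 */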
struct powerpc64_jit_data {
	struct bpf_binary_header *header;
	u32 *addrs;
	u8 *image;
	u32 proglen;
	struct codegen_context ctx;
};

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct powerpc64_jit_data *jit_data;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;
	struct bpf_prog *org_fp = fp;
	struct bpf_prog *tmp_fp;
	bool bpf_blinded = false;
	bool extra_pass = false;

	if (!fp->jit_requested)
		return org_fp;

	tmp_fp = bpf_jit_blind_constants(org_fp);
	if (IS_ERR(tmp_fp))
		return org_fp;

	if (tmp_fp != org_fp) {
		bpf_blinded = true;
		fp = tmp_fp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = org_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}

	flen = fp->len;
	addrs = jit_data->addrs;
	if (addrs) {
		cgctx = jit_data->ctx;
		image = jit_data->image;
		bpf_hdr = jit_data->header;
		proglen = jit_data->proglen;
		alloclen = proglen + FUNCTION_DESCR_SIZE;
		extra_pass = true;
		goto skip_init_ctx;
	}

	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL) {
		fp = org_fp;
		goto out_addrs;
	}

	memset(&cgctx, 0, sizeof(struct codegen_context));

	/* Make sure that the stack is quadword aligned. */
	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);

	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
		/* We hit something illegal or unsupported. */
		fp = org_fp;
		goto out_addrs;
	}

	/*
	 * If we have seen a tail call, we need a second pass.
	 * This is because bpf_jit_emit_common_epilogue() is called
	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
	 */
	if (cgctx.seen & SEEN_TAILCALL) {
		cgctx.idx = 0;
		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
			fp = org_fp;
			goto out_addrs;
		}
	}

	/*
	 * Pretend to build prologue, given the features we've seen. This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
				       bpf_jit_fill_ill_insns);
	if (!bpf_hdr) {
		fp = org_fp;
		goto out_addrs;
	}

skip_init_ctx:
	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	if (extra_pass) {
		/*
		 * Do not touch the prologue and epilogue as they will remain
		 * unchanged. Only fix the branch target address for subprog
		 * calls in the body.
		 *
		 * This does not change the offsets and lengths of the subprog
		 * call instruction sequences and hence, the size of the JITed
		 * image as well.
		 */
		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);

		/* There is no need to perform the usual passes. */
		goto skip_codegen_passes;
	}

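	/*
	 * Two full code generation passes: the first pass fills addrs[] with
	 * the final offsets (now that the prologue precedes the body), and
	 * the second pass re-emits everything so that forward branches and
	 * the epilogue address are resolved against those final offsets.
	 */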
	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

skip_codegen_passes:
	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

#ifdef PPC64_ELF_ABI_v1
	/* Function descriptor nastiness: Address + TOC */
	((u64 *)image)[0] = (u64)code_base;
	((u64 *)image)[1] = local_paca->kernel_toc;
#endif

	fp->bpf_func = (void *)image;
	fp->jited = 1;
	fp->jited_len = alloclen;

	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, addrs);
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	} else {
		jit_data->addrs = addrs;
		jit_data->ctx = cgctx;
		jit_data->proglen = proglen;
		jit_data->image = image;
		jit_data->header = bpf_hdr;
	}

out:
	if (bpf_blinded)
		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

	return fp;
}

/* Overriding bpf_jit_free() as we don't set images read-only. */
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}