// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>
#include <linux/bpf.h>

#include "bpf_jit64.h"

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]);
}

/*
 * When not setting up our own stackframe, the redzone usage is:
 *
 *		[	prev sp		] <-------------
 *		[	  ...		]		|
 * sp (r1) --->	[    stack pointer	] --------------
 *		[   nv gpr save area	] 6*8
 *		[    tail_call_cnt	] 8
 *		[    local_tmp_var	] 8
 *		[   unused red zone	] 208 bytes protected
 */
static int bpf_jit_stack_local(struct codegen_context *ctx)
{
	if (bpf_has_stack_frame(ctx))
		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
	else
		return -(BPF_PPC_STACK_SAVE + 16);
}

static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	return bpf_jit_stack_local(ctx) + 8;
}

static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
{
	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
		return (bpf_has_stack_frame(ctx) ?
			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
				- (8 * (32 - reg));

	pr_err("BPF JIT is asking about unknown registers");
	BUG();
}

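/*
 * Hook invoked from the common powerpc BPF JIT code: the 32-bit JIT uses
 * it to remap BPF registers onto volatile GPRs when it can, while the
 * 64-bit JIT keeps its fixed register mapping, so there is nothing to do
 * here.
 */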
void bpf_jit_realloc_regs(struct codegen_context *ctx)
{
}

void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/*
	 * Initialize tail_call_cnt if we do tail calls.
	 * Otherwise, put in NOPs so that it can be skipped when we are
	 * invoked through a tail call.
	 */
	if (ctx->seen & SEEN_TAILCALL) {
		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
		/* this goes in the redzone */
		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
	} else {
		EMIT(PPC_RAW_NOP());
		EMIT(PPC_RAW_NOP());
	}

#define BPF_TAILCALL_PROLOGUE_SIZE	8

	if (bpf_has_stack_frame(ctx)) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, b2p[i]))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
				STACK_FRAME_MIN_SIZE + ctx->stack_size));
}

static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, b2p[i]))
			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));

	/* Tear down our stack frame */
	if (bpf_has_stack_frame(ctx)) {
		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			EMIT(PPC_RAW_MTLR(0));
		}
	}
}

void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	bpf_jit_emit_common_epilogue(image, ctx);

	/* Move result to r3 */
	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));

	EMIT(PPC_RAW_BLR());
}

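/*
 * Note on the PPC64_ELF_ABI_v1 paths below: with the ELFv1 ABI, a function
 * pointer refers to a function descriptor rather than to the code itself.
 * The descriptor holds the entry point at offset 0, the callee's TOC
 * pointer at offset 8 and an environment pointer at offset 16, which is
 * why the branch target is loaded from offset 0 and r2 from offset 8.
 */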
static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
				       u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	EMIT(PPC_RAW_MTLR(12));
#endif
	EMIT(PPC_RAW_BLRL());
}

void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func)
{
	unsigned int i, ctx_idx = ctx->idx;

	/* Load function address into r12 */
	PPC_LI64(12, func);

	/* For bpf-to-bpf function calls, the callee's address is unknown
	 * until the last extra pass. As seen above, we use PPC_LI64() to
	 * load the callee's address, but PPC_LI64() optimizes the number
	 * of instructions it emits based on the value being loaded.
	 *
	 * Since we don't want the number of instructions emitted to change,
	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
	 * we always have a five-instruction sequence, which is the maximum
	 * that PPC_LI64() can emit.
	 */
	for (i = ctx->idx - ctx_idx; i < 5; i++)
		EMIT(PPC_RAW_NOP());

#ifdef PPC64_ELF_ABI_v1
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, 12, 8);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(12, 12, 0);
#endif

	EMIT(PPC_RAW_MTLR(12));
	EMIT(PPC_RAW_BLRL());
}

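/*
 * Emit the checks and the indirect jump that implement bpf_tail_call():
 * the branch target skips BPF_TAILCALL_PROLOGUE_SIZE bytes of the callee
 * (plus the function descriptor on ELFv1), so the callee's tail_call_cnt
 * initialization is not re-run and the count keeps accumulating across
 * chained tail calls.
 */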
static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
{
	/*
	 * By now, the eBPF program has already set up parameters in r3, r4 and r5
	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
	 * r4/BPF_REG_2 - pointer to bpf_array
	 * r5/BPF_REG_3 - index in bpf_array
	 */
	int b2p_bpf_array = b2p[BPF_REG_2];
	int b2p_index = b2p[BPF_REG_3];

	/*
	 * if (index >= array->map.max_entries)
	 *   goto out;
	 */
	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
	PPC_BCC(COND_GE, out);

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *   goto out;
	 */
	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
	PPC_BCC(COND_GT, out);

	/*
	 * tail_call_cnt++;
	 */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));

	/* prog = array->ptrs[index]; */
	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *   goto out;
	 */
	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
	PPC_BCC(COND_EQ, out);

	/* goto *(prog->bpf_func + prologue_size); */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
#ifdef PPC64_ELF_ABI_v1
	/* skip past the function descriptor */
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
#else
	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
#endif
	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));

	/* tear down stack, restore NVRs, ... */
	bpf_jit_emit_common_epilogue(image, ctx);

	EMIT(PPC_RAW_BCTR());
	/* out: */
}

/* Assemble the body code between the prologue & epilogue */
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
		       u32 *addrs, bool extra_pass)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i, ret;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		bool func_addr_fixed;
		u64 func_addr;
		u64 imm64;
		u32 true_cond;
		u32 tmp_idx;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
			bpf_set_seen_register(ctx, dst_reg);
		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
			bpf_set_seen_register(ctx, src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
			else
				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
							  b2p[TMP_REG_1]));
				else
					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
							  b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
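		/*
		 * Division and modulo: the base Power ISA targeted here has no
		 * integer remainder instruction, so BPF_MOD is open-coded as
		 * dst - (dst / src) * src using divwu/divdu followed by a
		 * multiply and subtract.
		 */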
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
						  b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			if (BPF_OP(code) == BPF_MOD) {
				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
						  b2p[TMP_REG_1]));
				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else
				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
							   dst_reg,
							   b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
							  b2p[TMP_REG_1],
							  b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							 b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
							   b2p[TMP_REG_1]));
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
							   dst_reg,
							   b2p[TMP_REG_1]));
					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
							  b2p[TMP_REG_1],
							  b2p[TMP_REG_2]));
					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
							 b2p[TMP_REG_1]));
				} else
					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
							   b2p[TMP_REG_1]));
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
			} else {
				if (IMM_L(imm))
					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
				if (IMM_H(imm))
					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
			}
			goto bpf_alu32_trunc;
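		/*
		 * Zero extension: when the verifier inserts explicit
		 * zero-extension instructions (fp->aux->verifier_zext), a
		 * 32-bit operation whose PPC encoding already clears the
		 * upper 32 bits can skip the following zext insn. Its offset
		 * is still recorded in addrs[] so branch targets stay valid.
		 */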
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
			/* skip zero extension move, but set address map. */
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
			break;
		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
			break;
		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			if (imm == 1) {
				/* special mov32 for zext */
				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
				break;
			}
			EMIT(PPC_RAW_MR(dst_reg, src_reg));
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			else if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;

bpf_alu32_trunc:
		/* Truncate to 32-bits */
		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
		break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
				/* Rotate 8 bits right & insert LSB to reg */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
				/* Move result back to dst_reg */
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
				/* Rotate 24 bits and insert byte 1 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
				/* Rotate 24 bits and insert byte 3 */
				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
				if (insn_is_zext(&insn[i + 1]))
					addrs[++i] = ctx->idx * 4;
				break;
			case 32:
				if (!fp->aux->verifier_zext)
					/* zero-extend 32 bits into 64 bits */
					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_BPF_STL(src_reg, dst_reg, off);
			break;

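		/*
		 * The atomic cases below emit the usual load-and-reserve /
		 * store-conditional retry loop, roughly:
		 *
		 *	do {
		 *		tmp = lwarx/ldarx(dst + off);
		 *		tmp += src;
		 *	} while (stwcx./stdcx.(dst + off, tmp) fails);
		 *
		 * with PPC_BCC_SHORT(COND_NE, tmp_idx) providing the backward
		 * branch to retry on a failed store-conditional.
		 */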
		/*
		 * BPF_STX ATOMIC (atomic ops)
		 */
		case BPF_STX | BPF_ATOMIC | BPF_W:
			if (insn->imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}

			/* *(u32 *)(dst + off) += src */

			/* Get EA into TMP_REG_1 */
			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			/* load value from memory into TMP_REG_2 */
			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			/* add value from src_reg into this */
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			/* store result back */
			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;
		case BPF_STX | BPF_ATOMIC | BPF_DW:
			if (insn->imm != BPF_ADD) {
				pr_err_ratelimited(
					"eBPF filter atomic op code %02x (@%d) unsupported\n",
					code, i);
				return -ENOTSUPP;
			}
			/* *(u64 *)(dst + off) += src */

			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
			tmp_idx = ctx->idx * 4;
			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
			PPC_BCC_SHORT(COND_NE, tmp_idx);
			break;

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
			if (insn_is_zext(&insn[i + 1]))
				addrs[++i] = ctx->idx * 4;
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_BPF_LL(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				(((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper or bpf function
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;

			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
						    &func_addr, &func_addr_fixed);
			if (ret < 0)
				return ret;

			if (func_addr_fixed)
				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
			else
				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
			/* move return value from r3 to BPF_REG_0 */
			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
			break;

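		/*
		 * For the conditional branches below, BPF_JMP32 variants
		 * compare only the low 32 bits (cmplw/cmpw) while BPF_JMP
		 * variants compare all 64 bits (cmpld/cmpd); the signed
		 * conditions use the signed forms of the compares.
		 */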
		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_X:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_X:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_X:
			true_cond = COND_LT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_X:
		case BPF_JMP32 | BPF_JSGE | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_X:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_X:
			true_cond = COND_LE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

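		/*
		 * All conditional branches funnel through here: the switch
		 * below emits the compare (or AND) that sets CR0 for the
		 * condition chosen above, and PPC_BCC then emits the branch
		 * on true_cond to the BPF target at addrs[i + 1 + off].
		 */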
cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JLT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JLE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
			case BPF_JMP32 | BPF_JGT | BPF_X:
			case BPF_JMP32 | BPF_JLT | BPF_X:
			case BPF_JMP32 | BPF_JGE | BPF_X:
			case BPF_JMP32 | BPF_JLE | BPF_X:
			case BPF_JMP32 | BPF_JEQ | BPF_X:
			case BPF_JMP32 | BPF_JNE | BPF_X:
				/* unsigned comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSLT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
			case BPF_JMP | BPF_JSLE | BPF_X:
			case BPF_JMP32 | BPF_JSGT | BPF_X:
			case BPF_JMP32 | BPF_JSLT | BPF_X:
			case BPF_JMP32 | BPF_JSGE | BPF_X:
			case BPF_JMP32 | BPF_JSLE | BPF_X:
				/* signed comparison */
				if (BPF_CLASS(code) == BPF_JMP32)
					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
				else
					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
			case BPF_JMP32 | BPF_JSET | BPF_X:
				if (BPF_CLASS(code) == BPF_JMP) {
					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
							     src_reg));
				} else {
					int tmp_reg = b2p[TMP_REG_1];

					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
								31));
				}
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JLT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
			case BPF_JMP | BPF_JLE | BPF_K:
			case BPF_JMP32 | BPF_JNE | BPF_K:
			case BPF_JMP32 | BPF_JEQ | BPF_K:
			case BPF_JMP32 | BPF_JGT | BPF_K:
			case BPF_JMP32 | BPF_JLT | BPF_K:
			case BPF_JMP32 | BPF_JGE | BPF_K:
			case BPF_JMP32 | BPF_JLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
				} else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					if (is_jmp32)
						EMIT(PPC_RAW_CMPLW(dst_reg,
								   b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPLD(dst_reg,
								   b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSLT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
			case BPF_JMP | BPF_JSLE | BPF_K:
			case BPF_JMP32 | BPF_JSGT | BPF_K:
			case BPF_JMP32 | BPF_JSLT | BPF_K:
			case BPF_JMP32 | BPF_JSGE | BPF_K:
			case BPF_JMP32 | BPF_JSLE | BPF_K:
			{
				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;

				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768) {
					if (is_jmp32)
						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
					else
						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
				} else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					if (is_jmp32)
						EMIT(PPC_RAW_CMPW(dst_reg,
								  b2p[TMP_REG_1]));
					else
						EMIT(PPC_RAW_CMPD(dst_reg,
								  b2p[TMP_REG_1]));
				}
				break;
			}
			case BPF_JMP | BPF_JSET | BPF_K:
			case BPF_JMP32 | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
				else {
					int tmp_reg = b2p[TMP_REG_1];

					PPC_LI32(tmp_reg, imm);
					if (BPF_CLASS(code) == BPF_JMP) {
						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
								     tmp_reg));
					} else {
						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
								 tmp_reg));
						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
									0, 0, 31));
					}
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Tail call
		 */
		case BPF_JMP | BPF_TAIL_CALL:
			ctx->seen |= SEEN_TAILCALL;
			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
			break;

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					   code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}