/*
 * Just-In-Time compiler for eBPF filters on 32bit ARM
 *
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>

#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>

#include "bpf_jit_32.h"

int bpf_jit_enable __read_mostly;

#define STACK_OFFSET(k)	(k)
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */

/* Flags used for JIT optimization */
#define SEEN_CALL	(1 << 0)

#define FLAG_IMM_OVERFLOW	(1 << 0)

/*
 * Map eBPF registers to ARM 32bit registers or stack scratch space.
 *
 * 1. The first argument is passed in ARM 32bit registers; the remaining
 *    arguments are passed on the stack scratch space.
 * 2. The first callee-saved argument is mapped to ARM 32bit registers; the
 *    remaining callee-saved arguments are mapped to stack scratch space.
 * 3. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 *
 * As the eBPF registers are all 64 bit registers and ARM has only 32 bit
 * registers, we have to map each eBPF register to a pair of ARM 32 bit
 * registers or to scratch memory space, and build each 64 bit value from
 * that pair.
 */
static const u8 bpf2a32[][2] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {ARM_R1, ARM_R0},
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {ARM_R3, ARM_R2},
	/* Stored on stack scratch space */
	[BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
	[BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {ARM_R5, ARM_R4},
	/* Stored on stack scratch space */
	[BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
	[BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	/* Temporary registers for internal BPF JIT, can be used
	 * for constant blinding and others.
	 */
	[TMP_REG_1] = {ARM_R7, ARM_R6},
	[TMP_REG_2] = {ARM_R10, ARM_R8},
	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	/* temporary register for blinding constants.
	 * Stored on stack scratch space.
	 */
	[BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
};

#define dst_lo	dst[1]
#define dst_hi	dst[0]
#define src_lo	src[1]
#define src_hi	src[0]

/*
 * JIT Context:
 *
 * prog			:	bpf_prog
 * idx			:	index of current last JITed instruction.
 * prologue_bytes	:	bytes used in prologue.
 * epilogue_offset	:	offset of epilogue starting.
 * seen			:	bit mask used for JIT optimization.
 * offsets		:	array of eBPF instruction offsets in
 *				JITed code.
 * target		:	final JITed code.
 * epilogue_bytes	:	no of bytes used in epilogue.
 * imm_count		:	no of immediate counts used for global
 *				variables.
 * imms			:	array of global variable addresses.
 */

struct jit_ctx {
	const struct bpf_prog *prog;
	unsigned int idx;
	unsigned int prologue_bytes;
	unsigned int epilogue_offset;
	u32 seen;
	u32 flags;
	u32 *offsets;
	u32 *target;
	u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};

/*
 * Wrappers which handle both OABI and EABI and assure Thumb2 interworking
 * (where the assembly routines like __aeabi_uidiv could cause problems).
 */
static u32 jit_udiv32(u32 dividend, u32 divisor)
{
	return dividend / divisor;
}

static u32 jit_mod32(u32 dividend, u32 divisor)
{
	return dividend % divisor;
}

static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}

/*
 * Emit an instruction that will be executed unconditionally.
 */
static inline void emit(u32 inst, struct jit_ctx *ctx)
{
	_emit(ARM_COND_AL, inst, ctx);
}

/*
 * Checks if the immediate value can be converted to an imm12 (12 bit) value.
 */
static int16_t imm8m(u32 x)
{
	u32 rot;

	for (rot = 0; rot < 16; rot++)
		if ((x & ~ror32(0xff, 2 * rot)) == 0)
			return rol32(x, 2 * rot) | (rot << 8);
	return -1;
}
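
/*
 * Illustrative sketch (editor's note, not used by the JIT): a non-negative
 * value returned by imm8m() is the ARM "rotated immediate" encoding, i.e.
 * an 8 bit constant rotated right by twice the 4 bit rotation field.  For
 * example, imm8m(0xff000000) is 0x4ff (0xff rotated right by 8), while
 * imm8m(0x101) is -1 because no single rotation fits.  Decoding a valid
 * encoding back would look like this:
 */
static inline u32 example_decode_imm12(u16 imm12)
{
	return ror32(imm12 & 0xff, 2 * (imm12 >> 8));
}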

/*
 * Initializes the JIT space with undefined instructions.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}

/* Stack must be multiples of 16 Bytes */
#define STACK_ALIGN(sz) (((sz) + 3) & ~3)

/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
 * BPF_REG_FP and Tail call counts.
 */
#define SCRATCH_SIZE 80

/* total stack size used in JITed code */
#define _STACK_SIZE \
	(ctx->prog->aux->stack_depth + \
	 SCRATCH_SIZE + \
	 4 /* extra for skb_copy_bits buffer */)

#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)

/* Get the offset of eBPF registers stored on scratch space.
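 *
 * Worked example (editor's note): with stack_depth == 0 the aligned
 * STACK_SIZE is 84, so STACK_VAR(0) is 80 and the low word of BPF_REG_2
 * lives at [ARM_SP, #80] once the prologue has dropped the stack.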
*/ 200 #define STACK_VAR(off) (STACK_SIZE-off-4) 201 202 /* Offset of skb_copy_bits buffer */ 203 #define SKB_BUFFER STACK_VAR(SCRATCH_SIZE) 204 205 #if __LINUX_ARM_ARCH__ < 7 206 207 static u16 imm_offset(u32 k, struct jit_ctx *ctx) 208 { 209 unsigned int i = 0, offset; 210 u16 imm; 211 212 /* on the "fake" run we just count them (duplicates included) */ 213 if (ctx->target == NULL) { 214 ctx->imm_count++; 215 return 0; 216 } 217 218 while ((i < ctx->imm_count) && ctx->imms[i]) { 219 if (ctx->imms[i] == k) 220 break; 221 i++; 222 } 223 224 if (ctx->imms[i] == 0) 225 ctx->imms[i] = k; 226 227 /* constants go just after the epilogue */ 228 offset = ctx->offsets[ctx->prog->len - 1] * 4; 229 offset += ctx->prologue_bytes; 230 offset += ctx->epilogue_bytes; 231 offset += i * 4; 232 233 ctx->target[offset / 4] = k; 234 235 /* PC in ARM mode == address of the instruction + 8 */ 236 imm = offset - (8 + ctx->idx * 4); 237 238 if (imm & ~0xfff) { 239 /* 240 * literal pool is too far, signal it into flags. we 241 * can only detect it on the second pass unfortunately. 242 */ 243 ctx->flags |= FLAG_IMM_OVERFLOW; 244 return 0; 245 } 246 247 return imm; 248 } 249 250 #endif /* __LINUX_ARM_ARCH__ */ 251 252 static inline int bpf2a32_offset(int bpf_to, int bpf_from, 253 const struct jit_ctx *ctx) { 254 int to, from; 255 256 if (ctx->target == NULL) 257 return 0; 258 to = ctx->offsets[bpf_to]; 259 from = ctx->offsets[bpf_from]; 260 261 return to - from - 1; 262 } 263 264 /* 265 * Move an immediate that's not an imm8m to a core register. 266 */ 267 static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) 268 { 269 #if __LINUX_ARM_ARCH__ < 7 270 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 271 #else 272 emit(ARM_MOVW(rd, val & 0xffff), ctx); 273 if (val > 0xffff) 274 emit(ARM_MOVT(rd, val >> 16), ctx); 275 #endif 276 } 277 278 static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) 279 { 280 int imm12 = imm8m(val); 281 282 if (imm12 >= 0) 283 emit(ARM_MOV_I(rd, imm12), ctx); 284 else 285 emit_mov_i_no8m(rd, val, ctx); 286 } 287 288 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 289 { 290 ctx->seen |= SEEN_CALL; 291 #if __LINUX_ARM_ARCH__ < 5 292 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 293 294 if (elf_hwcap & HWCAP_THUMB) 295 emit(ARM_BX(tgt_reg), ctx); 296 else 297 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 298 #else 299 emit(ARM_BLX_R(tgt_reg), ctx); 300 #endif 301 } 302 303 static inline int epilogue_offset(const struct jit_ctx *ctx) 304 { 305 int to, from; 306 /* No need for 1st dummy run */ 307 if (ctx->target == NULL) 308 return 0; 309 to = ctx->epilogue_offset; 310 from = ctx->idx; 311 312 return to - from - 2; 313 } 314 315 static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) 316 { 317 const u8 *tmp = bpf2a32[TMP_REG_1]; 318 s32 jmp_offset; 319 320 /* checks if divisor is zero or not. If it is, then 321 * exit directly. 
322 */ 323 emit(ARM_CMP_I(rn, 0), ctx); 324 _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx); 325 jmp_offset = epilogue_offset(ctx); 326 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 327 #if __LINUX_ARM_ARCH__ == 7 328 if (elf_hwcap & HWCAP_IDIVA) { 329 if (op == BPF_DIV) 330 emit(ARM_UDIV(rd, rm, rn), ctx); 331 else { 332 emit(ARM_UDIV(ARM_IP, rm, rn), ctx); 333 emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); 334 } 335 return; 336 } 337 #endif 338 339 /* 340 * For BPF_ALU | BPF_DIV | BPF_K instructions 341 * As ARM_R1 and ARM_R0 contains 1st argument of bpf 342 * function, we need to save it on caller side to save 343 * it from getting destroyed within callee. 344 * After the return from the callee, we restore ARM_R0 345 * ARM_R1. 346 */ 347 if (rn != ARM_R1) { 348 emit(ARM_MOV_R(tmp[0], ARM_R1), ctx); 349 emit(ARM_MOV_R(ARM_R1, rn), ctx); 350 } 351 if (rm != ARM_R0) { 352 emit(ARM_MOV_R(tmp[1], ARM_R0), ctx); 353 emit(ARM_MOV_R(ARM_R0, rm), ctx); 354 } 355 356 /* Call appropriate function */ 357 ctx->seen |= SEEN_CALL; 358 emit_mov_i(ARM_IP, op == BPF_DIV ? 359 (u32)jit_udiv32 : (u32)jit_mod32, ctx); 360 emit_blx_r(ARM_IP, ctx); 361 362 /* Save return value */ 363 if (rd != ARM_R0) 364 emit(ARM_MOV_R(rd, ARM_R0), ctx); 365 366 /* Restore ARM_R0 and ARM_R1 */ 367 if (rn != ARM_R1) 368 emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); 369 if (rm != ARM_R0) 370 emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); 371 } 372 373 /* Checks whether BPF register is on scratch stack space or not. */ 374 static inline bool is_on_stack(u8 bpf_reg) 375 { 376 static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, 377 BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, 378 BPF_REG_2, BPF_REG_FP}; 379 int i, reg_len = sizeof(stack_regs); 380 381 for (i = 0 ; i < reg_len ; i++) { 382 if (bpf_reg == stack_regs[i]) 383 return true; 384 } 385 return false; 386 } 387 388 static inline void emit_a32_mov_i(const u8 dst, const u32 val, 389 bool dstk, struct jit_ctx *ctx) 390 { 391 const u8 *tmp = bpf2a32[TMP_REG_1]; 392 393 if (dstk) { 394 emit_mov_i(tmp[1], val, ctx); 395 emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); 396 } else { 397 emit_mov_i(dst, val, ctx); 398 } 399 } 400 401 /* Sign extended move */ 402 static inline void emit_a32_mov_i64(const bool is64, const u8 dst[], 403 const u32 val, bool dstk, 404 struct jit_ctx *ctx) { 405 u32 hi = 0; 406 407 if (is64 && (val & (1<<31))) 408 hi = (u32)~0; 409 emit_a32_mov_i(dst_lo, val, dstk, ctx); 410 emit_a32_mov_i(dst_hi, hi, dstk, ctx); 411 } 412 413 static inline void emit_a32_add_r(const u8 dst, const u8 src, 414 const bool is64, const bool hi, 415 struct jit_ctx *ctx) { 416 /* 64 bit : 417 * adds dst_lo, dst_lo, src_lo 418 * adc dst_hi, dst_hi, src_hi 419 * 32 bit : 420 * add dst_lo, dst_lo, src_lo 421 */ 422 if (!hi && is64) 423 emit(ARM_ADDS_R(dst, dst, src), ctx); 424 else if (hi && is64) 425 emit(ARM_ADC_R(dst, dst, src), ctx); 426 else 427 emit(ARM_ADD_R(dst, dst, src), ctx); 428 } 429 430 static inline void emit_a32_sub_r(const u8 dst, const u8 src, 431 const bool is64, const bool hi, 432 struct jit_ctx *ctx) { 433 /* 64 bit : 434 * subs dst_lo, dst_lo, src_lo 435 * sbc dst_hi, dst_hi, src_hi 436 * 32 bit : 437 * sub dst_lo, dst_lo, src_lo 438 */ 439 if (!hi && is64) 440 emit(ARM_SUBS_R(dst, dst, src), ctx); 441 else if (hi && is64) 442 emit(ARM_SBC_R(dst, dst, src), ctx); 443 else 444 emit(ARM_SUB_R(dst, dst, src), ctx); 445 } 446 447 static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, 448 const bool hi, const u8 op, struct jit_ctx *ctx){ 449 switch 
(BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		emit_a32_add_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit_a32_sub_r(dst, src, is64, hi, ctx);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ARM_ORR_R(dst, dst, src), ctx);
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ARM_AND_R(dst, dst, src), ctx);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ARM_EOR_R(dst, dst, src), ctx);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		emit(ARM_MUL(dst, dst, src), ctx);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ARM_LSL_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ARM_LSR_R(dst, dst, src), ctx);
		break;
	/* dst = dst >> src (signed) */
	case BPF_ARSH:
		emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
		break;
	}
}

/* ALU operation (32 bit)
 * dst = dst (op) src
 */
static inline void emit_a32_alu_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx, const bool is64,
				  const bool hi, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rn = sstk ? tmp[1] : src;

	if (sstk)
		emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);

	/* ALU operation */
	if (dstk) {
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
		emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
		emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
	} else {
		emit_alu_r(dst, rn, is64, hi, op, ctx);
	}
}

/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
				    const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx,
				    const u8 op) {
	emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
	if (is64)
		emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
	else
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}
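
/*
 * Editor's illustration (assumption, not part of the JIT): for the 64 bit
 * case the helpers above combine the low words first and then fold the
 * carry into the high words (ADDS/ADC, SUBS/SBC).  A rough C model of the
 * add case:
 */
static inline void example_add64(u32 *dst_hi, u32 *dst_lo, u32 src_hi,
				 u32 src_lo)
{
	u32 old_lo = *dst_lo;

	*dst_lo += src_lo;				/* ADDS: sets the carry */
	*dst_hi += src_hi + (*dst_lo < old_lo);		/* ADC: consumes it */
}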

/* dst = src (4 bytes) */
static inline void emit_a32_mov_r(const u8 dst, const u8 src,
				  bool dstk, bool sstk,
				  struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rt = sstk ? tmp[0] : src;

	if (sstk)
		emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
	if (dstk)
		emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
	else
		emit(ARM_MOV_R(dst, rt), ctx);
}

/* dst = src */
static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
				    const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
	if (is64) {
		/* complete 8 byte move */
		emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
	} else {
		/* Zero out high 4 bytes */
		emit_a32_mov_i(dst_hi, 0, dstk, ctx);
	}
}

/* Shift/negate operations with an immediate (32 bit) */
static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
				  struct jit_ctx *ctx, const u8 op) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[0] : dst;

	if (dstk)
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);

	/* Do shift operation */
	switch (op) {
	case BPF_LSH:
		emit(ARM_LSL_I(rd, rd, val), ctx);
		break;
	case BPF_RSH:
		emit(ARM_LSR_I(rd, rd, val), ctx);
		break;
	case BPF_NEG:
		emit(ARM_RSB_I(rd, rd, val), ctx);
		break;
	}

	if (dstk)
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
}

/* dst = -dst (64 bit) */
static inline void emit_a32_neg64(const u8 dst[], bool dstk,
				  struct jit_ctx *ctx){
	const u8 *tmp = bpf2a32[TMP_REG_1];
	u8 rd = dstk ? tmp[1] : dst[1];
	u8 rm = dstk ? tmp[0] : dst[0];

	/* Setup Operand */
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do Negate Operation */
	emit(ARM_RSBS_I(rd, rd, 0), ctx);
	emit(ARM_RSC_I(rm, rm, 0), ctx);

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

/* dst = dst << src */
static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];

	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
	/* As we are using ARM_LR */
	ctx->seen |= SEEN_CALL;
	emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);

	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}
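
/*
 * Editor's illustration (assumption, not part of the JIT): the sequence
 * above builds a 64 bit left shift by a variable amount n (0..63) out of
 * three 32 bit shifts, relying on ARM register-specified shifts yielding
 * zero for amounts of 32 or more.  Roughly equivalent C, with the
 * out-of-range cases spelled out:
 */
static inline void example_lsh64(u32 *hi, u32 *lo, unsigned int n)
{
	u32 lo_in = *lo;

	*hi = (n < 32 ? *hi << n : 0) |
	      (n >= 32 && n < 64 ? lo_in << (n - 32) : 0) |
	      (n && n < 32 ? lo_in >> (32 - n) : 0);
	*lo = (n < 32) ? lo_in << n : 0;
}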

/* dst = dst >> src (signed) */
static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do the ARSH operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	/* As we are using ARM_LR */
	ctx->seen |= SEEN_CALL;
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	_emit(ARM_COND_MI, ARM_B(0), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}

/* dst = dst >> src */
static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
				    bool sstk, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup Operands */
	u8 rt = sstk ? tmp2[1] : src_lo;
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (sstk)
		emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSR operation */
	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
	/* As we are using ARM_LR */
	ctx->seen |= SEEN_CALL;
	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
	emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
	if (dstk) {
		emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
	} else {
		emit(ARM_MOV_R(rd, ARM_LR), ctx);
		emit(ARM_MOV_R(rm, ARM_IP), ctx);
	}
}

/* dst = dst << val */
static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx){
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ? tmp[0] : dst_hi;

	if (dstk) {
		emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}

	/* Do LSH operation */
	if (val < 32) {
		emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
		emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
		emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
	} else {
		if (val == 32)
			emit(ARM_MOV_R(rm, rd), ctx);
		else
			emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
		emit(ARM_EOR_R(rd, rd, rd), ctx);
	}

	if (dstk) {
		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
		emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
	}
}

/* dst = dst >> val */
static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
				    const u32 val, struct jit_ctx *ctx) {
	const u8 *tmp = bpf2a32[TMP_REG_1];
	const u8 *tmp2 = bpf2a32[TMP_REG_2];
	/* Setup operands */
	u8 rd = dstk ? tmp[1] : dst_lo;
	u8 rm = dstk ?
tmp[0] : dst_hi; 751 752 if (dstk) { 753 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 754 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 755 } 756 757 /* Do LSR operation */ 758 if (val < 32) { 759 emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); 760 emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); 761 emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); 762 } else if (val == 32) { 763 emit(ARM_MOV_R(rd, rm), ctx); 764 emit(ARM_MOV_I(rm, 0), ctx); 765 } else { 766 emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx); 767 emit(ARM_MOV_I(rm, 0), ctx); 768 } 769 770 if (dstk) { 771 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 772 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 773 } 774 } 775 776 /* dst = dst >> val (signed) */ 777 static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, 778 const u32 val, struct jit_ctx *ctx){ 779 const u8 *tmp = bpf2a32[TMP_REG_1]; 780 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 781 /* Setup operands */ 782 u8 rd = dstk ? tmp[1] : dst_lo; 783 u8 rm = dstk ? tmp[0] : dst_hi; 784 785 if (dstk) { 786 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 787 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 788 } 789 790 /* Do ARSH operation */ 791 if (val < 32) { 792 emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); 793 emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); 794 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); 795 } else if (val == 32) { 796 emit(ARM_MOV_R(rd, rm), ctx); 797 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); 798 } else { 799 emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx); 800 emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); 801 } 802 803 if (dstk) { 804 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 805 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 806 } 807 } 808 809 static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, 810 bool sstk, struct jit_ctx *ctx) { 811 const u8 *tmp = bpf2a32[TMP_REG_1]; 812 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 813 /* Setup operands for multiplication */ 814 u8 rd = dstk ? tmp[1] : dst_lo; 815 u8 rm = dstk ? tmp[0] : dst_hi; 816 u8 rt = sstk ? tmp2[1] : src_lo; 817 u8 rn = sstk ? tmp2[0] : src_hi; 818 819 if (dstk) { 820 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 821 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 822 } 823 if (sstk) { 824 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); 825 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); 826 } 827 828 /* Do Multiplication */ 829 emit(ARM_MUL(ARM_IP, rd, rn), ctx); 830 emit(ARM_MUL(ARM_LR, rm, rt), ctx); 831 /* As we are using ARM_LR */ 832 ctx->seen |= SEEN_CALL; 833 emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); 834 835 emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); 836 emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); 837 if (dstk) { 838 emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); 839 emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); 840 } else { 841 emit(ARM_MOV_R(rd, ARM_IP), ctx); 842 } 843 } 844 845 /* *(size *)(dst + off) = src */ 846 static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, 847 const s32 off, struct jit_ctx *ctx, const u8 sz){ 848 const u8 *tmp = bpf2a32[TMP_REG_1]; 849 u8 rd = dstk ? 
tmp[1] : dst; 850 851 if (dstk) 852 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); 853 if (off) { 854 emit_a32_mov_i(tmp[0], off, false, ctx); 855 emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); 856 rd = tmp[0]; 857 } 858 switch (sz) { 859 case BPF_W: 860 /* Store a Word */ 861 emit(ARM_STR_I(src, rd, 0), ctx); 862 break; 863 case BPF_H: 864 /* Store a HalfWord */ 865 emit(ARM_STRH_I(src, rd, 0), ctx); 866 break; 867 case BPF_B: 868 /* Store a Byte */ 869 emit(ARM_STRB_I(src, rd, 0), ctx); 870 break; 871 } 872 } 873 874 /* dst = *(size*)(src + off) */ 875 static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, 876 const s32 off, struct jit_ctx *ctx, const u8 sz){ 877 const u8 *tmp = bpf2a32[TMP_REG_1]; 878 u8 rd = dstk ? tmp[1] : dst; 879 u8 rm = src; 880 881 if (off) { 882 emit_a32_mov_i(tmp[0], off, false, ctx); 883 emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); 884 rm = tmp[0]; 885 } 886 switch (sz) { 887 case BPF_W: 888 /* Load a Word */ 889 emit(ARM_LDR_I(rd, rm, 0), ctx); 890 break; 891 case BPF_H: 892 /* Load a HalfWord */ 893 emit(ARM_LDRH_I(rd, rm, 0), ctx); 894 break; 895 case BPF_B: 896 /* Load a Byte */ 897 emit(ARM_LDRB_I(rd, rm, 0), ctx); 898 break; 899 } 900 if (dstk) 901 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); 902 } 903 904 /* Arithmatic Operation */ 905 static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, 906 const u8 rn, struct jit_ctx *ctx, u8 op) { 907 switch (op) { 908 case BPF_JSET: 909 ctx->seen |= SEEN_CALL; 910 emit(ARM_AND_R(ARM_IP, rt, rn), ctx); 911 emit(ARM_AND_R(ARM_LR, rd, rm), ctx); 912 emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); 913 break; 914 case BPF_JEQ: 915 case BPF_JNE: 916 case BPF_JGT: 917 case BPF_JGE: 918 case BPF_JLE: 919 case BPF_JLT: 920 emit(ARM_CMP_R(rd, rm), ctx); 921 _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); 922 break; 923 case BPF_JSLE: 924 case BPF_JSGT: 925 emit(ARM_CMP_R(rn, rt), ctx); 926 emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); 927 break; 928 case BPF_JSLT: 929 case BPF_JSGE: 930 emit(ARM_CMP_R(rt, rn), ctx); 931 emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); 932 break; 933 } 934 } 935 936 static int out_offset = -1; /* initialized on the first pass of build_body() */ 937 static int emit_bpf_tail_call(struct jit_ctx *ctx) 938 { 939 940 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ 941 const u8 *r2 = bpf2a32[BPF_REG_2]; 942 const u8 *r3 = bpf2a32[BPF_REG_3]; 943 const u8 *tmp = bpf2a32[TMP_REG_1]; 944 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 945 const u8 *tcc = bpf2a32[TCALL_CNT]; 946 const int idx0 = ctx->idx; 947 #define cur_offset (ctx->idx - idx0) 948 #define jmp_offset (out_offset - (cur_offset)) 949 u32 off, lo, hi; 950 951 /* if (index >= array->map.max_entries) 952 * goto out; 953 */ 954 off = offsetof(struct bpf_array, map.max_entries); 955 /* array->map.max_entries */ 956 emit_a32_mov_i(tmp[1], off, false, ctx); 957 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); 958 emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); 959 /* index (64 bit) */ 960 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); 961 /* index >= array->map.max_entries */ 962 emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); 963 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); 964 965 /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) 966 * goto out; 967 * tail_call_cnt++; 968 */ 969 lo = (u32)MAX_TAIL_CALL_CNT; 970 hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); 971 emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); 972 emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); 973 emit(ARM_CMP_I(tmp[0], hi), ctx); 974 
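
	/*
	 * 64 bit unsigned compare of tail_call_cnt against MAX_TAIL_CALL_CNT:
	 * the low words are only compared when the high words are equal, so
	 * the single HI branch below covers the full 64 bit value.
	 */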
_emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx); 975 _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); 976 emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx); 977 emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx); 978 emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); 979 emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); 980 981 /* prog = array->ptrs[index] 982 * if (prog == NULL) 983 * goto out; 984 */ 985 off = offsetof(struct bpf_array, ptrs); 986 emit_a32_mov_i(tmp[1], off, false, ctx); 987 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); 988 emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); 989 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); 990 emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx); 991 emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); 992 emit(ARM_CMP_I(tmp[1], 0), ctx); 993 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 994 995 /* goto *(prog->bpf_func + prologue_size); */ 996 off = offsetof(struct bpf_prog, bpf_func); 997 emit_a32_mov_i(tmp2[1], off, false, ctx); 998 emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); 999 emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); 1000 emit(ARM_BX(tmp[1]), ctx); 1001 1002 /* out: */ 1003 if (out_offset == -1) 1004 out_offset = cur_offset; 1005 if (cur_offset != out_offset) { 1006 pr_err_once("tail_call out_offset = %d, expected %d!\n", 1007 cur_offset, out_offset); 1008 return -1; 1009 } 1010 return 0; 1011 #undef cur_offset 1012 #undef jmp_offset 1013 } 1014 1015 /* 0xabcd => 0xcdab */ 1016 static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) 1017 { 1018 #if __LINUX_ARM_ARCH__ < 6 1019 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 1020 1021 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); 1022 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); 1023 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); 1024 emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx); 1025 #else /* ARMv6+ */ 1026 emit(ARM_REV16(rd, rn), ctx); 1027 #endif 1028 } 1029 1030 /* 0xabcdefgh => 0xghefcdab */ 1031 static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) 1032 { 1033 #if __LINUX_ARM_ARCH__ < 6 1034 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 1035 1036 emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); 1037 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); 1038 emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx); 1039 1040 emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx); 1041 emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx); 1042 emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx); 1043 emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); 1044 emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx); 1045 emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx); 1046 emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx); 1047 1048 #else /* ARMv6+ */ 1049 emit(ARM_REV(rd, rn), ctx); 1050 #endif 1051 } 1052 1053 // push the scratch stack register on top of the stack 1054 static inline void emit_push_r64(const u8 src[], const u8 shift, 1055 struct jit_ctx *ctx) 1056 { 1057 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 1058 u16 reg_set = 0; 1059 1060 emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); 1061 emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx); 1062 1063 reg_set = (1 << tmp2[1]) | (1 << tmp2[0]); 1064 emit(ARM_PUSH(reg_set), ctx); 1065 } 1066 1067 static void build_prologue(struct jit_ctx *ctx) 1068 { 1069 const u8 r0 = bpf2a32[BPF_REG_0][1]; 1070 const u8 r2 = bpf2a32[BPF_REG_1][1]; 1071 const u8 r3 = bpf2a32[BPF_REG_1][0]; 1072 const u8 r4 = bpf2a32[BPF_REG_6][1]; 1073 const u8 r5 = bpf2a32[BPF_REG_6][0]; 1074 
const u8 r6 = bpf2a32[TMP_REG_1][1];
	const u8 r7 = bpf2a32[TMP_REG_1][0];
	const u8 r8 = bpf2a32[TMP_REG_2][1];
	const u8 r10 = bpf2a32[TMP_REG_2][0];
	const u8 fplo = bpf2a32[BPF_REG_FP][1];
	const u8 fphi = bpf2a32[BPF_REG_FP][0];
	const u8 sp = ARM_SP;
	const u8 *tcc = bpf2a32[TCALL_CNT];

	u16 reg_set = 0;

	/*
	 * eBPF prog stack layout
	 *
	 *                          high
	 * original ARM_SP =>     +-----+ eBPF prologue
	 *                        |FP/LR|
	 * current ARM_FP =>      +-----+
	 *                        | ... | callee saved registers
	 * eBPF fp register =>    +-----+ <= (BPF_FP)
	 *                        | ... | eBPF JIT scratch space
	 *                        |     | eBPF prog stack
	 *                        +-----+
	 *                        |RSVD | JIT scratchpad
	 * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 */

	/* Save callee saved registers. */
	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
	reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
	emit(ARM_MOV_R(ARM_IP, sp), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	/* Check if call instruction exists in BPF body */
	if (ctx->seen & SEEN_CALL)
		reg_set |= (1<<ARM_LR);
	emit(ARM_PUSH(reg_set), ctx);
#endif
	/* Save frame pointer for later */
	emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);

	ctx->stack_size = imm8m(STACK_SIZE);

	/* Set up function call stack */
	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);

	/* Set up BPF prog stack base register */
	emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
	emit_a32_mov_i(fphi, 0, true, ctx);

	/* mov r4, 0 */
	emit(ARM_MOV_I(r4, 0), ctx);

	/* Move BPF_CTX to BPF_R1 */
	emit(ARM_MOV_R(r3, r4), ctx);
	emit(ARM_MOV_R(r2, r0), ctx);
	/* Initialize Tail Call Count */
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
	emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
	/* end of prologue */
}

static void build_epilogue(struct jit_ctx *ctx)
{
	const u8 r4 = bpf2a32[BPF_REG_6][1];
	const u8 r5 = bpf2a32[BPF_REG_6][0];
	const u8 r6 = bpf2a32[TMP_REG_1][1];
	const u8 r7 = bpf2a32[TMP_REG_1][0];
	const u8 r8 = bpf2a32[TMP_REG_2][1];
	const u8 r10 = bpf2a32[TMP_REG_2][0];
	u16 reg_set = 0;

	/* unwind function call stack */
	emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);

	/* restore callee saved registers. */
	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
	/* the first instruction of the prologue was: mov ip, sp */
	reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	if (ctx->seen & SEEN_CALL)
		reg_set |= (1<<ARM_PC);
	/* Restore callee saved registers. */
	emit(ARM_POP(reg_set), ctx);
	/* Return back to the callee function */
	if (!(ctx->seen & SEEN_CALL))
		emit(ARM_BX(ARM_LR), ctx);
#endif
}

/*
 * Convert an eBPF instruction to native instruction, i.e
 * JITs an eBPF instruction.
 * Returns :
 *	0  - Successfully JITed an 8-byte eBPF instruction
 *	>0 - Successfully JITed a 16-byte eBPF instruction
 *	<0 - Failed to JIT.
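 *
 * (Editor's note: the only >0 case today is BPF_LD | BPF_IMM | BPF_DW,
 * which spans two eBPF instruction slots.)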
1180 */ 1181 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) 1182 { 1183 const u8 code = insn->code; 1184 const u8 *dst = bpf2a32[insn->dst_reg]; 1185 const u8 *src = bpf2a32[insn->src_reg]; 1186 const u8 *tmp = bpf2a32[TMP_REG_1]; 1187 const u8 *tmp2 = bpf2a32[TMP_REG_2]; 1188 const s16 off = insn->off; 1189 const s32 imm = insn->imm; 1190 const int i = insn - ctx->prog->insnsi; 1191 const bool is64 = BPF_CLASS(code) == BPF_ALU64; 1192 const bool dstk = is_on_stack(insn->dst_reg); 1193 const bool sstk = is_on_stack(insn->src_reg); 1194 u8 rd, rt, rm, rn; 1195 s32 jmp_offset; 1196 1197 #define check_imm(bits, imm) do { \ 1198 if ((((imm) > 0) && ((imm) >> (bits))) || \ 1199 (((imm) < 0) && (~(imm) >> (bits)))) { \ 1200 pr_info("[%2d] imm=%d(0x%x) out of range\n", \ 1201 i, imm, imm); \ 1202 return -EINVAL; \ 1203 } \ 1204 } while (0) 1205 #define check_imm24(imm) check_imm(24, imm) 1206 1207 switch (code) { 1208 /* ALU operations */ 1209 1210 /* dst = src */ 1211 case BPF_ALU | BPF_MOV | BPF_K: 1212 case BPF_ALU | BPF_MOV | BPF_X: 1213 case BPF_ALU64 | BPF_MOV | BPF_K: 1214 case BPF_ALU64 | BPF_MOV | BPF_X: 1215 switch (BPF_SRC(code)) { 1216 case BPF_X: 1217 emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx); 1218 break; 1219 case BPF_K: 1220 /* Sign-extend immediate value to destination reg */ 1221 emit_a32_mov_i64(is64, dst, imm, dstk, ctx); 1222 break; 1223 } 1224 break; 1225 /* dst = dst + src/imm */ 1226 /* dst = dst - src/imm */ 1227 /* dst = dst | src/imm */ 1228 /* dst = dst & src/imm */ 1229 /* dst = dst ^ src/imm */ 1230 /* dst = dst * src/imm */ 1231 /* dst = dst << src */ 1232 /* dst = dst >> src */ 1233 case BPF_ALU | BPF_ADD | BPF_K: 1234 case BPF_ALU | BPF_ADD | BPF_X: 1235 case BPF_ALU | BPF_SUB | BPF_K: 1236 case BPF_ALU | BPF_SUB | BPF_X: 1237 case BPF_ALU | BPF_OR | BPF_K: 1238 case BPF_ALU | BPF_OR | BPF_X: 1239 case BPF_ALU | BPF_AND | BPF_K: 1240 case BPF_ALU | BPF_AND | BPF_X: 1241 case BPF_ALU | BPF_XOR | BPF_K: 1242 case BPF_ALU | BPF_XOR | BPF_X: 1243 case BPF_ALU | BPF_MUL | BPF_K: 1244 case BPF_ALU | BPF_MUL | BPF_X: 1245 case BPF_ALU | BPF_LSH | BPF_X: 1246 case BPF_ALU | BPF_RSH | BPF_X: 1247 case BPF_ALU | BPF_ARSH | BPF_K: 1248 case BPF_ALU | BPF_ARSH | BPF_X: 1249 case BPF_ALU64 | BPF_ADD | BPF_K: 1250 case BPF_ALU64 | BPF_ADD | BPF_X: 1251 case BPF_ALU64 | BPF_SUB | BPF_K: 1252 case BPF_ALU64 | BPF_SUB | BPF_X: 1253 case BPF_ALU64 | BPF_OR | BPF_K: 1254 case BPF_ALU64 | BPF_OR | BPF_X: 1255 case BPF_ALU64 | BPF_AND | BPF_K: 1256 case BPF_ALU64 | BPF_AND | BPF_X: 1257 case BPF_ALU64 | BPF_XOR | BPF_K: 1258 case BPF_ALU64 | BPF_XOR | BPF_X: 1259 switch (BPF_SRC(code)) { 1260 case BPF_X: 1261 emit_a32_alu_r64(is64, dst, src, dstk, sstk, 1262 ctx, BPF_OP(code)); 1263 break; 1264 case BPF_K: 1265 /* Move immediate value to the temporary register 1266 * and then do the ALU operation on the temporary 1267 * register as this will sign-extend the immediate 1268 * value into temporary reg and then it would be 1269 * safe to do the operation on it. 1270 */ 1271 emit_a32_mov_i64(is64, tmp2, imm, false, ctx); 1272 emit_a32_alu_r64(is64, dst, tmp2, dstk, false, 1273 ctx, BPF_OP(code)); 1274 break; 1275 } 1276 break; 1277 /* dst = dst / src(imm) */ 1278 /* dst = dst % src(imm) */ 1279 case BPF_ALU | BPF_DIV | BPF_K: 1280 case BPF_ALU | BPF_DIV | BPF_X: 1281 case BPF_ALU | BPF_MOD | BPF_K: 1282 case BPF_ALU | BPF_MOD | BPF_X: 1283 rt = src_lo; 1284 rd = dstk ? 
tmp2[1] : dst_lo; 1285 if (dstk) 1286 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 1287 switch (BPF_SRC(code)) { 1288 case BPF_X: 1289 rt = sstk ? tmp2[0] : rt; 1290 if (sstk) 1291 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), 1292 ctx); 1293 break; 1294 case BPF_K: 1295 rt = tmp2[0]; 1296 emit_a32_mov_i(rt, imm, false, ctx); 1297 break; 1298 } 1299 emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); 1300 if (dstk) 1301 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); 1302 emit_a32_mov_i(dst_hi, 0, dstk, ctx); 1303 break; 1304 case BPF_ALU64 | BPF_DIV | BPF_K: 1305 case BPF_ALU64 | BPF_DIV | BPF_X: 1306 case BPF_ALU64 | BPF_MOD | BPF_K: 1307 case BPF_ALU64 | BPF_MOD | BPF_X: 1308 goto notyet; 1309 /* dst = dst >> imm */ 1310 /* dst = dst << imm */ 1311 case BPF_ALU | BPF_RSH | BPF_K: 1312 case BPF_ALU | BPF_LSH | BPF_K: 1313 if (unlikely(imm > 31)) 1314 return -EINVAL; 1315 if (imm) 1316 emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code)); 1317 emit_a32_mov_i(dst_hi, 0, dstk, ctx); 1318 break; 1319 /* dst = dst << imm */ 1320 case BPF_ALU64 | BPF_LSH | BPF_K: 1321 if (unlikely(imm > 63)) 1322 return -EINVAL; 1323 emit_a32_lsh_i64(dst, dstk, imm, ctx); 1324 break; 1325 /* dst = dst >> imm */ 1326 case BPF_ALU64 | BPF_RSH | BPF_K: 1327 if (unlikely(imm > 63)) 1328 return -EINVAL; 1329 emit_a32_lsr_i64(dst, dstk, imm, ctx); 1330 break; 1331 /* dst = dst << src */ 1332 case BPF_ALU64 | BPF_LSH | BPF_X: 1333 emit_a32_lsh_r64(dst, src, dstk, sstk, ctx); 1334 break; 1335 /* dst = dst >> src */ 1336 case BPF_ALU64 | BPF_RSH | BPF_X: 1337 emit_a32_lsr_r64(dst, src, dstk, sstk, ctx); 1338 break; 1339 /* dst = dst >> src (signed) */ 1340 case BPF_ALU64 | BPF_ARSH | BPF_X: 1341 emit_a32_arsh_r64(dst, src, dstk, sstk, ctx); 1342 break; 1343 /* dst = dst >> imm (signed) */ 1344 case BPF_ALU64 | BPF_ARSH | BPF_K: 1345 if (unlikely(imm > 63)) 1346 return -EINVAL; 1347 emit_a32_arsh_i64(dst, dstk, imm, ctx); 1348 break; 1349 /* dst = ~dst */ 1350 case BPF_ALU | BPF_NEG: 1351 emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code)); 1352 emit_a32_mov_i(dst_hi, 0, dstk, ctx); 1353 break; 1354 /* dst = ~dst (64 bit) */ 1355 case BPF_ALU64 | BPF_NEG: 1356 emit_a32_neg64(dst, dstk, ctx); 1357 break; 1358 /* dst = dst * src/imm */ 1359 case BPF_ALU64 | BPF_MUL | BPF_X: 1360 case BPF_ALU64 | BPF_MUL | BPF_K: 1361 switch (BPF_SRC(code)) { 1362 case BPF_X: 1363 emit_a32_mul_r64(dst, src, dstk, sstk, ctx); 1364 break; 1365 case BPF_K: 1366 /* Move immediate value to the temporary register 1367 * and then do the multiplication on it as this 1368 * will sign-extend the immediate value into temp 1369 * reg then it would be safe to do the operation 1370 * on it. 1371 */ 1372 emit_a32_mov_i64(is64, tmp2, imm, false, ctx); 1373 emit_a32_mul_r64(dst, tmp2, dstk, false, ctx); 1374 break; 1375 } 1376 break; 1377 /* dst = htole(dst) */ 1378 /* dst = htobe(dst) */ 1379 case BPF_ALU | BPF_END | BPF_FROM_LE: 1380 case BPF_ALU | BPF_END | BPF_FROM_BE: 1381 rd = dstk ? tmp[0] : dst_hi; 1382 rt = dstk ? 
tmp[1] : dst_lo; 1383 if (dstk) { 1384 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); 1385 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); 1386 } 1387 if (BPF_SRC(code) == BPF_FROM_LE) 1388 goto emit_bswap_uxt; 1389 switch (imm) { 1390 case 16: 1391 emit_rev16(rt, rt, ctx); 1392 goto emit_bswap_uxt; 1393 case 32: 1394 emit_rev32(rt, rt, ctx); 1395 goto emit_bswap_uxt; 1396 case 64: 1397 /* Because of the usage of ARM_LR */ 1398 ctx->seen |= SEEN_CALL; 1399 emit_rev32(ARM_LR, rt, ctx); 1400 emit_rev32(rt, rd, ctx); 1401 emit(ARM_MOV_R(rd, ARM_LR), ctx); 1402 break; 1403 } 1404 goto exit; 1405 emit_bswap_uxt: 1406 switch (imm) { 1407 case 16: 1408 /* zero-extend 16 bits into 64 bits */ 1409 #if __LINUX_ARM_ARCH__ < 6 1410 emit_a32_mov_i(tmp2[1], 0xffff, false, ctx); 1411 emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); 1412 #else /* ARMv6+ */ 1413 emit(ARM_UXTH(rt, rt), ctx); 1414 #endif 1415 emit(ARM_EOR_R(rd, rd, rd), ctx); 1416 break; 1417 case 32: 1418 /* zero-extend 32 bits into 64 bits */ 1419 emit(ARM_EOR_R(rd, rd, rd), ctx); 1420 break; 1421 case 64: 1422 /* nop */ 1423 break; 1424 } 1425 exit: 1426 if (dstk) { 1427 emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); 1428 emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); 1429 } 1430 break; 1431 /* dst = imm64 */ 1432 case BPF_LD | BPF_IMM | BPF_DW: 1433 { 1434 const struct bpf_insn insn1 = insn[1]; 1435 u32 hi, lo = imm; 1436 1437 hi = insn1.imm; 1438 emit_a32_mov_i(dst_lo, lo, dstk, ctx); 1439 emit_a32_mov_i(dst_hi, hi, dstk, ctx); 1440 1441 return 1; 1442 } 1443 /* LDX: dst = *(size *)(src + off) */ 1444 case BPF_LDX | BPF_MEM | BPF_W: 1445 case BPF_LDX | BPF_MEM | BPF_H: 1446 case BPF_LDX | BPF_MEM | BPF_B: 1447 case BPF_LDX | BPF_MEM | BPF_DW: 1448 rn = sstk ? tmp2[1] : src_lo; 1449 if (sstk) 1450 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); 1451 switch (BPF_SIZE(code)) { 1452 case BPF_W: 1453 /* Load a Word */ 1454 case BPF_H: 1455 /* Load a Half-Word */ 1456 case BPF_B: 1457 /* Load a Byte */ 1458 emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); 1459 emit_a32_mov_i(dst_hi, 0, dstk, ctx); 1460 break; 1461 case BPF_DW: 1462 /* Load a double word */ 1463 emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); 1464 emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); 1465 break; 1466 } 1467 break; 1468 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ 1469 case BPF_LD | BPF_ABS | BPF_W: 1470 case BPF_LD | BPF_ABS | BPF_H: 1471 case BPF_LD | BPF_ABS | BPF_B: 1472 /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */ 1473 case BPF_LD | BPF_IND | BPF_W: 1474 case BPF_LD | BPF_IND | BPF_H: 1475 case BPF_LD | BPF_IND | BPF_B: 1476 { 1477 const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */ 1478 const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/ 1479 /* rtn value */ 1480 const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */ 1481 const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */ 1482 const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */ 1483 const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) 
*/ 1484 int size; 1485 1486 /* Setting up first argument */ 1487 emit(ARM_MOV_R(r0, r4), ctx); 1488 1489 /* Setting up second argument */ 1490 emit_a32_mov_i(r1, imm, false, ctx); 1491 if (BPF_MODE(code) == BPF_IND) 1492 emit_a32_alu_r(r1, src_lo, false, sstk, ctx, 1493 false, false, BPF_ADD); 1494 1495 /* Setting up third argument */ 1496 switch (BPF_SIZE(code)) { 1497 case BPF_W: 1498 size = 4; 1499 break; 1500 case BPF_H: 1501 size = 2; 1502 break; 1503 case BPF_B: 1504 size = 1; 1505 break; 1506 default: 1507 return -EINVAL; 1508 } 1509 emit_a32_mov_i(r2, size, false, ctx); 1510 1511 /* Setting up fourth argument */ 1512 emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx); 1513 1514 /* Setting up function pointer to call */ 1515 emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx); 1516 emit_blx_r(r6, ctx); 1517 1518 emit(ARM_EOR_R(r1, r1, r1), ctx); 1519 /* Check if return address is NULL or not. 1520 * if NULL then jump to epilogue 1521 * else continue to load the value from retn address 1522 */ 1523 emit(ARM_CMP_I(r0, 0), ctx); 1524 jmp_offset = epilogue_offset(ctx); 1525 check_imm24(jmp_offset); 1526 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 1527 1528 /* Load value from the address */ 1529 switch (BPF_SIZE(code)) { 1530 case BPF_W: 1531 emit(ARM_LDR_I(r0, r0, 0), ctx); 1532 emit_rev32(r0, r0, ctx); 1533 break; 1534 case BPF_H: 1535 emit(ARM_LDRH_I(r0, r0, 0), ctx); 1536 emit_rev16(r0, r0, ctx); 1537 break; 1538 case BPF_B: 1539 emit(ARM_LDRB_I(r0, r0, 0), ctx); 1540 /* No need to reverse */ 1541 break; 1542 } 1543 break; 1544 } 1545 /* ST: *(size *)(dst + off) = imm */ 1546 case BPF_ST | BPF_MEM | BPF_W: 1547 case BPF_ST | BPF_MEM | BPF_H: 1548 case BPF_ST | BPF_MEM | BPF_B: 1549 case BPF_ST | BPF_MEM | BPF_DW: 1550 switch (BPF_SIZE(code)) { 1551 case BPF_DW: 1552 /* Sign-extend immediate value into temp reg */ 1553 emit_a32_mov_i64(true, tmp2, imm, false, ctx); 1554 emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W); 1555 emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W); 1556 break; 1557 case BPF_W: 1558 case BPF_H: 1559 case BPF_B: 1560 emit_a32_mov_i(tmp2[1], imm, false, ctx); 1561 emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, 1562 BPF_SIZE(code)); 1563 break; 1564 } 1565 break; 1566 /* STX XADD: lock *(u32 *)(dst + off) += src */ 1567 case BPF_STX | BPF_XADD | BPF_W: 1568 /* STX XADD: lock *(u64 *)(dst + off) += src */ 1569 case BPF_STX | BPF_XADD | BPF_DW: 1570 goto notyet; 1571 /* STX: *(size *)(dst + off) = src */ 1572 case BPF_STX | BPF_MEM | BPF_W: 1573 case BPF_STX | BPF_MEM | BPF_H: 1574 case BPF_STX | BPF_MEM | BPF_B: 1575 case BPF_STX | BPF_MEM | BPF_DW: 1576 { 1577 u8 sz = BPF_SIZE(code); 1578 1579 rn = sstk ? tmp2[1] : src_lo; 1580 rm = sstk ? 
tmp2[0] : src_hi; 1581 if (sstk) { 1582 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); 1583 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); 1584 } 1585 1586 /* Store the value */ 1587 if (BPF_SIZE(code) == BPF_DW) { 1588 emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W); 1589 emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W); 1590 } else { 1591 emit_str_r(dst_lo, rn, dstk, off, ctx, sz); 1592 } 1593 break; 1594 } 1595 /* PC += off if dst == src */ 1596 /* PC += off if dst > src */ 1597 /* PC += off if dst >= src */ 1598 /* PC += off if dst < src */ 1599 /* PC += off if dst <= src */ 1600 /* PC += off if dst != src */ 1601 /* PC += off if dst > src (signed) */ 1602 /* PC += off if dst >= src (signed) */ 1603 /* PC += off if dst < src (signed) */ 1604 /* PC += off if dst <= src (signed) */ 1605 /* PC += off if dst & src */ 1606 case BPF_JMP | BPF_JEQ | BPF_X: 1607 case BPF_JMP | BPF_JGT | BPF_X: 1608 case BPF_JMP | BPF_JGE | BPF_X: 1609 case BPF_JMP | BPF_JNE | BPF_X: 1610 case BPF_JMP | BPF_JSGT | BPF_X: 1611 case BPF_JMP | BPF_JSGE | BPF_X: 1612 case BPF_JMP | BPF_JSET | BPF_X: 1613 case BPF_JMP | BPF_JLE | BPF_X: 1614 case BPF_JMP | BPF_JLT | BPF_X: 1615 case BPF_JMP | BPF_JSLT | BPF_X: 1616 case BPF_JMP | BPF_JSLE | BPF_X: 1617 /* Setup source registers */ 1618 rm = sstk ? tmp2[0] : src_hi; 1619 rn = sstk ? tmp2[1] : src_lo; 1620 if (sstk) { 1621 emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); 1622 emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); 1623 } 1624 goto go_jmp; 1625 /* PC += off if dst == imm */ 1626 /* PC += off if dst > imm */ 1627 /* PC += off if dst >= imm */ 1628 /* PC += off if dst < imm */ 1629 /* PC += off if dst <= imm */ 1630 /* PC += off if dst != imm */ 1631 /* PC += off if dst > imm (signed) */ 1632 /* PC += off if dst >= imm (signed) */ 1633 /* PC += off if dst < imm (signed) */ 1634 /* PC += off if dst <= imm (signed) */ 1635 /* PC += off if dst & imm */ 1636 case BPF_JMP | BPF_JEQ | BPF_K: 1637 case BPF_JMP | BPF_JGT | BPF_K: 1638 case BPF_JMP | BPF_JGE | BPF_K: 1639 case BPF_JMP | BPF_JNE | BPF_K: 1640 case BPF_JMP | BPF_JSGT | BPF_K: 1641 case BPF_JMP | BPF_JSGE | BPF_K: 1642 case BPF_JMP | BPF_JSET | BPF_K: 1643 case BPF_JMP | BPF_JLT | BPF_K: 1644 case BPF_JMP | BPF_JLE | BPF_K: 1645 case BPF_JMP | BPF_JSLT | BPF_K: 1646 case BPF_JMP | BPF_JSLE | BPF_K: 1647 if (off == 0) 1648 break; 1649 rm = tmp2[0]; 1650 rn = tmp2[1]; 1651 /* Sign-extend immediate value */ 1652 emit_a32_mov_i64(true, tmp2, imm, false, ctx); 1653 go_jmp: 1654 /* Setup destination register */ 1655 rd = dstk ? tmp[0] : dst_hi; 1656 rt = dstk ? 
tmp[1] : dst_lo; 1657 if (dstk) { 1658 emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); 1659 emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); 1660 } 1661 1662 /* Check for the condition */ 1663 emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); 1664 1665 /* Setup JUMP instruction */ 1666 jmp_offset = bpf2a32_offset(i+off, i, ctx); 1667 switch (BPF_OP(code)) { 1668 case BPF_JNE: 1669 case BPF_JSET: 1670 _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); 1671 break; 1672 case BPF_JEQ: 1673 _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); 1674 break; 1675 case BPF_JGT: 1676 _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); 1677 break; 1678 case BPF_JGE: 1679 _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); 1680 break; 1681 case BPF_JSGT: 1682 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); 1683 break; 1684 case BPF_JSGE: 1685 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); 1686 break; 1687 case BPF_JLE: 1688 _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); 1689 break; 1690 case BPF_JLT: 1691 _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); 1692 break; 1693 case BPF_JSLT: 1694 _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); 1695 break; 1696 case BPF_JSLE: 1697 _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); 1698 break; 1699 } 1700 break; 1701 /* JMP OFF */ 1702 case BPF_JMP | BPF_JA: 1703 { 1704 if (off == 0) 1705 break; 1706 jmp_offset = bpf2a32_offset(i+off, i, ctx); 1707 check_imm24(jmp_offset); 1708 emit(ARM_B(jmp_offset), ctx); 1709 break; 1710 } 1711 /* tail call */ 1712 case BPF_JMP | BPF_TAIL_CALL: 1713 if (emit_bpf_tail_call(ctx)) 1714 return -EFAULT; 1715 break; 1716 /* function call */ 1717 case BPF_JMP | BPF_CALL: 1718 { 1719 const u8 *r0 = bpf2a32[BPF_REG_0]; 1720 const u8 *r1 = bpf2a32[BPF_REG_1]; 1721 const u8 *r2 = bpf2a32[BPF_REG_2]; 1722 const u8 *r3 = bpf2a32[BPF_REG_3]; 1723 const u8 *r4 = bpf2a32[BPF_REG_4]; 1724 const u8 *r5 = bpf2a32[BPF_REG_5]; 1725 const u32 func = (u32)__bpf_call_base + (u32)imm; 1726 1727 emit_a32_mov_r64(true, r0, r1, false, false, ctx); 1728 emit_a32_mov_r64(true, r1, r2, false, true, ctx); 1729 emit_push_r64(r5, 0, ctx); 1730 emit_push_r64(r4, 8, ctx); 1731 emit_push_r64(r3, 16, ctx); 1732 1733 emit_a32_mov_i(tmp[1], func, false, ctx); 1734 emit_blx_r(tmp[1], ctx); 1735 1736 emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean 1737 break; 1738 } 1739 /* function return */ 1740 case BPF_JMP | BPF_EXIT: 1741 /* Optimization: when last instruction is EXIT 1742 * simply fallthrough to epilogue. 1743 */ 1744 if (i == ctx->prog->len - 1) 1745 break; 1746 jmp_offset = epilogue_offset(ctx); 1747 check_imm24(jmp_offset); 1748 emit(ARM_B(jmp_offset), ctx); 1749 break; 1750 notyet: 1751 pr_info_once("*** NOT YET: opcode %02x ***\n", code); 1752 return -EFAULT; 1753 default: 1754 pr_err_once("unknown opcode %02x\n", code); 1755 return -EINVAL; 1756 } 1757 1758 if (ctx->flags & FLAG_IMM_OVERFLOW) 1759 /* 1760 * this instruction generated an overflow when 1761 * trying to access the literal pool, so 1762 * delegate this filter to the kernel interpreter. 1763 */ 1764 return -1; 1765 return 0; 1766 } 1767 1768 static int build_body(struct jit_ctx *ctx) 1769 { 1770 const struct bpf_prog *prog = ctx->prog; 1771 unsigned int i; 1772 1773 for (i = 0; i < prog->len; i++) { 1774 const struct bpf_insn *insn = &(prog->insnsi[i]); 1775 int ret; 1776 1777 ret = build_insn(insn, ctx); 1778 1779 /* It's used with loading the 64 bit immediate value. 
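 * A return value of 1 means build_insn() also consumed the following
 * instruction slot (BPF_LD | BPF_IMM | BPF_DW), so that slot is skipped
 * below while still recording its offset.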
 */
		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				ctx->offsets[i] = ctx->idx;
			continue;
		}

		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx;

		/* If unsuccessful, return with the error code */
		if (ret)
			return ret;
	}
	return 0;
}

static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
			return -1;
	}

	return 0;
}

void bpf_jit_compile(struct bpf_prog *prog)
{
	/* Nothing to do here. We support Internal BPF. */
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	bool tmp_blinded = false;
	struct jit_ctx ctx;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;

	/* If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!bpf_jit_enable)
		return orig_prog;

	/* If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);

	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* If we are not able to allocate memory for offsets[], we must
	 * fall back to the interpreter.
	 */
	ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (ctx.offsets == NULL) {
		prog = orig_prog;
		goto out;
	}

	/* 1) fake pass to find the length of the JITed code,
	 * to compute ctx->offsets and other context variables
	 * needed to compute the final JITed code.
	 * Also, calculate the random starting pointer/start of the JITed
	 * code, which is prefixed by a random number of fault instructions.
	 *
	 * If the first pass fails then there is no chance of it
	 * being successful in the second pass, so just fall back
	 * to the interpreter.
	 */
	if (build_body(&ctx)) {
		prog = orig_prog;
		goto out_off;
	}

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.epilogue_offset = ctx.idx;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
		if (ctx.imms == NULL) {
			prog = orig_prog;
			goto out_off;
		}
	}
#else
	/* there's nothing special about the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif
	/* Now we can get the actual image size of the JITed ARM code.
	 * Currently we are not considering THUMB-2 instructions for the
	 * JIT, although they could decrease the size of the image.
	 *
	 * As each ARM instruction is 32 bits long, we translate the number
	 * of JITed instructions into the size required to store the
	 * JITed code.
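	 *
	 * (Editor's note: on ARMv6 and older, the literal pool words counted
	 * in ctx.imm_count were already added to ctx.idx above, so they are
	 * part of this size as well.)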
1900 */ 1901 image_size = sizeof(u32) * ctx.idx; 1902 1903 /* Now we know the size of the structure to make */ 1904 header = bpf_jit_binary_alloc(image_size, &image_ptr, 1905 sizeof(u32), jit_fill_hole); 1906 /* Not able to allocate memory for the structure then 1907 * we must fall back to the interpretation 1908 */ 1909 if (header == NULL) { 1910 prog = orig_prog; 1911 goto out_imms; 1912 } 1913 1914 /* 2.) Actual pass to generate final JIT code */ 1915 ctx.target = (u32 *) image_ptr; 1916 ctx.idx = 0; 1917 1918 build_prologue(&ctx); 1919 1920 /* If building the body of the JITed code fails somehow, 1921 * we fall back to the interpretation. 1922 */ 1923 if (build_body(&ctx) < 0) { 1924 image_ptr = NULL; 1925 bpf_jit_binary_free(header); 1926 prog = orig_prog; 1927 goto out_imms; 1928 } 1929 build_epilogue(&ctx); 1930 1931 /* 3.) Extra pass to validate JITed Code */ 1932 if (validate_code(&ctx)) { 1933 image_ptr = NULL; 1934 bpf_jit_binary_free(header); 1935 prog = orig_prog; 1936 goto out_imms; 1937 } 1938 flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); 1939 1940 if (bpf_jit_enable > 1) 1941 /* there are 2 passes here */ 1942 bpf_jit_dump(prog->len, image_size, 2, ctx.target); 1943 1944 set_memory_ro((unsigned long)header, header->pages); 1945 prog->bpf_func = (void *)ctx.target; 1946 prog->jited = 1; 1947 prog->jited_len = image_size; 1948 1949 out_imms: 1950 #if __LINUX_ARM_ARCH__ < 7 1951 if (ctx.imm_count) 1952 kfree(ctx.imms); 1953 #endif 1954 out_off: 1955 kfree(ctx.offsets); 1956 out: 1957 if (tmp_blinded) 1958 bpf_jit_prog_release_other(prog, prog == orig_prog ? 1959 tmp : orig_prog); 1960 return prog; 1961 } 1962 1963