1 /* 2 * Just-In-Time compiler for BPF filters on 32bit ARM 3 * 4 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the 8 * Free Software Foundation; version 2 of the License. 9 */ 10 11 #include <linux/bitops.h> 12 #include <linux/compiler.h> 13 #include <linux/errno.h> 14 #include <linux/filter.h> 15 #include <linux/netdevice.h> 16 #include <linux/string.h> 17 #include <linux/slab.h> 18 #include <linux/if_vlan.h> 19 20 #include <asm/cacheflush.h> 21 #include <asm/set_memory.h> 22 #include <asm/hwcap.h> 23 #include <asm/opcodes.h> 24 25 #include "bpf_jit_32.h" 26 27 /* 28 * ABI: 29 * 30 * r0 scratch register 31 * r4 BPF register A 32 * r5 BPF register X 33 * r6 pointer to the skb 34 * r7 skb->data 35 * r8 skb_headlen(skb) 36 */ 37 38 #define r_scratch ARM_R0 39 /* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ 40 #define r_off ARM_R1 41 #define r_A ARM_R4 42 #define r_X ARM_R5 43 #define r_skb ARM_R6 44 #define r_skb_data ARM_R7 45 #define r_skb_hl ARM_R8 46 47 #define SCRATCH_SP_OFFSET 0 48 #define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) 49 50 #define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) 51 #define SEEN_MEM_WORD(k) (1 << (k)) 52 #define SEEN_X (1 << BPF_MEMWORDS) 53 #define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) 54 #define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) 55 #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) 56 57 #define FLAG_NEED_X_RESET (1 << 0) 58 #define FLAG_IMM_OVERFLOW (1 << 1) 59 60 struct jit_ctx { 61 const struct bpf_prog *skf; 62 unsigned idx; 63 unsigned prologue_bytes; 64 int ret0_fp_idx; 65 u32 seen; 66 u32 flags; 67 u32 *offsets; 68 u32 *target; 69 #if __LINUX_ARM_ARCH__ < 7 70 u16 epilogue_bytes; 71 u16 imm_count; 72 u32 *imms; 73 #endif 74 }; 75 76 int bpf_jit_enable __read_mostly; 77 78 static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, 79 unsigned int size) 80 { 81 void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); 82 83 if (!ptr) 84 return -EFAULT; 85 memcpy(ret, ptr, size); 86 return 0; 87 } 88 89 static u64 jit_get_skb_b(struct sk_buff *skb, int offset) 90 { 91 u8 ret; 92 int err; 93 94 if (offset < 0) 95 err = call_neg_helper(skb, offset, &ret, 1); 96 else 97 err = skb_copy_bits(skb, offset, &ret, 1); 98 99 return (u64)err << 32 | ret; 100 } 101 102 static u64 jit_get_skb_h(struct sk_buff *skb, int offset) 103 { 104 u16 ret; 105 int err; 106 107 if (offset < 0) 108 err = call_neg_helper(skb, offset, &ret, 2); 109 else 110 err = skb_copy_bits(skb, offset, &ret, 2); 111 112 return (u64)err << 32 | ntohs(ret); 113 } 114 115 static u64 jit_get_skb_w(struct sk_buff *skb, int offset) 116 { 117 u32 ret; 118 int err; 119 120 if (offset < 0) 121 err = call_neg_helper(skb, offset, &ret, 4); 122 else 123 err = skb_copy_bits(skb, offset, &ret, 4); 124 125 return (u64)err << 32 | ntohl(ret); 126 } 127 128 /* 129 * Wrappers which handle both OABI and EABI and assures Thumb2 interworking 130 * (where the assembly routines like __aeabi_uidiv could cause problems). 131 */ 132 static u32 jit_udiv(u32 dividend, u32 divisor) 133 { 134 return dividend / divisor; 135 } 136 137 static u32 jit_mod(u32 dividend, u32 divisor) 138 { 139 return dividend % divisor; 140 } 141 142 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) 143 { 144 inst |= (cond << 28); 145 inst = __opcode_to_mem_arm(inst); 146 147 if (ctx->target != NULL) 148 ctx->target[ctx->idx] = inst; 149 150 ctx->idx++; 151 } 152 153 /* 154 * Emit an instruction that will be executed unconditionally. 155 */ 156 static inline void emit(u32 inst, struct jit_ctx *ctx) 157 { 158 _emit(ARM_COND_AL, inst, ctx); 159 } 160 161 static u16 saved_regs(struct jit_ctx *ctx) 162 { 163 u16 ret = 0; 164 165 if ((ctx->skf->len > 1) || 166 (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) 167 ret |= 1 << r_A; 168 169 #ifdef CONFIG_FRAME_POINTER 170 ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); 171 #else 172 if (ctx->seen & SEEN_CALL) 173 ret |= 1 << ARM_LR; 174 #endif 175 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 176 ret |= 1 << r_skb; 177 if (ctx->seen & SEEN_DATA) 178 ret |= (1 << r_skb_data) | (1 << r_skb_hl); 179 if (ctx->seen & SEEN_X) 180 ret |= 1 << r_X; 181 182 return ret; 183 } 184 185 static inline int mem_words_used(struct jit_ctx *ctx) 186 { 187 /* yes, we do waste some stack space IF there are "holes" in the set" */ 188 return fls(ctx->seen & SEEN_MEM); 189 } 190 191 static void jit_fill_hole(void *area, unsigned int size) 192 { 193 u32 *ptr; 194 /* We are guaranteed to have aligned memory. */ 195 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) 196 *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); 197 } 198 199 static void build_prologue(struct jit_ctx *ctx) 200 { 201 u16 reg_set = saved_regs(ctx); 202 u16 off; 203 204 #ifdef CONFIG_FRAME_POINTER 205 emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); 206 emit(ARM_PUSH(reg_set), ctx); 207 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); 208 #else 209 if (reg_set) 210 emit(ARM_PUSH(reg_set), ctx); 211 #endif 212 213 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 214 emit(ARM_MOV_R(r_skb, ARM_R0), ctx); 215 216 if (ctx->seen & SEEN_DATA) { 217 off = offsetof(struct sk_buff, data); 218 emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); 219 /* headlen = len - data_len */ 220 off = offsetof(struct sk_buff, len); 221 emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); 222 off = offsetof(struct sk_buff, data_len); 223 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 224 emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); 225 } 226 227 if (ctx->flags & FLAG_NEED_X_RESET) 228 emit(ARM_MOV_I(r_X, 0), ctx); 229 230 /* do not leak kernel data to userspace */ 231 if (bpf_needs_clear_a(&ctx->skf->insns[0])) 232 emit(ARM_MOV_I(r_A, 0), ctx); 233 234 /* stack space for the BPF_MEM words */ 235 if (ctx->seen & SEEN_MEM) 236 emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 237 } 238 239 static void build_epilogue(struct jit_ctx *ctx) 240 { 241 u16 reg_set = saved_regs(ctx); 242 243 if (ctx->seen & SEEN_MEM) 244 emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 245 246 reg_set &= ~(1 << ARM_LR); 247 248 #ifdef CONFIG_FRAME_POINTER 249 /* the first instruction of the prologue was: mov ip, sp */ 250 reg_set &= ~(1 << ARM_IP); 251 reg_set |= (1 << ARM_SP); 252 emit(ARM_LDM(ARM_SP, reg_set), ctx); 253 #else 254 if (reg_set) { 255 if (ctx->seen & SEEN_CALL) 256 reg_set |= 1 << ARM_PC; 257 emit(ARM_POP(reg_set), ctx); 258 } 259 260 if (!(ctx->seen & SEEN_CALL)) 261 emit(ARM_BX(ARM_LR), ctx); 262 #endif 263 } 264 265 static int16_t imm8m(u32 x) 266 { 267 u32 rot; 268 269 for (rot = 0; rot < 16; rot++) 270 if ((x & ~ror32(0xff, 2 * rot)) == 0) 271 return rol32(x, 2 * rot) | (rot << 8); 272 273 return -1; 274 } 275 276 #if __LINUX_ARM_ARCH__ < 7 277 278 static u16 imm_offset(u32 k, struct jit_ctx *ctx) 279 { 280 unsigned i = 0, offset; 281 u16 imm; 282 283 /* on the "fake" run we just count them (duplicates included) */ 284 if (ctx->target == NULL) { 285 ctx->imm_count++; 286 return 0; 287 } 288 289 while ((i < ctx->imm_count) && ctx->imms[i]) { 290 if (ctx->imms[i] == k) 291 break; 292 i++; 293 } 294 295 if (ctx->imms[i] == 0) 296 ctx->imms[i] = k; 297 298 /* constants go just after the epilogue */ 299 offset = ctx->offsets[ctx->skf->len]; 300 offset += ctx->prologue_bytes; 301 offset += ctx->epilogue_bytes; 302 offset += i * 4; 303 304 ctx->target[offset / 4] = k; 305 306 /* PC in ARM mode == address of the instruction + 8 */ 307 imm = offset - (8 + ctx->idx * 4); 308 309 if (imm & ~0xfff) { 310 /* 311 * literal pool is too far, signal it into flags. we 312 * can only detect it on the second pass unfortunately. 313 */ 314 ctx->flags |= FLAG_IMM_OVERFLOW; 315 return 0; 316 } 317 318 return imm; 319 } 320 321 #endif /* __LINUX_ARM_ARCH__ */ 322 323 /* 324 * Move an immediate that's not an imm8m to a core register. 325 */ 326 static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) 327 { 328 #if __LINUX_ARM_ARCH__ < 7 329 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 330 #else 331 emit(ARM_MOVW(rd, val & 0xffff), ctx); 332 if (val > 0xffff) 333 emit(ARM_MOVT(rd, val >> 16), ctx); 334 #endif 335 } 336 337 static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) 338 { 339 int imm12 = imm8m(val); 340 341 if (imm12 >= 0) 342 emit(ARM_MOV_I(rd, imm12), ctx); 343 else 344 emit_mov_i_no8m(rd, val, ctx); 345 } 346 347 #if __LINUX_ARM_ARCH__ < 6 348 349 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 350 { 351 _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); 352 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 353 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); 354 _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); 355 _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); 356 _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); 357 _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); 358 _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); 359 } 360 361 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 362 { 363 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 364 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); 365 _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); 366 } 367 368 static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) 369 { 370 /* r_dst = (r_src << 8) | (r_src >> 8) */ 371 emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); 372 emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); 373 374 /* 375 * we need to mask out the bits set in r_dst[23:16] due to 376 * the first shift instruction. 377 * 378 * note that 0x8ff is the encoded immediate 0x00ff0000. 379 */ 380 emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); 381 } 382 383 #else /* ARMv6+ */ 384 385 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 386 { 387 _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); 388 #ifdef __LITTLE_ENDIAN 389 _emit(cond, ARM_REV(r_res, r_res), ctx); 390 #endif 391 } 392 393 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 394 { 395 _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); 396 #ifdef __LITTLE_ENDIAN 397 _emit(cond, ARM_REV16(r_res, r_res), ctx); 398 #endif 399 } 400 401 static inline void emit_swap16(u8 r_dst __maybe_unused, 402 u8 r_src __maybe_unused, 403 struct jit_ctx *ctx __maybe_unused) 404 { 405 #ifdef __LITTLE_ENDIAN 406 emit(ARM_REV16(r_dst, r_src), ctx); 407 #endif 408 } 409 410 #endif /* __LINUX_ARM_ARCH__ < 6 */ 411 412 413 /* Compute the immediate value for a PC-relative branch. */ 414 static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) 415 { 416 u32 imm; 417 418 if (ctx->target == NULL) 419 return 0; 420 /* 421 * BPF allows only forward jumps and the offset of the target is 422 * still the one computed during the first pass. 423 */ 424 imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); 425 426 return imm >> 2; 427 } 428 429 #define OP_IMM3(op, r1, r2, imm_val, ctx) \ 430 do { \ 431 imm12 = imm8m(imm_val); \ 432 if (imm12 < 0) { \ 433 emit_mov_i_no8m(r_scratch, imm_val, ctx); \ 434 emit(op ## _R((r1), (r2), r_scratch), ctx); \ 435 } else { \ 436 emit(op ## _I((r1), (r2), imm12), ctx); \ 437 } \ 438 } while (0) 439 440 static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) 441 { 442 if (ctx->ret0_fp_idx >= 0) { 443 _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); 444 /* NOP to keep the size constant between passes */ 445 emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); 446 } else { 447 _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); 448 _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); 449 } 450 } 451 452 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 453 { 454 #if __LINUX_ARM_ARCH__ < 5 455 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 456 457 if (elf_hwcap & HWCAP_THUMB) 458 emit(ARM_BX(tgt_reg), ctx); 459 else 460 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 461 #else 462 emit(ARM_BLX_R(tgt_reg), ctx); 463 #endif 464 } 465 466 static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, 467 int bpf_op) 468 { 469 #if __LINUX_ARM_ARCH__ == 7 470 if (elf_hwcap & HWCAP_IDIVA) { 471 if (bpf_op == BPF_DIV) 472 emit(ARM_UDIV(rd, rm, rn), ctx); 473 else { 474 emit(ARM_UDIV(ARM_R3, rm, rn), ctx); 475 emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); 476 } 477 return; 478 } 479 #endif 480 481 /* 482 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 483 * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into 484 * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm 485 * before using it as a source for ARM_R1. 486 * 487 * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is 488 * ARM_R5 (r_X) so there is no particular register overlap 489 * issues. 490 */ 491 if (rn != ARM_R1) 492 emit(ARM_MOV_R(ARM_R1, rn), ctx); 493 if (rm != ARM_R0) 494 emit(ARM_MOV_R(ARM_R0, rm), ctx); 495 496 ctx->seen |= SEEN_CALL; 497 emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod, 498 ctx); 499 emit_blx_r(ARM_R3, ctx); 500 501 if (rd != ARM_R0) 502 emit(ARM_MOV_R(rd, ARM_R0), ctx); 503 } 504 505 static inline void update_on_xread(struct jit_ctx *ctx) 506 { 507 if (!(ctx->seen & SEEN_X)) 508 ctx->flags |= FLAG_NEED_X_RESET; 509 510 ctx->seen |= SEEN_X; 511 } 512 513 static int build_body(struct jit_ctx *ctx) 514 { 515 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; 516 const struct bpf_prog *prog = ctx->skf; 517 const struct sock_filter *inst; 518 unsigned i, load_order, off, condt; 519 int imm12; 520 u32 k; 521 522 for (i = 0; i < prog->len; i++) { 523 u16 code; 524 525 inst = &(prog->insns[i]); 526 /* K as an immediate value operand */ 527 k = inst->k; 528 code = bpf_anc_helper(inst); 529 530 /* compute offsets only in the fake pass */ 531 if (ctx->target == NULL) 532 ctx->offsets[i] = ctx->idx * 4; 533 534 switch (code) { 535 case BPF_LD | BPF_IMM: 536 emit_mov_i(r_A, k, ctx); 537 break; 538 case BPF_LD | BPF_W | BPF_LEN: 539 ctx->seen |= SEEN_SKB; 540 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 541 emit(ARM_LDR_I(r_A, r_skb, 542 offsetof(struct sk_buff, len)), ctx); 543 break; 544 case BPF_LD | BPF_MEM: 545 /* A = scratch[k] */ 546 ctx->seen |= SEEN_MEM_WORD(k); 547 emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 548 break; 549 case BPF_LD | BPF_W | BPF_ABS: 550 load_order = 2; 551 goto load; 552 case BPF_LD | BPF_H | BPF_ABS: 553 load_order = 1; 554 goto load; 555 case BPF_LD | BPF_B | BPF_ABS: 556 load_order = 0; 557 load: 558 emit_mov_i(r_off, k, ctx); 559 load_common: 560 ctx->seen |= SEEN_DATA | SEEN_CALL; 561 562 if (load_order > 0) { 563 emit(ARM_SUB_I(r_scratch, r_skb_hl, 564 1 << load_order), ctx); 565 emit(ARM_CMP_R(r_scratch, r_off), ctx); 566 condt = ARM_COND_GE; 567 } else { 568 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 569 condt = ARM_COND_HI; 570 } 571 572 /* 573 * test for negative offset, only if we are 574 * currently scheduled to take the fast 575 * path. this will update the flags so that 576 * the slowpath instruction are ignored if the 577 * offset is negative. 578 * 579 * for loard_order == 0 the HI condition will 580 * make loads at offset 0 take the slow path too. 581 */ 582 _emit(condt, ARM_CMP_I(r_off, 0), ctx); 583 584 _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), 585 ctx); 586 587 if (load_order == 0) 588 _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0), 589 ctx); 590 else if (load_order == 1) 591 emit_load_be16(condt, r_A, r_scratch, ctx); 592 else if (load_order == 2) 593 emit_load_be32(condt, r_A, r_scratch, ctx); 594 595 _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx); 596 597 /* the slowpath */ 598 emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx); 599 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 600 /* the offset is already in R1 */ 601 emit_blx_r(ARM_R3, ctx); 602 /* check the result of skb_copy_bits */ 603 emit(ARM_CMP_I(ARM_R1, 0), ctx); 604 emit_err_ret(ARM_COND_NE, ctx); 605 emit(ARM_MOV_R(r_A, ARM_R0), ctx); 606 break; 607 case BPF_LD | BPF_W | BPF_IND: 608 load_order = 2; 609 goto load_ind; 610 case BPF_LD | BPF_H | BPF_IND: 611 load_order = 1; 612 goto load_ind; 613 case BPF_LD | BPF_B | BPF_IND: 614 load_order = 0; 615 load_ind: 616 update_on_xread(ctx); 617 OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); 618 goto load_common; 619 case BPF_LDX | BPF_IMM: 620 ctx->seen |= SEEN_X; 621 emit_mov_i(r_X, k, ctx); 622 break; 623 case BPF_LDX | BPF_W | BPF_LEN: 624 ctx->seen |= SEEN_X | SEEN_SKB; 625 emit(ARM_LDR_I(r_X, r_skb, 626 offsetof(struct sk_buff, len)), ctx); 627 break; 628 case BPF_LDX | BPF_MEM: 629 ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); 630 emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 631 break; 632 case BPF_LDX | BPF_B | BPF_MSH: 633 /* x = ((*(frame + k)) & 0xf) << 2; */ 634 ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; 635 /* the interpreter should deal with the negative K */ 636 if ((int)k < 0) 637 return -1; 638 /* offset in r1: we might have to take the slow path */ 639 emit_mov_i(r_off, k, ctx); 640 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 641 642 /* load in r0: common with the slowpath */ 643 _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data, 644 ARM_R1), ctx); 645 /* 646 * emit_mov_i() might generate one or two instructions, 647 * the same holds for emit_blx_r() 648 */ 649 _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); 650 651 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 652 /* r_off is r1 */ 653 emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx); 654 emit_blx_r(ARM_R3, ctx); 655 /* check the return value of skb_copy_bits */ 656 emit(ARM_CMP_I(ARM_R1, 0), ctx); 657 emit_err_ret(ARM_COND_NE, ctx); 658 659 emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); 660 emit(ARM_LSL_I(r_X, r_X, 2), ctx); 661 break; 662 case BPF_ST: 663 ctx->seen |= SEEN_MEM_WORD(k); 664 emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 665 break; 666 case BPF_STX: 667 update_on_xread(ctx); 668 ctx->seen |= SEEN_MEM_WORD(k); 669 emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 670 break; 671 case BPF_ALU | BPF_ADD | BPF_K: 672 /* A += K */ 673 OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); 674 break; 675 case BPF_ALU | BPF_ADD | BPF_X: 676 update_on_xread(ctx); 677 emit(ARM_ADD_R(r_A, r_A, r_X), ctx); 678 break; 679 case BPF_ALU | BPF_SUB | BPF_K: 680 /* A -= K */ 681 OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); 682 break; 683 case BPF_ALU | BPF_SUB | BPF_X: 684 update_on_xread(ctx); 685 emit(ARM_SUB_R(r_A, r_A, r_X), ctx); 686 break; 687 case BPF_ALU | BPF_MUL | BPF_K: 688 /* A *= K */ 689 emit_mov_i(r_scratch, k, ctx); 690 emit(ARM_MUL(r_A, r_A, r_scratch), ctx); 691 break; 692 case BPF_ALU | BPF_MUL | BPF_X: 693 update_on_xread(ctx); 694 emit(ARM_MUL(r_A, r_A, r_X), ctx); 695 break; 696 case BPF_ALU | BPF_DIV | BPF_K: 697 if (k == 1) 698 break; 699 emit_mov_i(r_scratch, k, ctx); 700 emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); 701 break; 702 case BPF_ALU | BPF_DIV | BPF_X: 703 update_on_xread(ctx); 704 emit(ARM_CMP_I(r_X, 0), ctx); 705 emit_err_ret(ARM_COND_EQ, ctx); 706 emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); 707 break; 708 case BPF_ALU | BPF_MOD | BPF_K: 709 if (k == 1) { 710 emit_mov_i(r_A, 0, ctx); 711 break; 712 } 713 emit_mov_i(r_scratch, k, ctx); 714 emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); 715 break; 716 case BPF_ALU | BPF_MOD | BPF_X: 717 update_on_xread(ctx); 718 emit(ARM_CMP_I(r_X, 0), ctx); 719 emit_err_ret(ARM_COND_EQ, ctx); 720 emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD); 721 break; 722 case BPF_ALU | BPF_OR | BPF_K: 723 /* A |= K */ 724 OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); 725 break; 726 case BPF_ALU | BPF_OR | BPF_X: 727 update_on_xread(ctx); 728 emit(ARM_ORR_R(r_A, r_A, r_X), ctx); 729 break; 730 case BPF_ALU | BPF_XOR | BPF_K: 731 /* A ^= K; */ 732 OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); 733 break; 734 case BPF_ANC | SKF_AD_ALU_XOR_X: 735 case BPF_ALU | BPF_XOR | BPF_X: 736 /* A ^= X */ 737 update_on_xread(ctx); 738 emit(ARM_EOR_R(r_A, r_A, r_X), ctx); 739 break; 740 case BPF_ALU | BPF_AND | BPF_K: 741 /* A &= K */ 742 OP_IMM3(ARM_AND, r_A, r_A, k, ctx); 743 break; 744 case BPF_ALU | BPF_AND | BPF_X: 745 update_on_xread(ctx); 746 emit(ARM_AND_R(r_A, r_A, r_X), ctx); 747 break; 748 case BPF_ALU | BPF_LSH | BPF_K: 749 if (unlikely(k > 31)) 750 return -1; 751 emit(ARM_LSL_I(r_A, r_A, k), ctx); 752 break; 753 case BPF_ALU | BPF_LSH | BPF_X: 754 update_on_xread(ctx); 755 emit(ARM_LSL_R(r_A, r_A, r_X), ctx); 756 break; 757 case BPF_ALU | BPF_RSH | BPF_K: 758 if (unlikely(k > 31)) 759 return -1; 760 if (k) 761 emit(ARM_LSR_I(r_A, r_A, k), ctx); 762 break; 763 case BPF_ALU | BPF_RSH | BPF_X: 764 update_on_xread(ctx); 765 emit(ARM_LSR_R(r_A, r_A, r_X), ctx); 766 break; 767 case BPF_ALU | BPF_NEG: 768 /* A = -A */ 769 emit(ARM_RSB_I(r_A, r_A, 0), ctx); 770 break; 771 case BPF_JMP | BPF_JA: 772 /* pc += K */ 773 emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); 774 break; 775 case BPF_JMP | BPF_JEQ | BPF_K: 776 /* pc += (A == K) ? pc->jt : pc->jf */ 777 condt = ARM_COND_EQ; 778 goto cmp_imm; 779 case BPF_JMP | BPF_JGT | BPF_K: 780 /* pc += (A > K) ? pc->jt : pc->jf */ 781 condt = ARM_COND_HI; 782 goto cmp_imm; 783 case BPF_JMP | BPF_JGE | BPF_K: 784 /* pc += (A >= K) ? pc->jt : pc->jf */ 785 condt = ARM_COND_HS; 786 cmp_imm: 787 imm12 = imm8m(k); 788 if (imm12 < 0) { 789 emit_mov_i_no8m(r_scratch, k, ctx); 790 emit(ARM_CMP_R(r_A, r_scratch), ctx); 791 } else { 792 emit(ARM_CMP_I(r_A, imm12), ctx); 793 } 794 cond_jump: 795 if (inst->jt) 796 _emit(condt, ARM_B(b_imm(i + inst->jt + 1, 797 ctx)), ctx); 798 if (inst->jf) 799 _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, 800 ctx)), ctx); 801 break; 802 case BPF_JMP | BPF_JEQ | BPF_X: 803 /* pc += (A == X) ? pc->jt : pc->jf */ 804 condt = ARM_COND_EQ; 805 goto cmp_x; 806 case BPF_JMP | BPF_JGT | BPF_X: 807 /* pc += (A > X) ? pc->jt : pc->jf */ 808 condt = ARM_COND_HI; 809 goto cmp_x; 810 case BPF_JMP | BPF_JGE | BPF_X: 811 /* pc += (A >= X) ? pc->jt : pc->jf */ 812 condt = ARM_COND_CS; 813 cmp_x: 814 update_on_xread(ctx); 815 emit(ARM_CMP_R(r_A, r_X), ctx); 816 goto cond_jump; 817 case BPF_JMP | BPF_JSET | BPF_K: 818 /* pc += (A & K) ? pc->jt : pc->jf */ 819 condt = ARM_COND_NE; 820 /* not set iff all zeroes iff Z==1 iff EQ */ 821 822 imm12 = imm8m(k); 823 if (imm12 < 0) { 824 emit_mov_i_no8m(r_scratch, k, ctx); 825 emit(ARM_TST_R(r_A, r_scratch), ctx); 826 } else { 827 emit(ARM_TST_I(r_A, imm12), ctx); 828 } 829 goto cond_jump; 830 case BPF_JMP | BPF_JSET | BPF_X: 831 /* pc += (A & X) ? pc->jt : pc->jf */ 832 update_on_xread(ctx); 833 condt = ARM_COND_NE; 834 emit(ARM_TST_R(r_A, r_X), ctx); 835 goto cond_jump; 836 case BPF_RET | BPF_A: 837 emit(ARM_MOV_R(ARM_R0, r_A), ctx); 838 goto b_epilogue; 839 case BPF_RET | BPF_K: 840 if ((k == 0) && (ctx->ret0_fp_idx < 0)) 841 ctx->ret0_fp_idx = i; 842 emit_mov_i(ARM_R0, k, ctx); 843 b_epilogue: 844 if (i != ctx->skf->len - 1) 845 emit(ARM_B(b_imm(prog->len, ctx)), ctx); 846 break; 847 case BPF_MISC | BPF_TAX: 848 /* X = A */ 849 ctx->seen |= SEEN_X; 850 emit(ARM_MOV_R(r_X, r_A), ctx); 851 break; 852 case BPF_MISC | BPF_TXA: 853 /* A = X */ 854 update_on_xread(ctx); 855 emit(ARM_MOV_R(r_A, r_X), ctx); 856 break; 857 case BPF_ANC | SKF_AD_PROTOCOL: 858 /* A = ntohs(skb->protocol) */ 859 ctx->seen |= SEEN_SKB; 860 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 861 protocol) != 2); 862 off = offsetof(struct sk_buff, protocol); 863 emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); 864 emit_swap16(r_A, r_scratch, ctx); 865 break; 866 case BPF_ANC | SKF_AD_CPU: 867 /* r_scratch = current_thread_info() */ 868 OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); 869 /* A = current_thread_info()->cpu */ 870 BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); 871 off = offsetof(struct thread_info, cpu); 872 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 873 break; 874 case BPF_ANC | SKF_AD_IFINDEX: 875 case BPF_ANC | SKF_AD_HATYPE: 876 /* A = skb->dev->ifindex */ 877 /* A = skb->dev->type */ 878 ctx->seen |= SEEN_SKB; 879 off = offsetof(struct sk_buff, dev); 880 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 881 882 emit(ARM_CMP_I(r_scratch, 0), ctx); 883 emit_err_ret(ARM_COND_EQ, ctx); 884 885 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 886 ifindex) != 4); 887 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 888 type) != 2); 889 890 if (code == (BPF_ANC | SKF_AD_IFINDEX)) { 891 off = offsetof(struct net_device, ifindex); 892 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 893 } else { 894 /* 895 * offset of field "type" in "struct 896 * net_device" is above what can be 897 * used in the ldrh rd, [rn, #imm] 898 * instruction, so load the offset in 899 * a register and use ldrh rd, [rn, rm] 900 */ 901 off = offsetof(struct net_device, type); 902 emit_mov_i(ARM_R3, off, ctx); 903 emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx); 904 } 905 break; 906 case BPF_ANC | SKF_AD_MARK: 907 ctx->seen |= SEEN_SKB; 908 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 909 off = offsetof(struct sk_buff, mark); 910 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 911 break; 912 case BPF_ANC | SKF_AD_RXHASH: 913 ctx->seen |= SEEN_SKB; 914 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); 915 off = offsetof(struct sk_buff, hash); 916 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 917 break; 918 case BPF_ANC | SKF_AD_VLAN_TAG: 919 case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: 920 ctx->seen |= SEEN_SKB; 921 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); 922 off = offsetof(struct sk_buff, vlan_tci); 923 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 924 if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) 925 OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx); 926 else { 927 OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx); 928 OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); 929 } 930 break; 931 case BPF_ANC | SKF_AD_PKTTYPE: 932 ctx->seen |= SEEN_SKB; 933 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 934 __pkt_type_offset[0]) != 1); 935 off = PKT_TYPE_OFFSET(); 936 emit(ARM_LDRB_I(r_A, r_skb, off), ctx); 937 emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx); 938 #ifdef __BIG_ENDIAN_BITFIELD 939 emit(ARM_LSR_I(r_A, r_A, 5), ctx); 940 #endif 941 break; 942 case BPF_ANC | SKF_AD_QUEUE: 943 ctx->seen |= SEEN_SKB; 944 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 945 queue_mapping) != 2); 946 BUILD_BUG_ON(offsetof(struct sk_buff, 947 queue_mapping) > 0xff); 948 off = offsetof(struct sk_buff, queue_mapping); 949 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 950 break; 951 case BPF_ANC | SKF_AD_PAY_OFFSET: 952 ctx->seen |= SEEN_SKB | SEEN_CALL; 953 954 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 955 emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx); 956 emit_blx_r(ARM_R3, ctx); 957 emit(ARM_MOV_R(r_A, ARM_R0), ctx); 958 break; 959 case BPF_LDX | BPF_W | BPF_ABS: 960 /* 961 * load a 32bit word from struct seccomp_data. 962 * seccomp_check_filter() will already have checked 963 * that k is 32bit aligned and lies within the 964 * struct seccomp_data. 965 */ 966 ctx->seen |= SEEN_SKB; 967 emit(ARM_LDR_I(r_A, r_skb, k), ctx); 968 break; 969 default: 970 return -1; 971 } 972 973 if (ctx->flags & FLAG_IMM_OVERFLOW) 974 /* 975 * this instruction generated an overflow when 976 * trying to access the literal pool, so 977 * delegate this filter to the kernel interpreter. 978 */ 979 return -1; 980 } 981 982 /* compute offsets only during the first pass */ 983 if (ctx->target == NULL) 984 ctx->offsets[i] = ctx->idx * 4; 985 986 return 0; 987 } 988 989 990 void bpf_jit_compile(struct bpf_prog *fp) 991 { 992 struct bpf_binary_header *header; 993 struct jit_ctx ctx; 994 unsigned tmp_idx; 995 unsigned alloc_size; 996 u8 *target_ptr; 997 998 if (!bpf_jit_enable) 999 return; 1000 1001 memset(&ctx, 0, sizeof(ctx)); 1002 ctx.skf = fp; 1003 ctx.ret0_fp_idx = -1; 1004 1005 ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); 1006 if (ctx.offsets == NULL) 1007 return; 1008 1009 /* fake pass to fill in the ctx->seen */ 1010 if (unlikely(build_body(&ctx))) 1011 goto out; 1012 1013 tmp_idx = ctx.idx; 1014 build_prologue(&ctx); 1015 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 1016 1017 #if __LINUX_ARM_ARCH__ < 7 1018 tmp_idx = ctx.idx; 1019 build_epilogue(&ctx); 1020 ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4; 1021 1022 ctx.idx += ctx.imm_count; 1023 if (ctx.imm_count) { 1024 ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); 1025 if (ctx.imms == NULL) 1026 goto out; 1027 } 1028 #else 1029 /* there's nothing after the epilogue on ARMv7 */ 1030 build_epilogue(&ctx); 1031 #endif 1032 alloc_size = 4 * ctx.idx; 1033 header = bpf_jit_binary_alloc(alloc_size, &target_ptr, 1034 4, jit_fill_hole); 1035 if (header == NULL) 1036 goto out; 1037 1038 ctx.target = (u32 *) target_ptr; 1039 ctx.idx = 0; 1040 1041 build_prologue(&ctx); 1042 if (build_body(&ctx) < 0) { 1043 #if __LINUX_ARM_ARCH__ < 7 1044 if (ctx.imm_count) 1045 kfree(ctx.imms); 1046 #endif 1047 bpf_jit_binary_free(header); 1048 goto out; 1049 } 1050 build_epilogue(&ctx); 1051 1052 flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); 1053 1054 #if __LINUX_ARM_ARCH__ < 7 1055 if (ctx.imm_count) 1056 kfree(ctx.imms); 1057 #endif 1058 1059 if (bpf_jit_enable > 1) 1060 /* there are 2 passes here */ 1061 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); 1062 1063 set_memory_ro((unsigned long)header, header->pages); 1064 fp->bpf_func = (void *)ctx.target; 1065 fp->jited = 1; 1066 out: 1067 kfree(ctx.offsets); 1068 return; 1069 } 1070 1071 void bpf_jit_free(struct bpf_prog *fp) 1072 { 1073 unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; 1074 struct bpf_binary_header *header = (void *)addr; 1075 1076 if (!fp->jited) 1077 goto free_filter; 1078 1079 set_memory_rw(addr, header->pages); 1080 bpf_jit_binary_free(header); 1081 1082 free_filter: 1083 bpf_prog_unlock_free(fp); 1084 } 1085