/*
 * Just-In-Time compiler for BPF filters on 32bit ARM
 *
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 */

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>

#include "bpf_jit_32.h"

/*
 * ABI:
 *
 * r0	scratch register
 * r4	BPF register A
 * r5	BPF register X
 * r6	pointer to the skb
 * r7	skb->data
 * r8	skb_headlen(skb)
 */

#define r_scratch	ARM_R0
/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
#define r_off		ARM_R1
#define r_A		ARM_R4
#define r_X		ARM_R5
#define r_skb		ARM_R6
#define r_skb_data	ARM_R7
#define r_skb_hl	ARM_R8

#define SCRATCH_SP_OFFSET	0
#define SCRATCH_OFF(k)		(SCRATCH_SP_OFFSET + 4 * (k))

#define SEEN_MEM		((1 << BPF_MEMWORDS) - 1)
#define SEEN_MEM_WORD(k)	(1 << (k))
#define SEEN_X			(1 << BPF_MEMWORDS)
#define SEEN_CALL		(1 << (BPF_MEMWORDS + 1))
#define SEEN_SKB		(1 << (BPF_MEMWORDS + 2))
#define SEEN_DATA		(1 << (BPF_MEMWORDS + 3))

#define FLAG_NEED_X_RESET	(1 << 0)

struct jit_ctx {
	const struct bpf_prog *skf;
	unsigned idx;
	unsigned prologue_bytes;
	int ret0_fp_idx;
	u32 seen;
	u32 flags;
	u32 *offsets;
	u32 *target;
#if __LINUX_ARM_ARCH__ < 7
	u16 epilogue_bytes;
	u16 imm_count;
	u32 *imms;
#endif
};

int bpf_jit_enable __read_mostly;

static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
{
	u8 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 1);

	return (u64)err << 32 | ret;
}

static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
{
	u16 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 2);

	return (u64)err << 32 | ntohs(ret);
}

static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
{
	u32 ret;
	int err;

	err = skb_copy_bits(skb, offset, &ret, 4);

	return (u64)err << 32 | ntohl(ret);
}

/*
 * Wrapper that handles both OABI and EABI and assures Thumb2 interworking
 * (where the assembly routines like __aeabi_uidiv could cause problems).
 */
static u32 jit_udiv(u32 dividend, u32 divisor)
{
	return dividend / divisor;
}

static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
{
	inst |= (cond << 28);
	inst = __opcode_to_mem_arm(inst);

	if (ctx->target != NULL)
		ctx->target[ctx->idx] = inst;

	ctx->idx++;
}

/*
 * Emit an instruction that will be executed unconditionally.
 */
static inline void emit(u32 inst, struct jit_ctx *ctx)
{
	_emit(ARM_COND_AL, inst, ctx);
}

static u16 saved_regs(struct jit_ctx *ctx)
{
	u16 ret = 0;

	if ((ctx->skf->len > 1) ||
	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
		ret |= 1 << r_A;

#ifdef CONFIG_FRAME_POINTER
	ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
#else
	if (ctx->seen & SEEN_CALL)
		ret |= 1 << ARM_LR;
#endif
	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
		ret |= 1 << r_skb;
	if (ctx->seen & SEEN_DATA)
		ret |= (1 << r_skb_data) | (1 << r_skb_hl);
	if (ctx->seen & SEEN_X)
		ret |= 1 << r_X;

	return ret;
}

static inline int mem_words_used(struct jit_ctx *ctx)
{
	/* yes, we do waste some stack space IF there are "holes" in the set */
	return fls(ctx->seen & SEEN_MEM);
}

static inline bool is_load_to_a(u16 inst)
{
	switch (inst) {
	case BPF_LD | BPF_W | BPF_LEN:
	case BPF_LD | BPF_W | BPF_ABS:
	case BPF_LD | BPF_H | BPF_ABS:
	case BPF_LD | BPF_B | BPF_ABS:
		return true;
	default:
		return false;
	}
}

static void build_prologue(struct jit_ctx *ctx)
{
	u16 reg_set = saved_regs(ctx);
	u16 first_inst = ctx->skf->insns[0].code;
	u16 off;

#ifdef CONFIG_FRAME_POINTER
	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
	emit(ARM_PUSH(reg_set), ctx);
	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
	if (reg_set)
		emit(ARM_PUSH(reg_set), ctx);
#endif

	if (ctx->seen & (SEEN_DATA | SEEN_SKB))
		emit(ARM_MOV_R(r_skb, ARM_R0), ctx);

	if (ctx->seen & SEEN_DATA) {
		off = offsetof(struct sk_buff, data);
		emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
		/* headlen = len - data_len */
		off = offsetof(struct sk_buff, len);
		emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
		off = offsetof(struct sk_buff, data_len);
		emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
		emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
	}

	if (ctx->flags & FLAG_NEED_X_RESET)
		emit(ARM_MOV_I(r_X, 0), ctx);

	/* do not leak kernel data to userspace */
	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
		emit(ARM_MOV_I(r_A, 0), ctx);

	/* stack space for the BPF_MEM words */
	if (ctx->seen & SEEN_MEM)
		emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	u16 reg_set = saved_regs(ctx);

	if (ctx->seen & SEEN_MEM)
		emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);

	reg_set &= ~(1 << ARM_LR);

#ifdef CONFIG_FRAME_POINTER
	/* the first instruction of the prologue was: mov ip, sp */
	reg_set &= ~(1 << ARM_IP);
	reg_set |= (1 << ARM_SP);
	emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
	if (reg_set) {
		if (ctx->seen & SEEN_CALL)
			reg_set |= 1 << ARM_PC;
		emit(ARM_POP(reg_set), ctx);
	}

	if (!(ctx->seen & SEEN_CALL))
		emit(ARM_BX(ARM_LR), ctx);
#endif
}

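/*
 * imm8m() checks whether a 32-bit constant can be encoded as an ARM
 * "modified immediate": an 8-bit value rotated right by an even number
 * of bits.  On success it returns the 12-bit encoding (rotation in bits
 * 11:8, value in bits 7:0), otherwise -1.  For example, 0x00ff0000 is
 * 0xff rotated right by 16, so it encodes as 0x8ff (rot = 8, imm8 = 0xff).
 */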
static int16_t imm8m(u32 x)
{
	u32 rot;

	for (rot = 0; rot < 16; rot++)
		if ((x & ~ror32(0xff, 2 * rot)) == 0)
			return rol32(x, 2 * rot) | (rot << 8);

	return -1;
}

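/*
 * On ARMv7 a constant that is not a modified immediate can be built with
 * a MOVW/MOVT pair.  Older cores have no such instructions, so constants
 * are instead collected in a literal pool placed right after the epilogue
 * and loaded with a PC-relative LDR; imm_offset() below returns the LDR
 * offset, remembering that in ARM mode the PC reads as the address of the
 * current instruction plus 8.
 */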
#if __LINUX_ARM_ARCH__ < 7

static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
	unsigned i = 0, offset;
	u16 imm;

	/* on the "fake" run we just count them (duplicates included) */
	if (ctx->target == NULL) {
		ctx->imm_count++;
		return 0;
	}

	while ((i < ctx->imm_count) && ctx->imms[i]) {
		if (ctx->imms[i] == k)
			break;
		i++;
	}

	if (ctx->imms[i] == 0)
		ctx->imms[i] = k;

	/* constants go just after the epilogue */
	offset = ctx->offsets[ctx->skf->len];
	offset += ctx->prologue_bytes;
	offset += ctx->epilogue_bytes;
	offset += i * 4;

	ctx->target[offset / 4] = k;

	/* PC in ARM mode == address of the instruction + 8 */
	imm = offset - (8 + ctx->idx * 4);

	return imm;
}

#endif /* __LINUX_ARM_ARCH__ */

/*
 * Move an immediate that's not an imm8m to a core register.
 */
static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}

static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
{
	int imm12 = imm8m(val);

	if (imm12 >= 0)
		emit(ARM_MOV_I(rd, imm12), ctx);
	else
		emit_mov_i_no8m(rd, val, ctx);
}

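/*
 * Packet loads must be returned in network byte order.  Pre-ARMv6 cores
 * lack the REV/REV16 byte-swap instructions and unaligned word accesses,
 * so the helpers below assemble the value byte by byte; on ARMv6+ a single
 * LDR/LDRH followed by REV/REV16 does the job on little-endian kernels.
 */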
#if __LINUX_ARM_ARCH__ < 6

static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
	_emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
	_emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
	_emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
	_emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
}

static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
	_emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
	_emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
}

static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
{
	/* r_dst = (r_src << 8) | (r_src >> 8) */
	emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
	emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);

	/*
	 * we need to mask out the bits set in r_dst[23:16] due to
	 * the first shift instruction.
	 *
	 * note that 0x8ff is the encoded immediate 0x00ff0000.
	 */
	emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
}

#else /* ARMv6+ */

static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
#ifdef __LITTLE_ENDIAN
	_emit(cond, ARM_REV(r_res, r_res), ctx);
#endif
}

static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
{
	_emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
#ifdef __LITTLE_ENDIAN
	_emit(cond, ARM_REV16(r_res, r_res), ctx);
#endif
}

static inline void emit_swap16(u8 r_dst __maybe_unused,
			       u8 r_src __maybe_unused,
			       struct jit_ctx *ctx __maybe_unused)
{
#ifdef __LITTLE_ENDIAN
	emit(ARM_REV16(r_dst, r_src), ctx);
#endif
}

#endif /* __LINUX_ARM_ARCH__ < 6 */

/*
 * Compute the immediate value for a PC-relative branch.  The target is
 * encoded as a word offset relative to PC, which in ARM mode reads as the
 * address of the current instruction plus 8; hence the subtraction of
 * (ctx->idx * 4 + 8) and the final shift by two.
 */
static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
{
	u32 imm;

	if (ctx->target == NULL)
		return 0;
	/*
	 * BPF allows only forward jumps and the offset of the target is
	 * still the one computed during the first pass.
	 */
	imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);

	return imm >> 2;
}

#define OP_IMM3(op, r1, r2, imm_val, ctx)				\
	do {								\
		imm12 = imm8m(imm_val);					\
		if (imm12 < 0) {					\
			emit_mov_i_no8m(r_scratch, imm_val, ctx);	\
			emit(op ## _R((r1), (r2), r_scratch), ctx);	\
		} else {						\
			emit(op ## _I((r1), (r2), imm12), ctx);		\
		}							\
	} while (0)

static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
{
	if (ctx->ret0_fp_idx >= 0) {
		_emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
		/* NOP to keep the size constant between passes */
		emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
	} else {
		_emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
		_emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
	}
}

static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 5
	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);

	if (elf_hwcap & HWCAP_THUMB)
		emit(ARM_BX(tgt_reg), ctx);
	else
		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
#else
	emit(ARM_BLX_R(tgt_reg), ctx);
#endif
}

static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ == 7
	if (elf_hwcap & HWCAP_IDIVA) {
		emit(ARM_UDIV(rd, rm, rn), ctx);
		return;
	}
#endif
	if (rm != ARM_R0)
		emit(ARM_MOV_R(ARM_R0, rm), ctx);
	if (rn != ARM_R1)
		emit(ARM_MOV_R(ARM_R1, rn), ctx);

	ctx->seen |= SEEN_CALL;
	emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
	emit_blx_r(ARM_R3, ctx);

	if (rd != ARM_R0)
		emit(ARM_MOV_R(rd, ARM_R0), ctx);
}

static inline void update_on_xread(struct jit_ctx *ctx)
{
	if (!(ctx->seen & SEEN_X))
		ctx->flags |= FLAG_NEED_X_RESET;

	ctx->seen |= SEEN_X;
}

static int build_body(struct jit_ctx *ctx)
{
	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
	const struct bpf_prog *prog = ctx->skf;
	const struct sock_filter *inst;
	unsigned i, load_order, off, condt;
	int imm12;
	u32 k;

	for (i = 0; i < prog->len; i++) {
		u16 code;

		inst = &(prog->insns[i]);
		/* K as an immediate value operand */
		k = inst->k;
		code = bpf_anc_helper(inst);

		/* compute offsets only in the fake pass */
		if (ctx->target == NULL)
			ctx->offsets[i] = ctx->idx * 4;

		switch (code) {
		case BPF_LD | BPF_IMM:
			emit_mov_i(r_A, k, ctx);
			break;
		case BPF_LD | BPF_W | BPF_LEN:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
			emit(ARM_LDR_I(r_A, r_skb,
				       offsetof(struct sk_buff, len)), ctx);
			break;
		case BPF_LD | BPF_MEM:
			/* A = scratch[k] */
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
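		/*
		 * Absolute and indirect packet loads: if the (non-negative)
		 * offset is within the linear header, the value is loaded
		 * directly from skb->data; otherwise we fall back to the
		 * jit_get_skb_* helpers, which wrap skb_copy_bits() and
		 * report failure in the upper 32 bits of their return value
		 * (checked in r1 below).
		 */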
		case BPF_LD | BPF_W | BPF_ABS:
			load_order = 2;
			goto load;
		case BPF_LD | BPF_H | BPF_ABS:
			load_order = 1;
			goto load;
		case BPF_LD | BPF_B | BPF_ABS:
			load_order = 0;
load:
			/* the interpreter will deal with the negative K */
			if ((int)k < 0)
				return -ENOTSUPP;
			emit_mov_i(r_off, k, ctx);
load_common:
			ctx->seen |= SEEN_DATA | SEEN_CALL;

			if (load_order > 0) {
				emit(ARM_SUB_I(r_scratch, r_skb_hl,
					       1 << load_order), ctx);
				emit(ARM_CMP_R(r_scratch, r_off), ctx);
				condt = ARM_COND_HS;
			} else {
				emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
				condt = ARM_COND_HI;
			}

			_emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
			      ctx);

			if (load_order == 0)
				_emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
				      ctx);
			else if (load_order == 1)
				emit_load_be16(condt, r_A, r_scratch, ctx);
			else if (load_order == 2)
				emit_load_be32(condt, r_A, r_scratch, ctx);

			_emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);

			/* the slowpath */
			emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
			/* the offset is already in R1 */
			emit_blx_r(ARM_R3, ctx);
			/* check the result of skb_copy_bits */
			emit(ARM_CMP_I(ARM_R1, 0), ctx);
			emit_err_ret(ARM_COND_NE, ctx);
			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
			break;
		case BPF_LD | BPF_W | BPF_IND:
			load_order = 2;
			goto load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			load_order = 1;
			goto load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			load_order = 0;
load_ind:
			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
			goto load_common;
		case BPF_LDX | BPF_IMM:
			ctx->seen |= SEEN_X;
			emit_mov_i(r_X, k, ctx);
			break;
		case BPF_LDX | BPF_W | BPF_LEN:
			ctx->seen |= SEEN_X | SEEN_SKB;
			emit(ARM_LDR_I(r_X, r_skb,
				       offsetof(struct sk_buff, len)), ctx);
			break;
		case BPF_LDX | BPF_MEM:
			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_LDX | BPF_B | BPF_MSH:
			/* x = ((*(frame + k)) & 0xf) << 2; */
			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
			/* the interpreter should deal with the negative K */
			if ((int)k < 0)
				return -1;
			/* offset in r1: we might have to take the slow path */
			emit_mov_i(r_off, k, ctx);
			emit(ARM_CMP_R(r_skb_hl, r_off), ctx);

			/* load in r0: common with the slowpath */
			_emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
						      ARM_R1), ctx);
			/*
			 * emit_mov_i() might generate one or two instructions,
			 * the same holds for emit_blx_r()
			 */
			_emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);

			emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
			/* r_off is r1 */
			emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
			emit_blx_r(ARM_R3, ctx);
			/* check the return value of skb_copy_bits */
			emit(ARM_CMP_I(ARM_R1, 0), ctx);
			emit_err_ret(ARM_COND_NE, ctx);

			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
			break;
		case BPF_ST:
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_STX:
			update_on_xread(ctx);
			ctx->seen |= SEEN_MEM_WORD(k);
			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_K:
			/* A += K */
			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_ADD | BPF_X:
			update_on_xread(ctx);
			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_K:
			/* A -= K */
			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_SUB | BPF_X:
			update_on_xread(ctx);
			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
			/* A *= K */
			emit_mov_i(r_scratch, k, ctx);
			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
			break;
		case BPF_ALU | BPF_MUL | BPF_X:
			update_on_xread(ctx);
			emit(ARM_MUL(r_A, r_A, r_X), ctx);
			break;
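		/*
		 * Division: ARMv7 cores advertising HWCAP_IDIVA use the UDIV
		 * instruction directly, everything else calls jit_udiv() via
		 * emit_blx_r().  A zero divisor in the X-register variant
		 * bails out through emit_err_ret(), i.e. the filter returns 0.
		 */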
		case BPF_ALU | BPF_DIV | BPF_K:
			if (k == 1)
				break;
			emit_mov_i(r_scratch, k, ctx);
			emit_udiv(r_A, r_A, r_scratch, ctx);
			break;
		case BPF_ALU | BPF_DIV | BPF_X:
			update_on_xread(ctx);
			emit(ARM_CMP_I(r_X, 0), ctx);
			emit_err_ret(ARM_COND_EQ, ctx);
			emit_udiv(r_A, r_A, r_X, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_K:
			/* A |= K */
			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_OR | BPF_X:
			update_on_xread(ctx);
			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_XOR | BPF_K:
			/* A ^= K; */
			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
			break;
		case BPF_ANC | SKF_AD_ALU_XOR_X:
		case BPF_ALU | BPF_XOR | BPF_X:
			/* A ^= X */
			update_on_xread(ctx);
			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			/* A &= K */
			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
			break;
		case BPF_ALU | BPF_AND | BPF_X:
			update_on_xread(ctx);
			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
			if (unlikely(k > 31))
				return -1;
			emit(ARM_LSL_I(r_A, r_A, k), ctx);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
			update_on_xread(ctx);
			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_K:
			if (unlikely(k > 31))
				return -1;
			emit(ARM_LSR_I(r_A, r_A, k), ctx);
			break;
		case BPF_ALU | BPF_RSH | BPF_X:
			update_on_xread(ctx);
			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
			break;
		case BPF_ALU | BPF_NEG:
			/* A = -A */
			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
			break;
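		/*
		 * Conditional jumps: the comparison sets the flags, then
		 * cond_jump emits a branch under condt for a non-zero jt and
		 * a branch under the inverted condition (condt ^ 1 flips the
		 * ARM condition code) for a non-zero jf.
		 */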
		case BPF_JMP | BPF_JA:
			/* pc += K */
			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
			/* pc += (A == K) ? pc->jt : pc->jf */
			condt = ARM_COND_EQ;
			goto cmp_imm;
		case BPF_JMP | BPF_JGT | BPF_K:
			/* pc += (A > K) ? pc->jt : pc->jf */
			condt = ARM_COND_HI;
			goto cmp_imm;
		case BPF_JMP | BPF_JGE | BPF_K:
			/* pc += (A >= K) ? pc->jt : pc->jf */
			condt = ARM_COND_HS;
cmp_imm:
			imm12 = imm8m(k);
			if (imm12 < 0) {
				emit_mov_i_no8m(r_scratch, k, ctx);
				emit(ARM_CMP_R(r_A, r_scratch), ctx);
			} else {
				emit(ARM_CMP_I(r_A, imm12), ctx);
			}
cond_jump:
			if (inst->jt)
				_emit(condt, ARM_B(b_imm(i + inst->jt + 1,
						   ctx)), ctx);
			if (inst->jf)
				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
						       ctx)), ctx);
			break;
		case BPF_JMP | BPF_JEQ | BPF_X:
			/* pc += (A == X) ? pc->jt : pc->jf */
			condt = ARM_COND_EQ;
			goto cmp_x;
		case BPF_JMP | BPF_JGT | BPF_X:
			/* pc += (A > X) ? pc->jt : pc->jf */
			condt = ARM_COND_HI;
			goto cmp_x;
		case BPF_JMP | BPF_JGE | BPF_X:
			/* pc += (A >= X) ? pc->jt : pc->jf */
			condt = ARM_COND_CS;
cmp_x:
			update_on_xread(ctx);
			emit(ARM_CMP_R(r_A, r_X), ctx);
			goto cond_jump;
		case BPF_JMP | BPF_JSET | BPF_K:
			/* pc += (A & K) ? pc->jt : pc->jf */
			condt = ARM_COND_NE;
			/* not set iff all zeroes iff Z==1 iff EQ */

			imm12 = imm8m(k);
			if (imm12 < 0) {
				emit_mov_i_no8m(r_scratch, k, ctx);
				emit(ARM_TST_R(r_A, r_scratch), ctx);
			} else {
				emit(ARM_TST_I(r_A, imm12), ctx);
			}
			goto cond_jump;
		case BPF_JMP | BPF_JSET | BPF_X:
			/* pc += (A & X) ? pc->jt : pc->jf */
			update_on_xread(ctx);
			condt = ARM_COND_NE;
			emit(ARM_TST_R(r_A, r_X), ctx);
			goto cond_jump;
		case BPF_RET | BPF_A:
			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
			goto b_epilogue;
		case BPF_RET | BPF_K:
			if ((k == 0) && (ctx->ret0_fp_idx < 0))
				ctx->ret0_fp_idx = i;
			emit_mov_i(ARM_R0, k, ctx);
b_epilogue:
			if (i != ctx->skf->len - 1)
				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
			break;
		case BPF_MISC | BPF_TAX:
			/* X = A */
			ctx->seen |= SEEN_X;
			emit(ARM_MOV_R(r_X, r_A), ctx);
			break;
		case BPF_MISC | BPF_TXA:
			/* A = X */
			update_on_xread(ctx);
			emit(ARM_MOV_R(r_A, r_X), ctx);
			break;
		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  protocol) != 2);
			off = offsetof(struct sk_buff, protocol);
			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
			emit_swap16(r_A, r_scratch, ctx);
			break;
		case BPF_ANC | SKF_AD_CPU:
			/* r_scratch = current_thread_info() */
			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
			/* A = current_thread_info()->cpu */
			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
			off = offsetof(struct thread_info, cpu);
			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
			break;
		case BPF_ANC | SKF_AD_IFINDEX:
			/* A = skb->dev->ifindex */
			ctx->seen |= SEEN_SKB;
			off = offsetof(struct sk_buff, dev);
			emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);

			emit(ARM_CMP_I(r_scratch, 0), ctx);
			emit_err_ret(ARM_COND_EQ, ctx);

			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
						  ifindex) != 4);
			off = offsetof(struct net_device, ifindex);
			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
			break;
		case BPF_ANC | SKF_AD_MARK:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
			off = offsetof(struct sk_buff, mark);
			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
			break;
		case BPF_ANC | SKF_AD_RXHASH:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
			off = offsetof(struct sk_buff, hash);
			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
			break;
		case BPF_ANC | SKF_AD_VLAN_TAG:
		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
			off = offsetof(struct sk_buff, vlan_tci);
			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
			else
				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
			break;
		case BPF_ANC | SKF_AD_QUEUE:
			ctx->seen |= SEEN_SKB;
			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
						  queue_mapping) != 2);
			BUILD_BUG_ON(offsetof(struct sk_buff,
					      queue_mapping) > 0xff);
			off = offsetof(struct sk_buff, queue_mapping);
			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
			break;
		default:
			return -1;
		}
	}

	/* compute offsets only during the first pass */
	if (ctx->target == NULL)
		ctx->offsets[i] = ctx->idx * 4;

	return 0;
}

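/*
 * The filter is translated in two passes: a first "fake" pass with
 * ctx.target == NULL that only records per-instruction offsets, the
 * registers and scratch words actually used and (pre-ARMv7) the number of
 * literal-pool constants, and a second pass that emits the instructions
 * into the buffer obtained from module_alloc().
 */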
void bpf_jit_compile(struct bpf_prog *fp)
{
	struct jit_ctx ctx;
	unsigned tmp_idx;
	unsigned alloc_size;

	if (!bpf_jit_enable)
		return;

	memset(&ctx, 0, sizeof(ctx));
	ctx.skf = fp;
	ctx.ret0_fp_idx = -1;

	ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
	if (ctx.offsets == NULL)
		return;

	/* fake pass to fill in the ctx->seen */
	if (unlikely(build_body(&ctx)))
		goto out;

	tmp_idx = ctx.idx;
	build_prologue(&ctx);
	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;

#if __LINUX_ARM_ARCH__ < 7
	tmp_idx = ctx.idx;
	build_epilogue(&ctx);
	ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4;

	ctx.idx += ctx.imm_count;
	if (ctx.imm_count) {
		ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
		if (ctx.imms == NULL)
			goto out;
	}
#else
	/* there's nothing after the epilogue on ARMv7 */
	build_epilogue(&ctx);
#endif

	alloc_size = 4 * ctx.idx;
	ctx.target = module_alloc(alloc_size);
	if (unlikely(ctx.target == NULL))
		goto out;

	ctx.idx = 0;
	build_prologue(&ctx);
	build_body(&ctx);
	build_epilogue(&ctx);

	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));

#if __LINUX_ARM_ARCH__ < 7
	if (ctx.imm_count)
		kfree(ctx.imms);
#endif

	if (bpf_jit_enable > 1)
		/* there are 2 passes here */
		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);

	fp->bpf_func = (void *)ctx.target;
	fp->jited = 1;
out:
	kfree(ctx.offsets);
	return;
}

void bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited)
		module_free(NULL, fp->bpf_func);
	kfree(fp);
}