1 /* 2 * Just-In-Time compiler for BPF filters on 32bit ARM 3 * 4 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the 8 * Free Software Foundation; version 2 of the License. 9 */ 10 11 #include <linux/bitops.h> 12 #include <linux/compiler.h> 13 #include <linux/errno.h> 14 #include <linux/filter.h> 15 #include <linux/moduleloader.h> 16 #include <linux/netdevice.h> 17 #include <linux/string.h> 18 #include <linux/slab.h> 19 #include <linux/if_vlan.h> 20 #include <asm/cacheflush.h> 21 #include <asm/hwcap.h> 22 #include <asm/opcodes.h> 23 24 #include "bpf_jit_32.h" 25 26 /* 27 * ABI: 28 * 29 * r0 scratch register 30 * r4 BPF register A 31 * r5 BPF register X 32 * r6 pointer to the skb 33 * r7 skb->data 34 * r8 skb_headlen(skb) 35 */ 36 37 #define r_scratch ARM_R0 38 /* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ 39 #define r_off ARM_R1 40 #define r_A ARM_R4 41 #define r_X ARM_R5 42 #define r_skb ARM_R6 43 #define r_skb_data ARM_R7 44 #define r_skb_hl ARM_R8 45 46 #define SCRATCH_SP_OFFSET 0 47 #define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) 48 49 #define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) 50 #define SEEN_MEM_WORD(k) (1 << (k)) 51 #define SEEN_X (1 << BPF_MEMWORDS) 52 #define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) 53 #define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) 54 #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) 55 56 #define FLAG_NEED_X_RESET (1 << 0) 57 58 struct jit_ctx { 59 const struct sk_filter *skf; 60 unsigned idx; 61 unsigned prologue_bytes; 62 int ret0_fp_idx; 63 u32 seen; 64 u32 flags; 65 u32 *offsets; 66 u32 *target; 67 #if __LINUX_ARM_ARCH__ < 7 68 u16 epilogue_bytes; 69 u16 imm_count; 70 u32 *imms; 71 #endif 72 }; 73 74 int bpf_jit_enable __read_mostly; 75 76 static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) 77 { 78 u8 ret; 79 int err; 80 81 err = skb_copy_bits(skb, offset, &ret, 1); 82 83 return (u64)err << 32 | ret; 84 } 85 86 static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) 87 { 88 u16 ret; 89 int err; 90 91 err = skb_copy_bits(skb, offset, &ret, 2); 92 93 return (u64)err << 32 | ntohs(ret); 94 } 95 96 static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) 97 { 98 u32 ret; 99 int err; 100 101 err = skb_copy_bits(skb, offset, &ret, 4); 102 103 return (u64)err << 32 | ntohl(ret); 104 } 105 106 /* 107 * Wrapper that handles both OABI and EABI and assures Thumb2 interworking 108 * (where the assembly routines like __aeabi_uidiv could cause problems). 109 */ 110 static u32 jit_udiv(u32 dividend, u32 divisor) 111 { 112 return dividend / divisor; 113 } 114 115 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) 116 { 117 inst |= (cond << 28); 118 inst = __opcode_to_mem_arm(inst); 119 120 if (ctx->target != NULL) 121 ctx->target[ctx->idx] = inst; 122 123 ctx->idx++; 124 } 125 126 /* 127 * Emit an instruction that will be executed unconditionally. 128 */ 129 static inline void emit(u32 inst, struct jit_ctx *ctx) 130 { 131 _emit(ARM_COND_AL, inst, ctx); 132 } 133 134 static u16 saved_regs(struct jit_ctx *ctx) 135 { 136 u16 ret = 0; 137 138 if ((ctx->skf->len > 1) || 139 (ctx->skf->insns[0].code == BPF_S_RET_A)) 140 ret |= 1 << r_A; 141 142 #ifdef CONFIG_FRAME_POINTER 143 ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); 144 #else 145 if (ctx->seen & SEEN_CALL) 146 ret |= 1 << ARM_LR; 147 #endif 148 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 149 ret |= 1 << r_skb; 150 if (ctx->seen & SEEN_DATA) 151 ret |= (1 << r_skb_data) | (1 << r_skb_hl); 152 if (ctx->seen & SEEN_X) 153 ret |= 1 << r_X; 154 155 return ret; 156 } 157 158 static inline int mem_words_used(struct jit_ctx *ctx) 159 { 160 /* yes, we do waste some stack space IF there are "holes" in the set" */ 161 return fls(ctx->seen & SEEN_MEM); 162 } 163 164 static inline bool is_load_to_a(u16 inst) 165 { 166 switch (inst) { 167 case BPF_S_LD_W_LEN: 168 case BPF_S_LD_W_ABS: 169 case BPF_S_LD_H_ABS: 170 case BPF_S_LD_B_ABS: 171 case BPF_S_ANC_CPU: 172 case BPF_S_ANC_IFINDEX: 173 case BPF_S_ANC_MARK: 174 case BPF_S_ANC_PROTOCOL: 175 case BPF_S_ANC_RXHASH: 176 case BPF_S_ANC_VLAN_TAG: 177 case BPF_S_ANC_VLAN_TAG_PRESENT: 178 case BPF_S_ANC_QUEUE: 179 return true; 180 default: 181 return false; 182 } 183 } 184 185 static void build_prologue(struct jit_ctx *ctx) 186 { 187 u16 reg_set = saved_regs(ctx); 188 u16 first_inst = ctx->skf->insns[0].code; 189 u16 off; 190 191 #ifdef CONFIG_FRAME_POINTER 192 emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); 193 emit(ARM_PUSH(reg_set), ctx); 194 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); 195 #else 196 if (reg_set) 197 emit(ARM_PUSH(reg_set), ctx); 198 #endif 199 200 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 201 emit(ARM_MOV_R(r_skb, ARM_R0), ctx); 202 203 if (ctx->seen & SEEN_DATA) { 204 off = offsetof(struct sk_buff, data); 205 emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); 206 /* headlen = len - data_len */ 207 off = offsetof(struct sk_buff, len); 208 emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); 209 off = offsetof(struct sk_buff, data_len); 210 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 211 emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); 212 } 213 214 if (ctx->flags & FLAG_NEED_X_RESET) 215 emit(ARM_MOV_I(r_X, 0), ctx); 216 217 /* do not leak kernel data to userspace */ 218 if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) 219 emit(ARM_MOV_I(r_A, 0), ctx); 220 221 /* stack space for the BPF_MEM words */ 222 if (ctx->seen & SEEN_MEM) 223 emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 224 } 225 226 static void build_epilogue(struct jit_ctx *ctx) 227 { 228 u16 reg_set = saved_regs(ctx); 229 230 if (ctx->seen & SEEN_MEM) 231 emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 232 233 reg_set &= ~(1 << ARM_LR); 234 235 #ifdef CONFIG_FRAME_POINTER 236 /* the first instruction of the prologue was: mov ip, sp */ 237 reg_set &= ~(1 << ARM_IP); 238 reg_set |= (1 << ARM_SP); 239 emit(ARM_LDM(ARM_SP, reg_set), ctx); 240 #else 241 if (reg_set) { 242 if (ctx->seen & SEEN_CALL) 243 reg_set |= 1 << ARM_PC; 244 emit(ARM_POP(reg_set), ctx); 245 } 246 247 if (!(ctx->seen & SEEN_CALL)) 248 emit(ARM_BX(ARM_LR), ctx); 249 #endif 250 } 251 252 static int16_t imm8m(u32 x) 253 { 254 u32 rot; 255 256 for (rot = 0; rot < 16; rot++) 257 if ((x & ~ror32(0xff, 2 * rot)) == 0) 258 return rol32(x, 2 * rot) | (rot << 8); 259 260 return -1; 261 } 262 263 #if __LINUX_ARM_ARCH__ < 7 264 265 static u16 imm_offset(u32 k, struct jit_ctx *ctx) 266 { 267 unsigned i = 0, offset; 268 u16 imm; 269 270 /* on the "fake" run we just count them (duplicates included) */ 271 if (ctx->target == NULL) { 272 ctx->imm_count++; 273 return 0; 274 } 275 276 while ((i < ctx->imm_count) && ctx->imms[i]) { 277 if (ctx->imms[i] == k) 278 break; 279 i++; 280 } 281 282 if (ctx->imms[i] == 0) 283 ctx->imms[i] = k; 284 285 /* constants go just after the epilogue */ 286 offset = ctx->offsets[ctx->skf->len]; 287 offset += ctx->prologue_bytes; 288 offset += ctx->epilogue_bytes; 289 offset += i * 4; 290 291 ctx->target[offset / 4] = k; 292 293 /* PC in ARM mode == address of the instruction + 8 */ 294 imm = offset - (8 + ctx->idx * 4); 295 296 return imm; 297 } 298 299 #endif /* __LINUX_ARM_ARCH__ */ 300 301 /* 302 * Move an immediate that's not an imm8m to a core register. 303 */ 304 static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) 305 { 306 #if __LINUX_ARM_ARCH__ < 7 307 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 308 #else 309 emit(ARM_MOVW(rd, val & 0xffff), ctx); 310 if (val > 0xffff) 311 emit(ARM_MOVT(rd, val >> 16), ctx); 312 #endif 313 } 314 315 static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) 316 { 317 int imm12 = imm8m(val); 318 319 if (imm12 >= 0) 320 emit(ARM_MOV_I(rd, imm12), ctx); 321 else 322 emit_mov_i_no8m(rd, val, ctx); 323 } 324 325 #if __LINUX_ARM_ARCH__ < 6 326 327 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 328 { 329 _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); 330 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 331 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); 332 _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); 333 _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); 334 _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); 335 _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); 336 _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); 337 } 338 339 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 340 { 341 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 342 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); 343 _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); 344 } 345 346 static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) 347 { 348 /* r_dst = (r_src << 8) | (r_src >> 8) */ 349 emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); 350 emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); 351 352 /* 353 * we need to mask out the bits set in r_dst[23:16] due to 354 * the first shift instruction. 355 * 356 * note that 0x8ff is the encoded immediate 0x00ff0000. 357 */ 358 emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); 359 } 360 361 #else /* ARMv6+ */ 362 363 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 364 { 365 _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); 366 #ifdef __LITTLE_ENDIAN 367 _emit(cond, ARM_REV(r_res, r_res), ctx); 368 #endif 369 } 370 371 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 372 { 373 _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); 374 #ifdef __LITTLE_ENDIAN 375 _emit(cond, ARM_REV16(r_res, r_res), ctx); 376 #endif 377 } 378 379 static inline void emit_swap16(u8 r_dst __maybe_unused, 380 u8 r_src __maybe_unused, 381 struct jit_ctx *ctx __maybe_unused) 382 { 383 #ifdef __LITTLE_ENDIAN 384 emit(ARM_REV16(r_dst, r_src), ctx); 385 #endif 386 } 387 388 #endif /* __LINUX_ARM_ARCH__ < 6 */ 389 390 391 /* Compute the immediate value for a PC-relative branch. */ 392 static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) 393 { 394 u32 imm; 395 396 if (ctx->target == NULL) 397 return 0; 398 /* 399 * BPF allows only forward jumps and the offset of the target is 400 * still the one computed during the first pass. 401 */ 402 imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); 403 404 return imm >> 2; 405 } 406 407 #define OP_IMM3(op, r1, r2, imm_val, ctx) \ 408 do { \ 409 imm12 = imm8m(imm_val); \ 410 if (imm12 < 0) { \ 411 emit_mov_i_no8m(r_scratch, imm_val, ctx); \ 412 emit(op ## _R((r1), (r2), r_scratch), ctx); \ 413 } else { \ 414 emit(op ## _I((r1), (r2), imm12), ctx); \ 415 } \ 416 } while (0) 417 418 static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) 419 { 420 if (ctx->ret0_fp_idx >= 0) { 421 _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); 422 /* NOP to keep the size constant between passes */ 423 emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); 424 } else { 425 _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); 426 _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); 427 } 428 } 429 430 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 431 { 432 #if __LINUX_ARM_ARCH__ < 5 433 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 434 435 if (elf_hwcap & HWCAP_THUMB) 436 emit(ARM_BX(tgt_reg), ctx); 437 else 438 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 439 #else 440 emit(ARM_BLX_R(tgt_reg), ctx); 441 #endif 442 } 443 444 static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) 445 { 446 #if __LINUX_ARM_ARCH__ == 7 447 if (elf_hwcap & HWCAP_IDIVA) { 448 emit(ARM_UDIV(rd, rm, rn), ctx); 449 return; 450 } 451 #endif 452 if (rm != ARM_R0) 453 emit(ARM_MOV_R(ARM_R0, rm), ctx); 454 if (rn != ARM_R1) 455 emit(ARM_MOV_R(ARM_R1, rn), ctx); 456 457 ctx->seen |= SEEN_CALL; 458 emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); 459 emit_blx_r(ARM_R3, ctx); 460 461 if (rd != ARM_R0) 462 emit(ARM_MOV_R(rd, ARM_R0), ctx); 463 } 464 465 static inline void update_on_xread(struct jit_ctx *ctx) 466 { 467 if (!(ctx->seen & SEEN_X)) 468 ctx->flags |= FLAG_NEED_X_RESET; 469 470 ctx->seen |= SEEN_X; 471 } 472 473 static int build_body(struct jit_ctx *ctx) 474 { 475 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; 476 const struct sk_filter *prog = ctx->skf; 477 const struct sock_filter *inst; 478 unsigned i, load_order, off, condt; 479 int imm12; 480 u32 k; 481 482 for (i = 0; i < prog->len; i++) { 483 inst = &(prog->insns[i]); 484 /* K as an immediate value operand */ 485 k = inst->k; 486 487 /* compute offsets only in the fake pass */ 488 if (ctx->target == NULL) 489 ctx->offsets[i] = ctx->idx * 4; 490 491 switch (inst->code) { 492 case BPF_S_LD_IMM: 493 emit_mov_i(r_A, k, ctx); 494 break; 495 case BPF_S_LD_W_LEN: 496 ctx->seen |= SEEN_SKB; 497 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 498 emit(ARM_LDR_I(r_A, r_skb, 499 offsetof(struct sk_buff, len)), ctx); 500 break; 501 case BPF_S_LD_MEM: 502 /* A = scratch[k] */ 503 ctx->seen |= SEEN_MEM_WORD(k); 504 emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 505 break; 506 case BPF_S_LD_W_ABS: 507 load_order = 2; 508 goto load; 509 case BPF_S_LD_H_ABS: 510 load_order = 1; 511 goto load; 512 case BPF_S_LD_B_ABS: 513 load_order = 0; 514 load: 515 /* the interpreter will deal with the negative K */ 516 if ((int)k < 0) 517 return -ENOTSUPP; 518 emit_mov_i(r_off, k, ctx); 519 load_common: 520 ctx->seen |= SEEN_DATA | SEEN_CALL; 521 522 if (load_order > 0) { 523 emit(ARM_SUB_I(r_scratch, r_skb_hl, 524 1 << load_order), ctx); 525 emit(ARM_CMP_R(r_scratch, r_off), ctx); 526 condt = ARM_COND_HS; 527 } else { 528 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 529 condt = ARM_COND_HI; 530 } 531 532 _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), 533 ctx); 534 535 if (load_order == 0) 536 _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0), 537 ctx); 538 else if (load_order == 1) 539 emit_load_be16(condt, r_A, r_scratch, ctx); 540 else if (load_order == 2) 541 emit_load_be32(condt, r_A, r_scratch, ctx); 542 543 _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx); 544 545 /* the slowpath */ 546 emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx); 547 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 548 /* the offset is already in R1 */ 549 emit_blx_r(ARM_R3, ctx); 550 /* check the result of skb_copy_bits */ 551 emit(ARM_CMP_I(ARM_R1, 0), ctx); 552 emit_err_ret(ARM_COND_NE, ctx); 553 emit(ARM_MOV_R(r_A, ARM_R0), ctx); 554 break; 555 case BPF_S_LD_W_IND: 556 load_order = 2; 557 goto load_ind; 558 case BPF_S_LD_H_IND: 559 load_order = 1; 560 goto load_ind; 561 case BPF_S_LD_B_IND: 562 load_order = 0; 563 load_ind: 564 OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); 565 goto load_common; 566 case BPF_S_LDX_IMM: 567 ctx->seen |= SEEN_X; 568 emit_mov_i(r_X, k, ctx); 569 break; 570 case BPF_S_LDX_W_LEN: 571 ctx->seen |= SEEN_X | SEEN_SKB; 572 emit(ARM_LDR_I(r_X, r_skb, 573 offsetof(struct sk_buff, len)), ctx); 574 break; 575 case BPF_S_LDX_MEM: 576 ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); 577 emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 578 break; 579 case BPF_S_LDX_B_MSH: 580 /* x = ((*(frame + k)) & 0xf) << 2; */ 581 ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; 582 /* the interpreter should deal with the negative K */ 583 if ((int)k < 0) 584 return -1; 585 /* offset in r1: we might have to take the slow path */ 586 emit_mov_i(r_off, k, ctx); 587 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 588 589 /* load in r0: common with the slowpath */ 590 _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data, 591 ARM_R1), ctx); 592 /* 593 * emit_mov_i() might generate one or two instructions, 594 * the same holds for emit_blx_r() 595 */ 596 _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); 597 598 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 599 /* r_off is r1 */ 600 emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx); 601 emit_blx_r(ARM_R3, ctx); 602 /* check the return value of skb_copy_bits */ 603 emit(ARM_CMP_I(ARM_R1, 0), ctx); 604 emit_err_ret(ARM_COND_NE, ctx); 605 606 emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); 607 emit(ARM_LSL_I(r_X, r_X, 2), ctx); 608 break; 609 case BPF_S_ST: 610 ctx->seen |= SEEN_MEM_WORD(k); 611 emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 612 break; 613 case BPF_S_STX: 614 update_on_xread(ctx); 615 ctx->seen |= SEEN_MEM_WORD(k); 616 emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 617 break; 618 case BPF_S_ALU_ADD_K: 619 /* A += K */ 620 OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); 621 break; 622 case BPF_S_ALU_ADD_X: 623 update_on_xread(ctx); 624 emit(ARM_ADD_R(r_A, r_A, r_X), ctx); 625 break; 626 case BPF_S_ALU_SUB_K: 627 /* A -= K */ 628 OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); 629 break; 630 case BPF_S_ALU_SUB_X: 631 update_on_xread(ctx); 632 emit(ARM_SUB_R(r_A, r_A, r_X), ctx); 633 break; 634 case BPF_S_ALU_MUL_K: 635 /* A *= K */ 636 emit_mov_i(r_scratch, k, ctx); 637 emit(ARM_MUL(r_A, r_A, r_scratch), ctx); 638 break; 639 case BPF_S_ALU_MUL_X: 640 update_on_xread(ctx); 641 emit(ARM_MUL(r_A, r_A, r_X), ctx); 642 break; 643 case BPF_S_ALU_DIV_K: 644 /* current k == reciprocal_value(userspace k) */ 645 emit_mov_i(r_scratch, k, ctx); 646 /* A = top 32 bits of the product */ 647 emit(ARM_UMULL(r_scratch, r_A, r_A, r_scratch), ctx); 648 break; 649 case BPF_S_ALU_DIV_X: 650 update_on_xread(ctx); 651 emit(ARM_CMP_I(r_X, 0), ctx); 652 emit_err_ret(ARM_COND_EQ, ctx); 653 emit_udiv(r_A, r_A, r_X, ctx); 654 break; 655 case BPF_S_ALU_OR_K: 656 /* A |= K */ 657 OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); 658 break; 659 case BPF_S_ALU_OR_X: 660 update_on_xread(ctx); 661 emit(ARM_ORR_R(r_A, r_A, r_X), ctx); 662 break; 663 case BPF_S_ALU_XOR_K: 664 /* A ^= K; */ 665 OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); 666 break; 667 case BPF_S_ANC_ALU_XOR_X: 668 case BPF_S_ALU_XOR_X: 669 /* A ^= X */ 670 update_on_xread(ctx); 671 emit(ARM_EOR_R(r_A, r_A, r_X), ctx); 672 break; 673 case BPF_S_ALU_AND_K: 674 /* A &= K */ 675 OP_IMM3(ARM_AND, r_A, r_A, k, ctx); 676 break; 677 case BPF_S_ALU_AND_X: 678 update_on_xread(ctx); 679 emit(ARM_AND_R(r_A, r_A, r_X), ctx); 680 break; 681 case BPF_S_ALU_LSH_K: 682 if (unlikely(k > 31)) 683 return -1; 684 emit(ARM_LSL_I(r_A, r_A, k), ctx); 685 break; 686 case BPF_S_ALU_LSH_X: 687 update_on_xread(ctx); 688 emit(ARM_LSL_R(r_A, r_A, r_X), ctx); 689 break; 690 case BPF_S_ALU_RSH_K: 691 if (unlikely(k > 31)) 692 return -1; 693 emit(ARM_LSR_I(r_A, r_A, k), ctx); 694 break; 695 case BPF_S_ALU_RSH_X: 696 update_on_xread(ctx); 697 emit(ARM_LSR_R(r_A, r_A, r_X), ctx); 698 break; 699 case BPF_S_ALU_NEG: 700 /* A = -A */ 701 emit(ARM_RSB_I(r_A, r_A, 0), ctx); 702 break; 703 case BPF_S_JMP_JA: 704 /* pc += K */ 705 emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); 706 break; 707 case BPF_S_JMP_JEQ_K: 708 /* pc += (A == K) ? pc->jt : pc->jf */ 709 condt = ARM_COND_EQ; 710 goto cmp_imm; 711 case BPF_S_JMP_JGT_K: 712 /* pc += (A > K) ? pc->jt : pc->jf */ 713 condt = ARM_COND_HI; 714 goto cmp_imm; 715 case BPF_S_JMP_JGE_K: 716 /* pc += (A >= K) ? pc->jt : pc->jf */ 717 condt = ARM_COND_HS; 718 cmp_imm: 719 imm12 = imm8m(k); 720 if (imm12 < 0) { 721 emit_mov_i_no8m(r_scratch, k, ctx); 722 emit(ARM_CMP_R(r_A, r_scratch), ctx); 723 } else { 724 emit(ARM_CMP_I(r_A, imm12), ctx); 725 } 726 cond_jump: 727 if (inst->jt) 728 _emit(condt, ARM_B(b_imm(i + inst->jt + 1, 729 ctx)), ctx); 730 if (inst->jf) 731 _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, 732 ctx)), ctx); 733 break; 734 case BPF_S_JMP_JEQ_X: 735 /* pc += (A == X) ? pc->jt : pc->jf */ 736 condt = ARM_COND_EQ; 737 goto cmp_x; 738 case BPF_S_JMP_JGT_X: 739 /* pc += (A > X) ? pc->jt : pc->jf */ 740 condt = ARM_COND_HI; 741 goto cmp_x; 742 case BPF_S_JMP_JGE_X: 743 /* pc += (A >= X) ? pc->jt : pc->jf */ 744 condt = ARM_COND_CS; 745 cmp_x: 746 update_on_xread(ctx); 747 emit(ARM_CMP_R(r_A, r_X), ctx); 748 goto cond_jump; 749 case BPF_S_JMP_JSET_K: 750 /* pc += (A & K) ? pc->jt : pc->jf */ 751 condt = ARM_COND_NE; 752 /* not set iff all zeroes iff Z==1 iff EQ */ 753 754 imm12 = imm8m(k); 755 if (imm12 < 0) { 756 emit_mov_i_no8m(r_scratch, k, ctx); 757 emit(ARM_TST_R(r_A, r_scratch), ctx); 758 } else { 759 emit(ARM_TST_I(r_A, imm12), ctx); 760 } 761 goto cond_jump; 762 case BPF_S_JMP_JSET_X: 763 /* pc += (A & X) ? pc->jt : pc->jf */ 764 update_on_xread(ctx); 765 condt = ARM_COND_NE; 766 emit(ARM_TST_R(r_A, r_X), ctx); 767 goto cond_jump; 768 case BPF_S_RET_A: 769 emit(ARM_MOV_R(ARM_R0, r_A), ctx); 770 goto b_epilogue; 771 case BPF_S_RET_K: 772 if ((k == 0) && (ctx->ret0_fp_idx < 0)) 773 ctx->ret0_fp_idx = i; 774 emit_mov_i(ARM_R0, k, ctx); 775 b_epilogue: 776 if (i != ctx->skf->len - 1) 777 emit(ARM_B(b_imm(prog->len, ctx)), ctx); 778 break; 779 case BPF_S_MISC_TAX: 780 /* X = A */ 781 ctx->seen |= SEEN_X; 782 emit(ARM_MOV_R(r_X, r_A), ctx); 783 break; 784 case BPF_S_MISC_TXA: 785 /* A = X */ 786 update_on_xread(ctx); 787 emit(ARM_MOV_R(r_A, r_X), ctx); 788 break; 789 case BPF_S_ANC_PROTOCOL: 790 /* A = ntohs(skb->protocol) */ 791 ctx->seen |= SEEN_SKB; 792 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 793 protocol) != 2); 794 off = offsetof(struct sk_buff, protocol); 795 emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); 796 emit_swap16(r_A, r_scratch, ctx); 797 break; 798 case BPF_S_ANC_CPU: 799 /* r_scratch = current_thread_info() */ 800 OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); 801 /* A = current_thread_info()->cpu */ 802 BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); 803 off = offsetof(struct thread_info, cpu); 804 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 805 break; 806 case BPF_S_ANC_IFINDEX: 807 /* A = skb->dev->ifindex */ 808 ctx->seen |= SEEN_SKB; 809 off = offsetof(struct sk_buff, dev); 810 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 811 812 emit(ARM_CMP_I(r_scratch, 0), ctx); 813 emit_err_ret(ARM_COND_EQ, ctx); 814 815 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 816 ifindex) != 4); 817 off = offsetof(struct net_device, ifindex); 818 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 819 break; 820 case BPF_S_ANC_MARK: 821 ctx->seen |= SEEN_SKB; 822 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 823 off = offsetof(struct sk_buff, mark); 824 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 825 break; 826 case BPF_S_ANC_RXHASH: 827 ctx->seen |= SEEN_SKB; 828 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); 829 off = offsetof(struct sk_buff, rxhash); 830 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 831 break; 832 case BPF_S_ANC_VLAN_TAG: 833 case BPF_S_ANC_VLAN_TAG_PRESENT: 834 ctx->seen |= SEEN_SKB; 835 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); 836 off = offsetof(struct sk_buff, vlan_tci); 837 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 838 if (inst->code == BPF_S_ANC_VLAN_TAG) 839 OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); 840 else 841 OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); 842 break; 843 case BPF_S_ANC_QUEUE: 844 ctx->seen |= SEEN_SKB; 845 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 846 queue_mapping) != 2); 847 BUILD_BUG_ON(offsetof(struct sk_buff, 848 queue_mapping) > 0xff); 849 off = offsetof(struct sk_buff, queue_mapping); 850 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 851 break; 852 default: 853 return -1; 854 } 855 } 856 857 /* compute offsets only during the first pass */ 858 if (ctx->target == NULL) 859 ctx->offsets[i] = ctx->idx * 4; 860 861 return 0; 862 } 863 864 865 void bpf_jit_compile(struct sk_filter *fp) 866 { 867 struct jit_ctx ctx; 868 unsigned tmp_idx; 869 unsigned alloc_size; 870 871 if (!bpf_jit_enable) 872 return; 873 874 memset(&ctx, 0, sizeof(ctx)); 875 ctx.skf = fp; 876 ctx.ret0_fp_idx = -1; 877 878 ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); 879 if (ctx.offsets == NULL) 880 return; 881 882 /* fake pass to fill in the ctx->seen */ 883 if (unlikely(build_body(&ctx))) 884 goto out; 885 886 tmp_idx = ctx.idx; 887 build_prologue(&ctx); 888 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 889 890 #if __LINUX_ARM_ARCH__ < 7 891 tmp_idx = ctx.idx; 892 build_epilogue(&ctx); 893 ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4; 894 895 ctx.idx += ctx.imm_count; 896 if (ctx.imm_count) { 897 ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); 898 if (ctx.imms == NULL) 899 goto out; 900 } 901 #else 902 /* there's nothing after the epilogue on ARMv7 */ 903 build_epilogue(&ctx); 904 #endif 905 906 alloc_size = 4 * ctx.idx; 907 ctx.target = module_alloc(alloc_size); 908 if (unlikely(ctx.target == NULL)) 909 goto out; 910 911 ctx.idx = 0; 912 build_prologue(&ctx); 913 build_body(&ctx); 914 build_epilogue(&ctx); 915 916 flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx)); 917 918 #if __LINUX_ARM_ARCH__ < 7 919 if (ctx.imm_count) 920 kfree(ctx.imms); 921 #endif 922 923 if (bpf_jit_enable > 1) 924 /* there are 2 passes here */ 925 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); 926 927 fp->bpf_func = (void *)ctx.target; 928 out: 929 kfree(ctx.offsets); 930 return; 931 } 932 933 void bpf_jit_free(struct sk_filter *fp) 934 { 935 if (fp->bpf_func != sk_run_filter) 936 module_free(NULL, fp->bpf_func); 937 kfree(fp); 938 } 939