1 /* 2 * Just-In-Time compiler for BPF filters on 32bit ARM 3 * 4 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the 8 * Free Software Foundation; version 2 of the License. 9 */ 10 11 #include <linux/bitops.h> 12 #include <linux/compiler.h> 13 #include <linux/errno.h> 14 #include <linux/filter.h> 15 #include <linux/moduleloader.h> 16 #include <linux/netdevice.h> 17 #include <linux/string.h> 18 #include <linux/slab.h> 19 #include <asm/cacheflush.h> 20 #include <asm/hwcap.h> 21 22 #include "bpf_jit_32.h" 23 24 /* 25 * ABI: 26 * 27 * r0 scratch register 28 * r4 BPF register A 29 * r5 BPF register X 30 * r6 pointer to the skb 31 * r7 skb->data 32 * r8 skb_headlen(skb) 33 */ 34 35 #define r_scratch ARM_R0 36 /* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ 37 #define r_off ARM_R1 38 #define r_A ARM_R4 39 #define r_X ARM_R5 40 #define r_skb ARM_R6 41 #define r_skb_data ARM_R7 42 #define r_skb_hl ARM_R8 43 44 #define SCRATCH_SP_OFFSET 0 45 #define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + (k)) 46 47 #define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) 48 #define SEEN_MEM_WORD(k) (1 << (k)) 49 #define SEEN_X (1 << BPF_MEMWORDS) 50 #define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) 51 #define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) 52 #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) 53 54 #define FLAG_NEED_X_RESET (1 << 0) 55 56 struct jit_ctx { 57 const struct sk_filter *skf; 58 unsigned idx; 59 unsigned prologue_bytes; 60 int ret0_fp_idx; 61 u32 seen; 62 u32 flags; 63 u32 *offsets; 64 u32 *target; 65 #if __LINUX_ARM_ARCH__ < 7 66 u16 epilogue_bytes; 67 u16 imm_count; 68 u32 *imms; 69 #endif 70 }; 71 72 int bpf_jit_enable __read_mostly; 73 74 static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) 75 { 76 u8 ret; 77 int err; 78 79 err = skb_copy_bits(skb, offset, &ret, 1); 80 81 return (u64)err << 32 | ret; 82 } 83 84 static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) 85 { 86 u16 ret; 87 int err; 88 89 err = skb_copy_bits(skb, offset, &ret, 2); 90 91 return (u64)err << 32 | ntohs(ret); 92 } 93 94 static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) 95 { 96 u32 ret; 97 int err; 98 99 err = skb_copy_bits(skb, offset, &ret, 4); 100 101 return (u64)err << 32 | ntohl(ret); 102 } 103 104 /* 105 * Wrapper that handles both OABI and EABI and assures Thumb2 interworking 106 * (where the assembly routines like __aeabi_uidiv could cause problems). 107 */ 108 static u32 jit_udiv(u32 dividend, u32 divisor) 109 { 110 return dividend / divisor; 111 } 112 113 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) 114 { 115 if (ctx->target != NULL) 116 ctx->target[ctx->idx] = inst | (cond << 28); 117 118 ctx->idx++; 119 } 120 121 /* 122 * Emit an instruction that will be executed unconditionally. 123 */ 124 static inline void emit(u32 inst, struct jit_ctx *ctx) 125 { 126 _emit(ARM_COND_AL, inst, ctx); 127 } 128 129 static u16 saved_regs(struct jit_ctx *ctx) 130 { 131 u16 ret = 0; 132 133 if ((ctx->skf->len > 1) || 134 (ctx->skf->insns[0].code == BPF_S_RET_A)) 135 ret |= 1 << r_A; 136 137 #ifdef CONFIG_FRAME_POINTER 138 ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); 139 #else 140 if (ctx->seen & SEEN_CALL) 141 ret |= 1 << ARM_LR; 142 #endif 143 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 144 ret |= 1 << r_skb; 145 if (ctx->seen & SEEN_DATA) 146 ret |= (1 << r_skb_data) | (1 << r_skb_hl); 147 if (ctx->seen & SEEN_X) 148 ret |= 1 << r_X; 149 150 return ret; 151 } 152 153 static inline int mem_words_used(struct jit_ctx *ctx) 154 { 155 /* yes, we do waste some stack space IF there are "holes" in the set" */ 156 return fls(ctx->seen & SEEN_MEM); 157 } 158 159 static inline bool is_load_to_a(u16 inst) 160 { 161 switch (inst) { 162 case BPF_S_LD_W_LEN: 163 case BPF_S_LD_W_ABS: 164 case BPF_S_LD_H_ABS: 165 case BPF_S_LD_B_ABS: 166 case BPF_S_ANC_CPU: 167 case BPF_S_ANC_IFINDEX: 168 case BPF_S_ANC_MARK: 169 case BPF_S_ANC_PROTOCOL: 170 case BPF_S_ANC_RXHASH: 171 case BPF_S_ANC_QUEUE: 172 return true; 173 default: 174 return false; 175 } 176 } 177 178 static void build_prologue(struct jit_ctx *ctx) 179 { 180 u16 reg_set = saved_regs(ctx); 181 u16 first_inst = ctx->skf->insns[0].code; 182 u16 off; 183 184 #ifdef CONFIG_FRAME_POINTER 185 emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); 186 emit(ARM_PUSH(reg_set), ctx); 187 emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); 188 #else 189 if (reg_set) 190 emit(ARM_PUSH(reg_set), ctx); 191 #endif 192 193 if (ctx->seen & (SEEN_DATA | SEEN_SKB)) 194 emit(ARM_MOV_R(r_skb, ARM_R0), ctx); 195 196 if (ctx->seen & SEEN_DATA) { 197 off = offsetof(struct sk_buff, data); 198 emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); 199 /* headlen = len - data_len */ 200 off = offsetof(struct sk_buff, len); 201 emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); 202 off = offsetof(struct sk_buff, data_len); 203 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 204 emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); 205 } 206 207 if (ctx->flags & FLAG_NEED_X_RESET) 208 emit(ARM_MOV_I(r_X, 0), ctx); 209 210 /* do not leak kernel data to userspace */ 211 if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) 212 emit(ARM_MOV_I(r_A, 0), ctx); 213 214 /* stack space for the BPF_MEM words */ 215 if (ctx->seen & SEEN_MEM) 216 emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 217 } 218 219 static void build_epilogue(struct jit_ctx *ctx) 220 { 221 u16 reg_set = saved_regs(ctx); 222 223 if (ctx->seen & SEEN_MEM) 224 emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); 225 226 reg_set &= ~(1 << ARM_LR); 227 228 #ifdef CONFIG_FRAME_POINTER 229 /* the first instruction of the prologue was: mov ip, sp */ 230 reg_set &= ~(1 << ARM_IP); 231 reg_set |= (1 << ARM_SP); 232 emit(ARM_LDM(ARM_SP, reg_set), ctx); 233 #else 234 if (reg_set) { 235 if (ctx->seen & SEEN_CALL) 236 reg_set |= 1 << ARM_PC; 237 emit(ARM_POP(reg_set), ctx); 238 } 239 240 if (!(ctx->seen & SEEN_CALL)) 241 emit(ARM_BX(ARM_LR), ctx); 242 #endif 243 } 244 245 static int16_t imm8m(u32 x) 246 { 247 u32 rot; 248 249 for (rot = 0; rot < 16; rot++) 250 if ((x & ~ror32(0xff, 2 * rot)) == 0) 251 return rol32(x, 2 * rot) | (rot << 8); 252 253 return -1; 254 } 255 256 #if __LINUX_ARM_ARCH__ < 7 257 258 static u16 imm_offset(u32 k, struct jit_ctx *ctx) 259 { 260 unsigned i = 0, offset; 261 u16 imm; 262 263 /* on the "fake" run we just count them (duplicates included) */ 264 if (ctx->target == NULL) { 265 ctx->imm_count++; 266 return 0; 267 } 268 269 while ((i < ctx->imm_count) && ctx->imms[i]) { 270 if (ctx->imms[i] == k) 271 break; 272 i++; 273 } 274 275 if (ctx->imms[i] == 0) 276 ctx->imms[i] = k; 277 278 /* constants go just after the epilogue */ 279 offset = ctx->offsets[ctx->skf->len]; 280 offset += ctx->prologue_bytes; 281 offset += ctx->epilogue_bytes; 282 offset += i * 4; 283 284 ctx->target[offset / 4] = k; 285 286 /* PC in ARM mode == address of the instruction + 8 */ 287 imm = offset - (8 + ctx->idx * 4); 288 289 return imm; 290 } 291 292 #endif /* __LINUX_ARM_ARCH__ */ 293 294 /* 295 * Move an immediate that's not an imm8m to a core register. 296 */ 297 static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) 298 { 299 #if __LINUX_ARM_ARCH__ < 7 300 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); 301 #else 302 emit(ARM_MOVW(rd, val & 0xffff), ctx); 303 if (val > 0xffff) 304 emit(ARM_MOVT(rd, val >> 16), ctx); 305 #endif 306 } 307 308 static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) 309 { 310 int imm12 = imm8m(val); 311 312 if (imm12 >= 0) 313 emit(ARM_MOV_I(rd, imm12), ctx); 314 else 315 emit_mov_i_no8m(rd, val, ctx); 316 } 317 318 #if __LINUX_ARM_ARCH__ < 6 319 320 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 321 { 322 _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); 323 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 324 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); 325 _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); 326 _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); 327 _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); 328 _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); 329 _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); 330 } 331 332 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 333 { 334 _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); 335 _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); 336 _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); 337 } 338 339 static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) 340 { 341 emit(ARM_LSL_R(ARM_R1, r_src, 8), ctx); 342 emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSL, 8), ctx); 343 emit(ARM_LSL_I(r_dst, r_dst, 8), ctx); 344 emit(ARM_LSL_R(r_dst, r_dst, 8), ctx); 345 } 346 347 #else /* ARMv6+ */ 348 349 static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 350 { 351 _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); 352 #ifdef __LITTLE_ENDIAN 353 _emit(cond, ARM_REV(r_res, r_res), ctx); 354 #endif 355 } 356 357 static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) 358 { 359 _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); 360 #ifdef __LITTLE_ENDIAN 361 _emit(cond, ARM_REV16(r_res, r_res), ctx); 362 #endif 363 } 364 365 static inline void emit_swap16(u8 r_dst __maybe_unused, 366 u8 r_src __maybe_unused, 367 struct jit_ctx *ctx __maybe_unused) 368 { 369 #ifdef __LITTLE_ENDIAN 370 emit(ARM_REV16(r_dst, r_src), ctx); 371 #endif 372 } 373 374 #endif /* __LINUX_ARM_ARCH__ < 6 */ 375 376 377 /* Compute the immediate value for a PC-relative branch. */ 378 static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) 379 { 380 u32 imm; 381 382 if (ctx->target == NULL) 383 return 0; 384 /* 385 * BPF allows only forward jumps and the offset of the target is 386 * still the one computed during the first pass. 387 */ 388 imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); 389 390 return imm >> 2; 391 } 392 393 #define OP_IMM3(op, r1, r2, imm_val, ctx) \ 394 do { \ 395 imm12 = imm8m(imm_val); \ 396 if (imm12 < 0) { \ 397 emit_mov_i_no8m(r_scratch, imm_val, ctx); \ 398 emit(op ## _R((r1), (r2), r_scratch), ctx); \ 399 } else { \ 400 emit(op ## _I((r1), (r2), imm12), ctx); \ 401 } \ 402 } while (0) 403 404 static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) 405 { 406 if (ctx->ret0_fp_idx >= 0) { 407 _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); 408 /* NOP to keep the size constant between passes */ 409 emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); 410 } else { 411 _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); 412 _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); 413 } 414 } 415 416 static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) 417 { 418 #if __LINUX_ARM_ARCH__ < 5 419 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); 420 421 if (elf_hwcap & HWCAP_THUMB) 422 emit(ARM_BX(tgt_reg), ctx); 423 else 424 emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); 425 #else 426 emit(ARM_BLX_R(tgt_reg), ctx); 427 #endif 428 } 429 430 static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) 431 { 432 #if __LINUX_ARM_ARCH__ == 7 433 if (elf_hwcap & HWCAP_IDIVA) { 434 emit(ARM_UDIV(rd, rm, rn), ctx); 435 return; 436 } 437 #endif 438 if (rm != ARM_R0) 439 emit(ARM_MOV_R(ARM_R0, rm), ctx); 440 if (rn != ARM_R1) 441 emit(ARM_MOV_R(ARM_R1, rn), ctx); 442 443 ctx->seen |= SEEN_CALL; 444 emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); 445 emit_blx_r(ARM_R3, ctx); 446 447 if (rd != ARM_R0) 448 emit(ARM_MOV_R(rd, ARM_R0), ctx); 449 } 450 451 static inline void update_on_xread(struct jit_ctx *ctx) 452 { 453 if (!(ctx->seen & SEEN_X)) 454 ctx->flags |= FLAG_NEED_X_RESET; 455 456 ctx->seen |= SEEN_X; 457 } 458 459 static int build_body(struct jit_ctx *ctx) 460 { 461 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; 462 const struct sk_filter *prog = ctx->skf; 463 const struct sock_filter *inst; 464 unsigned i, load_order, off, condt; 465 int imm12; 466 u32 k; 467 468 for (i = 0; i < prog->len; i++) { 469 inst = &(prog->insns[i]); 470 /* K as an immediate value operand */ 471 k = inst->k; 472 473 /* compute offsets only in the fake pass */ 474 if (ctx->target == NULL) 475 ctx->offsets[i] = ctx->idx * 4; 476 477 switch (inst->code) { 478 case BPF_S_LD_IMM: 479 emit_mov_i(r_A, k, ctx); 480 break; 481 case BPF_S_LD_W_LEN: 482 ctx->seen |= SEEN_SKB; 483 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 484 emit(ARM_LDR_I(r_A, r_skb, 485 offsetof(struct sk_buff, len)), ctx); 486 break; 487 case BPF_S_LD_MEM: 488 /* A = scratch[k] */ 489 ctx->seen |= SEEN_MEM_WORD(k); 490 emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 491 break; 492 case BPF_S_LD_W_ABS: 493 load_order = 2; 494 goto load; 495 case BPF_S_LD_H_ABS: 496 load_order = 1; 497 goto load; 498 case BPF_S_LD_B_ABS: 499 load_order = 0; 500 load: 501 /* the interpreter will deal with the negative K */ 502 if ((int)k < 0) 503 return -ENOTSUPP; 504 emit_mov_i(r_off, k, ctx); 505 load_common: 506 ctx->seen |= SEEN_DATA | SEEN_CALL; 507 508 if (load_order > 0) { 509 emit(ARM_SUB_I(r_scratch, r_skb_hl, 510 1 << load_order), ctx); 511 emit(ARM_CMP_R(r_scratch, r_off), ctx); 512 condt = ARM_COND_HS; 513 } else { 514 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 515 condt = ARM_COND_HI; 516 } 517 518 _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), 519 ctx); 520 521 if (load_order == 0) 522 _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0), 523 ctx); 524 else if (load_order == 1) 525 emit_load_be16(condt, r_A, r_scratch, ctx); 526 else if (load_order == 2) 527 emit_load_be32(condt, r_A, r_scratch, ctx); 528 529 _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx); 530 531 /* the slowpath */ 532 emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx); 533 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 534 /* the offset is already in R1 */ 535 emit_blx_r(ARM_R3, ctx); 536 /* check the result of skb_copy_bits */ 537 emit(ARM_CMP_I(ARM_R1, 0), ctx); 538 emit_err_ret(ARM_COND_NE, ctx); 539 emit(ARM_MOV_R(r_A, ARM_R0), ctx); 540 break; 541 case BPF_S_LD_W_IND: 542 load_order = 2; 543 goto load_ind; 544 case BPF_S_LD_H_IND: 545 load_order = 1; 546 goto load_ind; 547 case BPF_S_LD_B_IND: 548 load_order = 0; 549 load_ind: 550 OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); 551 goto load_common; 552 case BPF_S_LDX_IMM: 553 ctx->seen |= SEEN_X; 554 emit_mov_i(r_X, k, ctx); 555 break; 556 case BPF_S_LDX_W_LEN: 557 ctx->seen |= SEEN_X | SEEN_SKB; 558 emit(ARM_LDR_I(r_X, r_skb, 559 offsetof(struct sk_buff, len)), ctx); 560 break; 561 case BPF_S_LDX_MEM: 562 ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); 563 emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 564 break; 565 case BPF_S_LDX_B_MSH: 566 /* x = ((*(frame + k)) & 0xf) << 2; */ 567 ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; 568 /* the interpreter should deal with the negative K */ 569 if (k < 0) 570 return -1; 571 /* offset in r1: we might have to take the slow path */ 572 emit_mov_i(r_off, k, ctx); 573 emit(ARM_CMP_R(r_skb_hl, r_off), ctx); 574 575 /* load in r0: common with the slowpath */ 576 _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data, 577 ARM_R1), ctx); 578 /* 579 * emit_mov_i() might generate one or two instructions, 580 * the same holds for emit_blx_r() 581 */ 582 _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); 583 584 emit(ARM_MOV_R(ARM_R0, r_skb), ctx); 585 /* r_off is r1 */ 586 emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx); 587 emit_blx_r(ARM_R3, ctx); 588 /* check the return value of skb_copy_bits */ 589 emit(ARM_CMP_I(ARM_R1, 0), ctx); 590 emit_err_ret(ARM_COND_NE, ctx); 591 592 emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); 593 emit(ARM_LSL_I(r_X, r_X, 2), ctx); 594 break; 595 case BPF_S_ST: 596 ctx->seen |= SEEN_MEM_WORD(k); 597 emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); 598 break; 599 case BPF_S_STX: 600 update_on_xread(ctx); 601 ctx->seen |= SEEN_MEM_WORD(k); 602 emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); 603 break; 604 case BPF_S_ALU_ADD_K: 605 /* A += K */ 606 OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); 607 break; 608 case BPF_S_ALU_ADD_X: 609 update_on_xread(ctx); 610 emit(ARM_ADD_R(r_A, r_A, r_X), ctx); 611 break; 612 case BPF_S_ALU_SUB_K: 613 /* A -= K */ 614 OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); 615 break; 616 case BPF_S_ALU_SUB_X: 617 update_on_xread(ctx); 618 emit(ARM_SUB_R(r_A, r_A, r_X), ctx); 619 break; 620 case BPF_S_ALU_MUL_K: 621 /* A *= K */ 622 emit_mov_i(r_scratch, k, ctx); 623 emit(ARM_MUL(r_A, r_A, r_scratch), ctx); 624 break; 625 case BPF_S_ALU_MUL_X: 626 update_on_xread(ctx); 627 emit(ARM_MUL(r_A, r_A, r_X), ctx); 628 break; 629 case BPF_S_ALU_DIV_K: 630 /* current k == reciprocal_value(userspace k) */ 631 emit_mov_i(r_scratch, k, ctx); 632 /* A = top 32 bits of the product */ 633 emit(ARM_UMULL(r_scratch, r_A, r_A, r_scratch), ctx); 634 break; 635 case BPF_S_ALU_DIV_X: 636 update_on_xread(ctx); 637 emit(ARM_CMP_I(r_X, 0), ctx); 638 emit_err_ret(ARM_COND_EQ, ctx); 639 emit_udiv(r_A, r_A, r_X, ctx); 640 break; 641 case BPF_S_ALU_OR_K: 642 /* A |= K */ 643 OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); 644 break; 645 case BPF_S_ALU_OR_X: 646 update_on_xread(ctx); 647 emit(ARM_ORR_R(r_A, r_A, r_X), ctx); 648 break; 649 case BPF_S_ALU_AND_K: 650 /* A &= K */ 651 OP_IMM3(ARM_AND, r_A, r_A, k, ctx); 652 break; 653 case BPF_S_ALU_AND_X: 654 update_on_xread(ctx); 655 emit(ARM_AND_R(r_A, r_A, r_X), ctx); 656 break; 657 case BPF_S_ALU_LSH_K: 658 if (unlikely(k > 31)) 659 return -1; 660 emit(ARM_LSL_I(r_A, r_A, k), ctx); 661 break; 662 case BPF_S_ALU_LSH_X: 663 update_on_xread(ctx); 664 emit(ARM_LSL_R(r_A, r_A, r_X), ctx); 665 break; 666 case BPF_S_ALU_RSH_K: 667 if (unlikely(k > 31)) 668 return -1; 669 emit(ARM_LSR_I(r_A, r_A, k), ctx); 670 break; 671 case BPF_S_ALU_RSH_X: 672 update_on_xread(ctx); 673 emit(ARM_LSR_R(r_A, r_A, r_X), ctx); 674 break; 675 case BPF_S_ALU_NEG: 676 /* A = -A */ 677 emit(ARM_RSB_I(r_A, r_A, 0), ctx); 678 break; 679 case BPF_S_JMP_JA: 680 /* pc += K */ 681 emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); 682 break; 683 case BPF_S_JMP_JEQ_K: 684 /* pc += (A == K) ? pc->jt : pc->jf */ 685 condt = ARM_COND_EQ; 686 goto cmp_imm; 687 case BPF_S_JMP_JGT_K: 688 /* pc += (A > K) ? pc->jt : pc->jf */ 689 condt = ARM_COND_HI; 690 goto cmp_imm; 691 case BPF_S_JMP_JGE_K: 692 /* pc += (A >= K) ? pc->jt : pc->jf */ 693 condt = ARM_COND_HS; 694 cmp_imm: 695 imm12 = imm8m(k); 696 if (imm12 < 0) { 697 emit_mov_i_no8m(r_scratch, k, ctx); 698 emit(ARM_CMP_R(r_A, r_scratch), ctx); 699 } else { 700 emit(ARM_CMP_I(r_A, imm12), ctx); 701 } 702 cond_jump: 703 if (inst->jt) 704 _emit(condt, ARM_B(b_imm(i + inst->jt + 1, 705 ctx)), ctx); 706 if (inst->jf) 707 _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, 708 ctx)), ctx); 709 break; 710 case BPF_S_JMP_JEQ_X: 711 /* pc += (A == X) ? pc->jt : pc->jf */ 712 condt = ARM_COND_EQ; 713 goto cmp_x; 714 case BPF_S_JMP_JGT_X: 715 /* pc += (A > X) ? pc->jt : pc->jf */ 716 condt = ARM_COND_HI; 717 goto cmp_x; 718 case BPF_S_JMP_JGE_X: 719 /* pc += (A >= X) ? pc->jt : pc->jf */ 720 condt = ARM_COND_CS; 721 cmp_x: 722 update_on_xread(ctx); 723 emit(ARM_CMP_R(r_A, r_X), ctx); 724 goto cond_jump; 725 case BPF_S_JMP_JSET_K: 726 /* pc += (A & K) ? pc->jt : pc->jf */ 727 condt = ARM_COND_NE; 728 /* not set iff all zeroes iff Z==1 iff EQ */ 729 730 imm12 = imm8m(k); 731 if (imm12 < 0) { 732 emit_mov_i_no8m(r_scratch, k, ctx); 733 emit(ARM_TST_R(r_A, r_scratch), ctx); 734 } else { 735 emit(ARM_TST_I(r_A, imm12), ctx); 736 } 737 goto cond_jump; 738 case BPF_S_JMP_JSET_X: 739 /* pc += (A & X) ? pc->jt : pc->jf */ 740 update_on_xread(ctx); 741 condt = ARM_COND_NE; 742 emit(ARM_TST_R(r_A, r_X), ctx); 743 goto cond_jump; 744 case BPF_S_RET_A: 745 emit(ARM_MOV_R(ARM_R0, r_A), ctx); 746 goto b_epilogue; 747 case BPF_S_RET_K: 748 if ((k == 0) && (ctx->ret0_fp_idx < 0)) 749 ctx->ret0_fp_idx = i; 750 emit_mov_i(ARM_R0, k, ctx); 751 b_epilogue: 752 if (i != ctx->skf->len - 1) 753 emit(ARM_B(b_imm(prog->len, ctx)), ctx); 754 break; 755 case BPF_S_MISC_TAX: 756 /* X = A */ 757 ctx->seen |= SEEN_X; 758 emit(ARM_MOV_R(r_X, r_A), ctx); 759 break; 760 case BPF_S_MISC_TXA: 761 /* A = X */ 762 update_on_xread(ctx); 763 emit(ARM_MOV_R(r_A, r_X), ctx); 764 break; 765 case BPF_S_ANC_PROTOCOL: 766 /* A = ntohs(skb->protocol) */ 767 ctx->seen |= SEEN_SKB; 768 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 769 protocol) != 2); 770 off = offsetof(struct sk_buff, protocol); 771 emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); 772 emit_swap16(r_A, r_scratch, ctx); 773 break; 774 case BPF_S_ANC_CPU: 775 /* r_scratch = current_thread_info() */ 776 OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); 777 /* A = current_thread_info()->cpu */ 778 BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); 779 off = offsetof(struct thread_info, cpu); 780 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 781 break; 782 case BPF_S_ANC_IFINDEX: 783 /* A = skb->dev->ifindex */ 784 ctx->seen |= SEEN_SKB; 785 off = offsetof(struct sk_buff, dev); 786 emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); 787 788 emit(ARM_CMP_I(r_scratch, 0), ctx); 789 emit_err_ret(ARM_COND_EQ, ctx); 790 791 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 792 ifindex) != 4); 793 off = offsetof(struct net_device, ifindex); 794 emit(ARM_LDR_I(r_A, r_scratch, off), ctx); 795 break; 796 case BPF_S_ANC_MARK: 797 ctx->seen |= SEEN_SKB; 798 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 799 off = offsetof(struct sk_buff, mark); 800 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 801 break; 802 case BPF_S_ANC_RXHASH: 803 ctx->seen |= SEEN_SKB; 804 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); 805 off = offsetof(struct sk_buff, rxhash); 806 emit(ARM_LDR_I(r_A, r_skb, off), ctx); 807 break; 808 case BPF_S_ANC_QUEUE: 809 ctx->seen |= SEEN_SKB; 810 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 811 queue_mapping) != 2); 812 BUILD_BUG_ON(offsetof(struct sk_buff, 813 queue_mapping) > 0xff); 814 off = offsetof(struct sk_buff, queue_mapping); 815 emit(ARM_LDRH_I(r_A, r_skb, off), ctx); 816 break; 817 default: 818 return -1; 819 } 820 } 821 822 /* compute offsets only during the first pass */ 823 if (ctx->target == NULL) 824 ctx->offsets[i] = ctx->idx * 4; 825 826 return 0; 827 } 828 829 830 void bpf_jit_compile(struct sk_filter *fp) 831 { 832 struct jit_ctx ctx; 833 unsigned tmp_idx; 834 unsigned alloc_size; 835 836 if (!bpf_jit_enable) 837 return; 838 839 memset(&ctx, 0, sizeof(ctx)); 840 ctx.skf = fp; 841 ctx.ret0_fp_idx = -1; 842 843 ctx.offsets = kzalloc(GFP_KERNEL, 4 * (ctx.skf->len + 1)); 844 if (ctx.offsets == NULL) 845 return; 846 847 /* fake pass to fill in the ctx->seen */ 848 if (unlikely(build_body(&ctx))) 849 goto out; 850 851 tmp_idx = ctx.idx; 852 build_prologue(&ctx); 853 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 854 855 #if __LINUX_ARM_ARCH__ < 7 856 tmp_idx = ctx.idx; 857 build_epilogue(&ctx); 858 ctx.epilogue_bytes = (ctx.idx - tmp_idx) * 4; 859 860 ctx.idx += ctx.imm_count; 861 if (ctx.imm_count) { 862 ctx.imms = kzalloc(GFP_KERNEL, 4 * ctx.imm_count); 863 if (ctx.imms == NULL) 864 goto out; 865 } 866 #else 867 /* there's nothing after the epilogue on ARMv7 */ 868 build_epilogue(&ctx); 869 #endif 870 871 alloc_size = 4 * ctx.idx; 872 ctx.target = module_alloc(max(sizeof(struct work_struct), 873 alloc_size)); 874 if (unlikely(ctx.target == NULL)) 875 goto out; 876 877 ctx.idx = 0; 878 build_prologue(&ctx); 879 build_body(&ctx); 880 build_epilogue(&ctx); 881 882 flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx)); 883 884 #if __LINUX_ARM_ARCH__ < 7 885 if (ctx.imm_count) 886 kfree(ctx.imms); 887 #endif 888 889 if (bpf_jit_enable > 1) 890 print_hex_dump(KERN_INFO, "BPF JIT code: ", 891 DUMP_PREFIX_ADDRESS, 16, 4, ctx.target, 892 alloc_size, false); 893 894 fp->bpf_func = (void *)ctx.target; 895 out: 896 kfree(ctx.offsets); 897 return; 898 } 899 900 static void bpf_jit_free_worker(struct work_struct *work) 901 { 902 module_free(NULL, work); 903 } 904 905 void bpf_jit_free(struct sk_filter *fp) 906 { 907 struct work_struct *work; 908 909 if (fp->bpf_func != sk_run_filter) { 910 work = (struct work_struct *)fp->bpf_func; 911 912 INIT_WORK(work, bpf_jit_free_worker); 913 schedule_work(work); 914 } 915 } 916