/*
 * bpf_jit_comp.c: BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

#include <asm/set_memory.h>
#include <asm/nospec-branch.h>

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	return value == (u64)(u32)value;
}

/* mov dst, src */
#define EMIT_mov(DST, SRC)								 \
	do {										 \
		if (DST != SRC)								 \
			EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
	} while (0)

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F

/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)

/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused, since if used as base address
 * register in load/store instructions, it always needs an
 * extra byte of encoding and is callee saved.
 *
 * Also x86-64 register R9 is unused. x86-64 register R10 is
 * used for blinding (if enabled).
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8 */
	[BPF_REG_6] = 3,  /* RBX callee saved */
	[BPF_REG_7] = 5,  /* R13 callee saved */
	[BPF_REG_8] = 6,  /* R14 callee saved */
	[BPF_REG_9] = 7,  /* R15 callee saved */
	[BPF_REG_FP] = 5, /* RBP readonly */
	[BPF_REG_AX] = 2, /* R10 temp register */
	[AUX_REG] = 3,    /* R11 temp register */
};

/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
 * which need extra byte of encoding.
 * rax,rcx,...,rbp have simpler encoding
 */
static bool is_ereg(u32 reg)
{
	return (1 << reg) & (BIT(BPF_REG_5) |
			     BIT(AUX_REG) |
			     BIT(BPF_REG_7) |
			     BIT(BPF_REG_8) |
			     BIT(BPF_REG_9) |
			     BIT(BPF_REG_AX));
}

static bool is_axreg(u32 reg)
{
	return reg == BPF_REG_0;
}

/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
	if (is_ereg(reg))
		byte |= 1;
	return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + reg2hex[dst_reg];
}

/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}
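/*
 * Worked example for the helpers above (illustrative only):
 * EMIT_mov(BPF_REG_1, BPF_REG_0) encodes 'mov rdi, rax'. reg2hex[BPF_REG_1]
 * is 7 (RDI) and reg2hex[BPF_REG_0] is 0 (RAX); neither is an extended
 * register, so add_2mod(0x48, ...) leaves the REX prefix at 0x48 and
 * add_2reg(0xC0, ...) builds the ModRM byte 0xC0 + 7 + (0 << 3) = 0xC7,
 * giving the three bytes 48 89 C7.
 */
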
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with INT3 instructions */
	memset(area, 0xcc, size);
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define AUX_STACK_SPACE		40 /* Space for RBX, R13, R14, R15, tailcnt */

#define PROLOGUE_SIZE		37

/*
 * Emit x86-64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* push rbp */
	EMIT1(0x55);

	/* mov rbp,rsp */
	EMIT3(0x48, 0x89, 0xE5);

	/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
	EMIT3_off32(0x48, 0x81, 0xEC,
		    round_up(stack_depth, 8) + AUX_STACK_SPACE);

	/* sub rbp, AUX_STACK_SPACE */
	EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);

	/* mov qword ptr [rbp+0],rbx */
	EMIT4(0x48, 0x89, 0x5D, 0);
	/* mov qword ptr [rbp+8],r13 */
	EMIT4(0x4C, 0x89, 0x6D, 8);
	/* mov qword ptr [rbp+16],r14 */
	EMIT4(0x4C, 0x89, 0x75, 16);
	/* mov qword ptr [rbp+24],r15 */
	EMIT4(0x4C, 0x89, 0x7D, 24);

	if (!ebpf_from_cbpf) {
		/*
		 * Clear the tail call counter (tail_call_cnt): for eBPF tail
		 * calls we need to reset the counter to 0. It's done in two
		 * instructions, resetting RAX register to 0, and moving it
		 * to the counter location.
		 */

		/* xor eax, eax */
		EMIT2(0x31, 0xc0);
		/* mov qword ptr [rbp+32], rax */
		EMIT4(0x48, 0x89, 0x45, 32);

		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	}

	*pprog = prog;
}

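/*
 * Sketch of the resulting frame (derived from the stores above): the 40 byte
 * AUX_STACK_SPACE area sits at positive offsets from the adjusted RBP:
 *
 *   [rbp +  0]  saved RBX
 *   [rbp +  8]  saved R13
 *   [rbp + 16]  saved R14
 *   [rbp + 24]  saved R15
 *   [rbp + 32]  tail_call_cnt (qword, zeroed here; the tail call code below
 *               updates the dword at rbp + 36 within it)
 *
 * The BPF stack itself occupies [rbp - rounded stack_depth, rbp) and is
 * addressed through BPF_REG_FP.
 */
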
/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog)
{
	u8 *prog = *pprog;
	int label1, label2, label3;
	int cnt = 0;

	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
	label1 = cnt;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JA, OFFSET2);                   /* ja out */
	label2 = cnt;
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC0);                  /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JE, OFFSET3);                   /* je out */
	label3 = cnt;

	/* goto *(prog->bpf_func + prologue_size); */
	EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

	/*
	 * Now we're ready to jump into the next BPF program
	 * rdi == ctx (1st arg)
	 * rax == prog->bpf_func + prologue_size
	 */
	RETPOLINE_RAX_BPF_JIT();

	/* out: */
	BUILD_BUG_ON(cnt - label1 != OFFSET1);
	BUILD_BUG_ON(cnt - label2 != OFFSET2);
	BUILD_BUG_ON(cnt - label3 != OFFSET3);
	*pprog = prog;
}

static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
			   u32 dst_reg, const u32 imm32)
{
	u8 *prog = *pprog;
	u8 b1, b2, b3;
	int cnt = 0;

	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
		b1 = add_1mod(0x48, dst_reg);
		b2 = 0xC7;
		b3 = 0xC0;
		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
		goto done;
	}

	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
		if (is_ereg(dst_reg))
			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
		b2 = 0x31; /* xor */
		b3 = 0xC0;
		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
		goto done;
	}

	/* mov %eax, imm32 */
	if (is_ereg(dst_reg))
		EMIT1(add_1mod(0x40, dst_reg));
	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
done:
	*pprog = prog;
}

static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
			   const u32 imm32_hi, const u32 imm32_lo)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting plain u32, where sign bit must not be
		 * propagated, LLVM tends to load imm64 over mov32
		 * directly, so save a couple of bytes by just doing
		 * 'mov %eax, imm32' instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}

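/*
 * A few encodings the mov helpers above produce (illustrative):
 *
 *   emit_mov_imm32(&prog, false, BPF_REG_1, 5)  -> BF 05 00 00 00        mov edi, 5
 *   emit_mov_imm32(&prog, false, BPF_REG_1, 0)  -> 31 FF                 xor edi, edi
 *   emit_mov_imm32(&prog, true,  BPF_REG_1, -1) -> 48 C7 C7 FF FF FF FF  mov rdi, -1
 */
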
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth,
		      bpf_prog_was_classic(bpf_prog));

	for (i = 0; i < insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		u32 dst_reg = insn->dst_reg;
		u32 src_reg = insn->src_reg;
		u8 b2 = 0, b3 = 0;
		s64 jmp_offset;
		u8 jmp_cond;
		int ilen;
		u8 *func;

		switch (insn->code) {
			/* ALU */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b2 = 0x01; break;
			case BPF_SUB: b2 = 0x29; break;
			case BPF_AND: b2 = 0x21; break;
			case BPF_OR: b2 = 0x09; break;
			case BPF_XOR: b2 = 0x31; break;
			}
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
			break;

		case BPF_ALU64 | BPF_MOV | BPF_X:
		case BPF_ALU | BPF_MOV | BPF_X:
			emit_mov_reg(&prog,
				     BPF_CLASS(insn->code) == BPF_ALU64,
				     dst_reg, src_reg);
			break;

			/* neg dst */
		case BPF_ALU | BPF_NEG:
		case BPF_ALU64 | BPF_NEG:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
			break;

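			/*
			 * ALU ops with an immediate, e.g. (illustrative)
			 * BPF_ALU64 | BPF_ADD | BPF_K with dst_reg == BPF_REG_1
			 * and imm32 == 5 becomes 48 83 C7 05, i.e. 'add rdi, 5':
			 * the imm8 form 0x83 is used whenever the immediate
			 * fits in eight bits, otherwise 0x81 imm32 (or the
			 * shorter eax/rax-only opcode held in b2).
			 */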
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			/*
			 * b3 holds 'normal' opcode, b2 short form only valid
			 * in case dst is eax/rax.
			 */
			switch (BPF_OP(insn->code)) {
			case BPF_ADD:
				b3 = 0xC0;
				b2 = 0x05;
				break;
			case BPF_SUB:
				b3 = 0xE8;
				b2 = 0x2D;
				break;
			case BPF_AND:
				b3 = 0xE0;
				b2 = 0x25;
				break;
			case BPF_OR:
				b3 = 0xC8;
				b2 = 0x0D;
				break;
			case BPF_XOR:
				b3 = 0xF0;
				b2 = 0x35;
				break;
			}

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
			else if (is_axreg(dst_reg))
				EMIT1_off32(b2, imm32);
			else
				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_K:
			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
				       dst_reg, imm32);
			break;

		case BPF_LD | BPF_IMM | BPF_DW:
			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
			insn++;
			i++;
			break;

			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_K:
			EMIT1(0x50); /* push rax */
			EMIT1(0x52); /* push rdx */

			if (BPF_SRC(insn->code) == BPF_X)
				/* mov r11, src_reg */
				EMIT_mov(AUX_REG, src_reg);
			else
				/* mov r11, imm32 */
				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

			/* mov rax, dst_reg */
			EMIT_mov(BPF_REG_0, dst_reg);

			/*
			 * xor edx, edx
			 * equivalent to 'xor rdx, rdx', but one byte less
			 */
			EMIT2(0x31, 0xd2);

			if (BPF_CLASS(insn->code) == BPF_ALU64)
				/* div r11 */
				EMIT3(0x49, 0xF7, 0xF3);
			else
				/* div r11d */
				EMIT3(0x41, 0xF7, 0xF3);

			if (BPF_OP(insn->code) == BPF_MOD)
				/* mov r11, rdx */
				EMIT3(0x49, 0x89, 0xD3);
			else
				/* mov r11, rax */
				EMIT3(0x49, 0x89, 0xC3);

			EMIT1(0x5A); /* pop rdx */
			EMIT1(0x58); /* pop rax */

			/* mov dst_reg, r11 */
			EMIT_mov(dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
		case BPF_ALU64 | BPF_MUL | BPF_X:
		{
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;

			if (dst_reg != BPF_REG_0)
				EMIT1(0x50); /* push rax */
			if (dst_reg != BPF_REG_3)
				EMIT1(0x52); /* push rdx */

			/* mov r11, dst_reg */
			EMIT_mov(AUX_REG, dst_reg);

			if (BPF_SRC(insn->code) == BPF_X)
				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
			else
				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

			if (is64)
				EMIT1(add_1mod(0x48, AUX_REG));
			else if (is_ereg(AUX_REG))
				EMIT1(add_1mod(0x40, AUX_REG));
			/* mul(q) r11 */
			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

			if (dst_reg != BPF_REG_3)
				EMIT1(0x5A); /* pop rdx */
			if (dst_reg != BPF_REG_0) {
				/* mov dst_reg, rax */
				EMIT_mov(dst_reg, BPF_REG_0);
				EMIT1(0x58); /* pop rax */
			}
			break;
		}
			/* Shifts */
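			/*
			 * For the shift cases below: BPF_K shifts use the imm8
			 * opcodes (0xC1, or 0xD1 when shifting by 1), while
			 * BPF_X shifts must put the count in CL because x86
			 * only shifts by %cl; hence RCX is saved/restored
			 * around the 0xD3 form and dst_reg is parked in R11
			 * when it is itself mapped to RCX (BPF_REG_4).
			 */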
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU64 | BPF_LSH | BPF_K:
		case BPF_ALU64 | BPF_RSH | BPF_K:
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}

			if (imm32 == 1)
				EMIT2(0xD1, add_1reg(b3, dst_reg));
			else
				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_X:
		case BPF_ALU64 | BPF_LSH | BPF_X:
		case BPF_ALU64 | BPF_RSH | BPF_X:
		case BPF_ALU64 | BPF_ARSH | BPF_X:

			/* Check for bad case when dst_reg == rcx */
			if (dst_reg == BPF_REG_4) {
				/* mov r11, dst_reg */
				EMIT_mov(AUX_REG, dst_reg);
				dst_reg = AUX_REG;
			}

			if (src_reg != BPF_REG_4) { /* common case */
				EMIT1(0x51); /* push rcx */

				/* mov rcx, src_reg */
				EMIT_mov(BPF_REG_4, src_reg);
			}

			/* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}
			EMIT2(0xD3, add_1reg(b3, dst_reg));

			if (src_reg != BPF_REG_4)
				EMIT1(0x59); /* pop rcx */

			if (insn->dst_reg == BPF_REG_4)
				/* mov dst_reg, r11 */
				EMIT_mov(insn->dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_END | BPF_FROM_BE:
			switch (imm32) {
			case 16:
				/* Emit 'ror %ax, 8' to swap lower 2 bytes */
				EMIT1(0x66);
				if (is_ereg(dst_reg))
					EMIT1(0x41);
				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

				/* Emit 'movzwl eax, ax' */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'bswap eax' to swap lower 4 bytes */
				if (is_ereg(dst_reg))
					EMIT2(0x41, 0x0F);
				else
					EMIT1(0x0F);
				EMIT1(add_1reg(0xC8, dst_reg));
				break;
			case 64:
				/* Emit 'bswap rax' to swap 8 bytes */
				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
				      add_1reg(0xC8, dst_reg));
				break;
			}
			break;

		case BPF_ALU | BPF_END | BPF_FROM_LE:
			switch (imm32) {
			case 16:
				/*
				 * Emit 'movzwl eax, ax' to zero extend 16-bit
				 * into 64 bit
				 */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'mov eax, eax' to clear upper 32-bits */
				if (is_ereg(dst_reg))
					EMIT1(0x45);
				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

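			/*
			 * In the memory access cases below, add_1reg()/add_2reg()
			 * with base 0x40 build a ModRM byte carrying an 8-bit
			 * displacement, and with base 0x80 the 32-bit
			 * displacement form. Illustrative example:
			 * BPF_STX | BPF_MEM | BPF_DW with dst_reg == BPF_REG_1,
			 * src_reg == BPF_REG_2 and off == 8 emits 48 89 77 08,
			 * i.e. 'mov qword ptr [rdi + 8], rsi'.
			 */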
			/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_B:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC6);
			else
				EMIT1(0xC6);
			goto st;
		case BPF_ST | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg))
				EMIT3(0x66, 0x41, 0xC7);
			else
				EMIT2(0x66, 0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC7);
			else
				EMIT1(0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_DW:
			EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, dst_reg), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
			break;

			/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
			/* Emit 'mov byte ptr [rax + off], al' */
			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
			    /* We have to add extra byte for x86 SIL, DIL regs */
			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
			else
				EMIT1(0x88);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT2(0x66, 0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
			else
				EMIT1(0x89);
			goto stx;
		case BPF_STX | BPF_MEM | BPF_DW:
			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;

			/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			/* Emit 'movzx rax, byte ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_H:
			/* Emit 'movzx rax, word ptr [rax + off]' */
			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_W:
			/* Emit 'mov eax, dword ptr [rax+0x14]' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
			else
				EMIT1(0x8B);
			goto ldx;
		case BPF_LDX | BPF_MEM | BPF_DW:
			/* Emit 'mov rax, qword ptr [rax+0x14]' */
			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:			/*
			 * If insn->off == 0 we can save one extra byte, but
			 * special case of x86 R13 which always needs an offset
			 * is not worth the hassle
			 */
			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
					    insn->off);
			break;

			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Emit 'lock add dword ptr [rax + off], eax' */
			if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
			else
				EMIT2(0xF0, 0x01);
			goto xadd;
		case BPF_STX | BPF_XADD | BPF_DW:
			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
			else
				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
					    insn->off);
			break;

			/* call */
		case BPF_JMP | BPF_CALL:
			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);
			if (!imm32 || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}
			EMIT1_off32(0xE8, jmp_offset);
			break;

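			/*
			 * Note on the call above: addrs[i] is the image offset
			 * right after BPF insn i (it is filled in at the bottom
			 * of this loop), so jmp_offset is measured from the
			 * byte following the 5-byte call, which is what the
			 * E8 rel32 encoding expects once the passes have
			 * converged.
			 */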
		case BPF_JMP | BPF_TAIL_CALL:
			emit_bpf_tail_call(&prog);
			break;

			/* cond jump */
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JNE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JLT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JLE | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_X:
			/* cmp dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_X:
			/* test dst_reg, src_reg */
			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
			      add_2reg(0xC0, dst_reg, src_reg));
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JSET | BPF_K:
			/* test dst_reg, imm32 */
			EMIT1(add_1mod(0x48, dst_reg));
			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
			goto emit_cond_jmp;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_K:
			/* cmp dst_reg, imm8/32 */
			EMIT1(add_1mod(0x48, dst_reg));

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
			else
				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:		/* Convert BPF opcode to x86 */
			switch (BPF_OP(insn->code)) {
			case BPF_JEQ:
				jmp_cond = X86_JE;
				break;
			case BPF_JSET:
			case BPF_JNE:
				jmp_cond = X86_JNE;
				break;
			case BPF_JGT:
				/* GT is unsigned '>', JA in x86 */
				jmp_cond = X86_JA;
				break;
			case BPF_JLT:
				/* LT is unsigned '<', JB in x86 */
				jmp_cond = X86_JB;
				break;
			case BPF_JGE:
				/* GE is unsigned '>=', JAE in x86 */
				jmp_cond = X86_JAE;
				break;
			case BPF_JLE:
				/* LE is unsigned '<=', JBE in x86 */
				jmp_cond = X86_JBE;
				break;
			case BPF_JSGT:
				/* Signed '>', GT in x86 */
				jmp_cond = X86_JG;
				break;
			case BPF_JSLT:
				/* Signed '<', LT in x86 */
				jmp_cond = X86_JL;
				break;
			case BPF_JSGE:
				/* Signed '>=', GE in x86 */
				jmp_cond = X86_JGE;
				break;
			case BPF_JSLE:
				/* Signed '<=', LE in x86 */
				jmp_cond = X86_JLE;
				break;
			default: /* to silence GCC warning */
				return -EFAULT;
			}
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_imm8(jmp_offset)) {
				EMIT2(jmp_cond, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}

			break;

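			/*
			 * For both the conditional jumps above and the
			 * unconditional jump below: addrs[] maps a BPF insn
			 * index to the image offset right after that insn, and
			 * a BPF jump offset is relative to the next BPF insn,
			 * so addrs[i + insn->off] is the target's start while
			 * addrs[i] is the end of the jump being emitted; their
			 * difference is the x86 relative displacement.
			 */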
		case BPF_JMP | BPF_JA:
			if (insn->off == -1)
				/* -1 jmp instructions will always jump
				 * backwards two bytes. Explicitly handling
				 * this case avoids wasting too many passes
				 * when there are long sequences of replaced
				 * dead code.
				 */
				jmp_offset = -2;
			else
				jmp_offset = addrs[i + insn->off] - addrs[i];

			if (!jmp_offset)
				/* Optimize out nop jumps */
				break;
emit_jmp:
			if (is_imm8(jmp_offset)) {
				EMIT2(0xEB, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT1_off32(0xE9, jmp_offset);
			} else {
				pr_err("jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;

		case BPF_JMP | BPF_EXIT:
			if (seen_exit) {
				jmp_offset = ctx->cleanup_addr - addrs[i];
				goto emit_jmp;
			}
			seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			/* mov rbx, qword ptr [rbp+0] */
			EMIT4(0x48, 0x8B, 0x5D, 0);
			/* mov r13, qword ptr [rbp+8] */
			EMIT4(0x4C, 0x8B, 0x6D, 8);
			/* mov r14, qword ptr [rbp+16] */
			EMIT4(0x4C, 0x8B, 0x75, 16);
			/* mov r15, qword ptr [rbp+24] */
			EMIT4(0x4C, 0x8B, 0x7D, 24);

			/* add rbp, AUX_STACK_SPACE */
			EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
			EMIT1(0xC9); /* leave */
			EMIT1(0xC3); /* ret */
			break;

		default:
			/*
			 * By design x86-64 JIT should support all BPF instructions.
			 * This error will be seen if new instruction was added
			 * to the interpreter, but not to the JIT, or if there is
			 * junk in bpf_prog.
			 */
			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
			return -EINVAL;
		}

		ilen = prog - temp;
		if (ilen > BPF_MAX_INSN_SIZE) {
			pr_err("bpf_jit: fatal insn size error\n");
			return -EFAULT;
		}

		if (image) {
			if (unlikely(proglen + ilen > oldproglen)) {
				pr_err("bpf_jit: fatal error\n");
				return -EFAULT;
			}
			memcpy(image + proglen, temp, ilen);
		}
		proglen += ilen;
		addrs[i] = proglen;
		prog = temp;
	}
	return proglen;
}

struct x64_jit_data {
	struct bpf_binary_header *header;
	int *addrs;
	u8 *image;
	int proglen;
	struct jit_context ctx;
};

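/*
 * jit_data keeps the JIT state (addrs[], image, header, context) alive across
 * invocations: for programs with BPF-to-BPF calls the JIT is expected to be
 * invoked once more in an extra pass, after all function addresses are known,
 * so that the call displacements can be patched into the already allocated
 * image before it is finally locked read-only.
 */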
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct x64_jit_data *jit_data;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	bool extra_pass = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	addrs = jit_data->addrs;
	if (addrs) {
		ctx = jit_data->ctx;
		oldproglen = jit_data->proglen;
		image = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		goto skip_init_addrs;
	}
	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out_addrs;
	}

	/*
	 * Before first pass, make a rough estimation of addrs[]:
	 * each BPF instruction is translated to less than 64 bytes.
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;
skip_init_addrs:

	/*
	 * JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
out_image:
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				goto out_image;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
		cond_resched();
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);

	if (image) {
		if (!prog->is_func || extra_pass) {
			bpf_jit_binary_lock_ro(header);
		} else {
			jit_data->addrs = addrs;
			jit_data->ctx = ctx;
			jit_data->proglen = proglen;
			jit_data->image = image;
			jit_data->header = header;
		}
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

	if (!image || !prog->is_func || extra_pass) {
out_addrs:
		kfree(addrs);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}