1b886d83cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 2a2c7a983SIngo Molnar /* 3a2c7a983SIngo Molnar * bpf_jit_comp.c: BPF JIT compiler 40a14842fSEric Dumazet * 53b58908aSEric Dumazet * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) 662258278SAlexei Starovoitov * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 70a14842fSEric Dumazet */ 80a14842fSEric Dumazet #include <linux/netdevice.h> 90a14842fSEric Dumazet #include <linux/filter.h> 10855ddb56SEric Dumazet #include <linux/if_vlan.h> 1171d22d58SDaniel Borkmann #include <linux/bpf.h> 125964b200SAlexei Starovoitov #include <linux/memory.h> 1375ccbef6SBjörn Töpel #include <linux/sort.h> 143dec541bSAlexei Starovoitov #include <asm/extable.h> 15d1163651SLaura Abbott #include <asm/set_memory.h> 16a493a87fSDaniel Borkmann #include <asm/nospec-branch.h> 175964b200SAlexei Starovoitov #include <asm/text-patching.h> 1875ccbef6SBjörn Töpel #include <asm/asm-prototypes.h> 190a14842fSEric Dumazet 205cccc702SJoe Perches static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 210a14842fSEric Dumazet { 220a14842fSEric Dumazet if (len == 1) 230a14842fSEric Dumazet *ptr = bytes; 240a14842fSEric Dumazet else if (len == 2) 250a14842fSEric Dumazet *(u16 *)ptr = bytes; 260a14842fSEric Dumazet else { 270a14842fSEric Dumazet *(u32 *)ptr = bytes; 280a14842fSEric Dumazet barrier(); 290a14842fSEric Dumazet } 300a14842fSEric Dumazet return ptr + len; 310a14842fSEric Dumazet } 320a14842fSEric Dumazet 33b52f00e6SAlexei Starovoitov #define EMIT(bytes, len) \ 34b52f00e6SAlexei Starovoitov do { prog = emit_code(prog, bytes, len); cnt += len; } while (0) 350a14842fSEric Dumazet 360a14842fSEric Dumazet #define EMIT1(b1) EMIT(b1, 1) 370a14842fSEric Dumazet #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) 380a14842fSEric Dumazet #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) 390a14842fSEric Dumazet #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 
24), 4) 40a2c7a983SIngo Molnar 4162258278SAlexei Starovoitov #define EMIT1_off32(b1, off) \ 4262258278SAlexei Starovoitov do { EMIT1(b1); EMIT(off, 4); } while (0) 4362258278SAlexei Starovoitov #define EMIT2_off32(b1, b2, off) \ 4462258278SAlexei Starovoitov do { EMIT2(b1, b2); EMIT(off, 4); } while (0) 4562258278SAlexei Starovoitov #define EMIT3_off32(b1, b2, b3, off) \ 4662258278SAlexei Starovoitov do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) 4762258278SAlexei Starovoitov #define EMIT4_off32(b1, b2, b3, b4, off) \ 4862258278SAlexei Starovoitov do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) 490a14842fSEric Dumazet 505cccc702SJoe Perches static bool is_imm8(int value) 510a14842fSEric Dumazet { 520a14842fSEric Dumazet return value <= 127 && value >= -128; 530a14842fSEric Dumazet } 540a14842fSEric Dumazet 555cccc702SJoe Perches static bool is_simm32(s64 value) 560a14842fSEric Dumazet { 5762258278SAlexei Starovoitov return value == (s64)(s32)value; 580a14842fSEric Dumazet } 590a14842fSEric Dumazet 606fe8b9c1SDaniel Borkmann static bool is_uimm32(u64 value) 616fe8b9c1SDaniel Borkmann { 626fe8b9c1SDaniel Borkmann return value == (u64)(u32)value; 636fe8b9c1SDaniel Borkmann } 646fe8b9c1SDaniel Borkmann 65e430f34eSAlexei Starovoitov /* mov dst, src */ 66e430f34eSAlexei Starovoitov #define EMIT_mov(DST, SRC) \ 67a2c7a983SIngo Molnar do { \ 68a2c7a983SIngo Molnar if (DST != SRC) \ 69e430f34eSAlexei Starovoitov EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ 700a14842fSEric Dumazet } while (0) 710a14842fSEric Dumazet 7262258278SAlexei Starovoitov static int bpf_size_to_x86_bytes(int bpf_size) 7362258278SAlexei Starovoitov { 7462258278SAlexei Starovoitov if (bpf_size == BPF_W) 7562258278SAlexei Starovoitov return 4; 7662258278SAlexei Starovoitov else if (bpf_size == BPF_H) 7762258278SAlexei Starovoitov return 2; 7862258278SAlexei Starovoitov else if (bpf_size == BPF_B) 7962258278SAlexei Starovoitov return 1; 8062258278SAlexei Starovoitov else if 
(bpf_size == BPF_DW) 8162258278SAlexei Starovoitov return 4; /* imm32 */ 8262258278SAlexei Starovoitov else 8362258278SAlexei Starovoitov return 0; 8462258278SAlexei Starovoitov } 8562258278SAlexei Starovoitov 86a2c7a983SIngo Molnar /* 87a2c7a983SIngo Molnar * List of x86 cond jumps opcodes (. + s8) 880a14842fSEric Dumazet * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) 890a14842fSEric Dumazet */ 900a14842fSEric Dumazet #define X86_JB 0x72 910a14842fSEric Dumazet #define X86_JAE 0x73 920a14842fSEric Dumazet #define X86_JE 0x74 930a14842fSEric Dumazet #define X86_JNE 0x75 940a14842fSEric Dumazet #define X86_JBE 0x76 950a14842fSEric Dumazet #define X86_JA 0x77 9652afc51eSDaniel Borkmann #define X86_JL 0x7C 9762258278SAlexei Starovoitov #define X86_JGE 0x7D 9852afc51eSDaniel Borkmann #define X86_JLE 0x7E 9962258278SAlexei Starovoitov #define X86_JG 0x7F 1000a14842fSEric Dumazet 101a2c7a983SIngo Molnar /* Pick a register outside of BPF range for JIT internal work */ 102959a7579SDaniel Borkmann #define AUX_REG (MAX_BPF_JIT_REG + 1) 103fec56f58SAlexei Starovoitov #define X86_REG_R9 (MAX_BPF_JIT_REG + 2) 10462258278SAlexei Starovoitov 105a2c7a983SIngo Molnar /* 106a2c7a983SIngo Molnar * The following table maps BPF registers to x86-64 registers. 107959a7579SDaniel Borkmann * 108a2c7a983SIngo Molnar * x86-64 register R12 is unused, since if used as base address 109959a7579SDaniel Borkmann * register in load/store instructions, it always needs an 110959a7579SDaniel Borkmann * extra byte of encoding and is callee saved. 111959a7579SDaniel Borkmann * 112fec56f58SAlexei Starovoitov * x86-64 register R9 is not used by BPF programs, but can be used by BPF 113fec56f58SAlexei Starovoitov * trampoline. x86-64 register R10 is used for blinding (if enabled). 
11462258278SAlexei Starovoitov */ 11562258278SAlexei Starovoitov static const int reg2hex[] = { 116a2c7a983SIngo Molnar [BPF_REG_0] = 0, /* RAX */ 117a2c7a983SIngo Molnar [BPF_REG_1] = 7, /* RDI */ 118a2c7a983SIngo Molnar [BPF_REG_2] = 6, /* RSI */ 119a2c7a983SIngo Molnar [BPF_REG_3] = 2, /* RDX */ 120a2c7a983SIngo Molnar [BPF_REG_4] = 1, /* RCX */ 121a2c7a983SIngo Molnar [BPF_REG_5] = 0, /* R8 */ 122a2c7a983SIngo Molnar [BPF_REG_6] = 3, /* RBX callee saved */ 123a2c7a983SIngo Molnar [BPF_REG_7] = 5, /* R13 callee saved */ 124a2c7a983SIngo Molnar [BPF_REG_8] = 6, /* R14 callee saved */ 125a2c7a983SIngo Molnar [BPF_REG_9] = 7, /* R15 callee saved */ 126a2c7a983SIngo Molnar [BPF_REG_FP] = 5, /* RBP readonly */ 127a2c7a983SIngo Molnar [BPF_REG_AX] = 2, /* R10 temp register */ 128a2c7a983SIngo Molnar [AUX_REG] = 3, /* R11 temp register */ 129fec56f58SAlexei Starovoitov [X86_REG_R9] = 1, /* R9 register, 6th function argument */ 13062258278SAlexei Starovoitov }; 13162258278SAlexei Starovoitov 1323dec541bSAlexei Starovoitov static const int reg2pt_regs[] = { 1333dec541bSAlexei Starovoitov [BPF_REG_0] = offsetof(struct pt_regs, ax), 1343dec541bSAlexei Starovoitov [BPF_REG_1] = offsetof(struct pt_regs, di), 1353dec541bSAlexei Starovoitov [BPF_REG_2] = offsetof(struct pt_regs, si), 1363dec541bSAlexei Starovoitov [BPF_REG_3] = offsetof(struct pt_regs, dx), 1373dec541bSAlexei Starovoitov [BPF_REG_4] = offsetof(struct pt_regs, cx), 1383dec541bSAlexei Starovoitov [BPF_REG_5] = offsetof(struct pt_regs, r8), 1393dec541bSAlexei Starovoitov [BPF_REG_6] = offsetof(struct pt_regs, bx), 1403dec541bSAlexei Starovoitov [BPF_REG_7] = offsetof(struct pt_regs, r13), 1413dec541bSAlexei Starovoitov [BPF_REG_8] = offsetof(struct pt_regs, r14), 1423dec541bSAlexei Starovoitov [BPF_REG_9] = offsetof(struct pt_regs, r15), 1433dec541bSAlexei Starovoitov }; 1443dec541bSAlexei Starovoitov 145a2c7a983SIngo Molnar /* 146a2c7a983SIngo Molnar * is_ereg() == true if BPF register 'reg' maps to x86-64 
r8..r15 14762258278SAlexei Starovoitov * which need extra byte of encoding. 14862258278SAlexei Starovoitov * rax,rcx,...,rbp have simpler encoding 14962258278SAlexei Starovoitov */ 1505cccc702SJoe Perches static bool is_ereg(u32 reg) 15162258278SAlexei Starovoitov { 152d148134bSJoe Perches return (1 << reg) & (BIT(BPF_REG_5) | 153d148134bSJoe Perches BIT(AUX_REG) | 154d148134bSJoe Perches BIT(BPF_REG_7) | 155d148134bSJoe Perches BIT(BPF_REG_8) | 156959a7579SDaniel Borkmann BIT(BPF_REG_9) | 157fec56f58SAlexei Starovoitov BIT(X86_REG_R9) | 158959a7579SDaniel Borkmann BIT(BPF_REG_AX)); 15962258278SAlexei Starovoitov } 16062258278SAlexei Starovoitov 161aee194b1SLuke Nelson /* 162aee194b1SLuke Nelson * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 163aee194b1SLuke Nelson * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte 164aee194b1SLuke Nelson * of encoding. al,cl,dl,bl have simpler encoding. 165aee194b1SLuke Nelson */ 166aee194b1SLuke Nelson static bool is_ereg_8l(u32 reg) 167aee194b1SLuke Nelson { 168aee194b1SLuke Nelson return is_ereg(reg) || 169aee194b1SLuke Nelson (1 << reg) & (BIT(BPF_REG_1) | 170aee194b1SLuke Nelson BIT(BPF_REG_2) | 171aee194b1SLuke Nelson BIT(BPF_REG_FP)); 172aee194b1SLuke Nelson } 173aee194b1SLuke Nelson 174de0a444dSDaniel Borkmann static bool is_axreg(u32 reg) 175de0a444dSDaniel Borkmann { 176de0a444dSDaniel Borkmann return reg == BPF_REG_0; 177de0a444dSDaniel Borkmann } 178de0a444dSDaniel Borkmann 179a2c7a983SIngo Molnar /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ 1805cccc702SJoe Perches static u8 add_1mod(u8 byte, u32 reg) 18162258278SAlexei Starovoitov { 18262258278SAlexei Starovoitov if (is_ereg(reg)) 18362258278SAlexei Starovoitov byte |= 1; 18462258278SAlexei Starovoitov return byte; 18562258278SAlexei Starovoitov } 18662258278SAlexei Starovoitov 1875cccc702SJoe Perches static u8 add_2mod(u8 byte, u32 r1, u32 r2) 18862258278SAlexei Starovoitov { 18962258278SAlexei 
Starovoitov if (is_ereg(r1)) 19062258278SAlexei Starovoitov byte |= 1; 19162258278SAlexei Starovoitov if (is_ereg(r2)) 19262258278SAlexei Starovoitov byte |= 4; 19362258278SAlexei Starovoitov return byte; 19462258278SAlexei Starovoitov } 19562258278SAlexei Starovoitov 196a2c7a983SIngo Molnar /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ 1975cccc702SJoe Perches static u8 add_1reg(u8 byte, u32 dst_reg) 19862258278SAlexei Starovoitov { 199e430f34eSAlexei Starovoitov return byte + reg2hex[dst_reg]; 20062258278SAlexei Starovoitov } 20162258278SAlexei Starovoitov 202a2c7a983SIngo Molnar /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ 2035cccc702SJoe Perches static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) 20462258278SAlexei Starovoitov { 205e430f34eSAlexei Starovoitov return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); 20662258278SAlexei Starovoitov } 20762258278SAlexei Starovoitov 208738cbe72SDaniel Borkmann static void jit_fill_hole(void *area, unsigned int size) 209738cbe72SDaniel Borkmann { 210a2c7a983SIngo Molnar /* Fill whole space with INT3 instructions */ 211738cbe72SDaniel Borkmann memset(area, 0xcc, size); 212738cbe72SDaniel Borkmann } 213738cbe72SDaniel Borkmann 214f3c2af7bSAlexei Starovoitov struct jit_context { 215a2c7a983SIngo Molnar int cleanup_addr; /* Epilogue code offset */ 216f3c2af7bSAlexei Starovoitov }; 217f3c2af7bSAlexei Starovoitov 218a2c7a983SIngo Molnar /* Maximum number of bytes emitted while JITing one eBPF insn */ 219e0ee9c12SAlexei Starovoitov #define BPF_MAX_INSN_SIZE 128 220e0ee9c12SAlexei Starovoitov #define BPF_INSN_SAFETY 64 2214b3da77bSDaniel Borkmann 2224b3da77bSDaniel Borkmann /* Number of bytes emit_patch() needs to generate instructions */ 2234b3da77bSDaniel Borkmann #define X86_PATCH_SIZE 5 224e0ee9c12SAlexei Starovoitov 2259fd4a39dSAlexei Starovoitov #define PROLOGUE_SIZE 25 226b52f00e6SAlexei Starovoitov 227a2c7a983SIngo Molnar /* 228a2c7a983SIngo Molnar * Emit x86-64 prologue code 
for BPF program and check its size. 229b52f00e6SAlexei Starovoitov * bpf_tail_call helper will skip it while jumping into another program 230b52f00e6SAlexei Starovoitov */ 23108691752SDaniel Borkmann static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) 2320a14842fSEric Dumazet { 233b52f00e6SAlexei Starovoitov u8 *prog = *pprog; 2344b3da77bSDaniel Borkmann int cnt = X86_PATCH_SIZE; 2350a14842fSEric Dumazet 2369fd4a39dSAlexei Starovoitov /* BPF trampoline can be made to work without these nops, 2379fd4a39dSAlexei Starovoitov * but let's waste 5 bytes for now and optimize later 2389fd4a39dSAlexei Starovoitov */ 2399fd4a39dSAlexei Starovoitov memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt); 2409fd4a39dSAlexei Starovoitov prog += cnt; 241fe8d9571SAlexei Starovoitov EMIT1(0x55); /* push rbp */ 242fe8d9571SAlexei Starovoitov EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 243fe8d9571SAlexei Starovoitov /* sub rsp, rounded_stack_depth */ 244fe8d9571SAlexei Starovoitov EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); 245fe8d9571SAlexei Starovoitov EMIT1(0x53); /* push rbx */ 246fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x55); /* push r13 */ 247fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x56); /* push r14 */ 248fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x57); /* push r15 */ 24908691752SDaniel Borkmann if (!ebpf_from_cbpf) { 250fe8d9571SAlexei Starovoitov /* zero init tail_call_cnt */ 251fe8d9571SAlexei Starovoitov EMIT2(0x6a, 0x00); 252b52f00e6SAlexei Starovoitov BUILD_BUG_ON(cnt != PROLOGUE_SIZE); 25308691752SDaniel Borkmann } 254b52f00e6SAlexei Starovoitov *pprog = prog; 255b52f00e6SAlexei Starovoitov } 256b52f00e6SAlexei Starovoitov 257428d5df1SDaniel Borkmann static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) 258428d5df1SDaniel Borkmann { 259428d5df1SDaniel Borkmann u8 *prog = *pprog; 260428d5df1SDaniel Borkmann int cnt = 0; 261428d5df1SDaniel Borkmann s64 offset; 262428d5df1SDaniel Borkmann 263428d5df1SDaniel Borkmann offset = func - (ip + 
X86_PATCH_SIZE); 264428d5df1SDaniel Borkmann if (!is_simm32(offset)) { 265428d5df1SDaniel Borkmann pr_err("Target call %p is out of range\n", func); 266428d5df1SDaniel Borkmann return -ERANGE; 267428d5df1SDaniel Borkmann } 268428d5df1SDaniel Borkmann EMIT1_off32(opcode, offset); 269428d5df1SDaniel Borkmann *pprog = prog; 270428d5df1SDaniel Borkmann return 0; 271428d5df1SDaniel Borkmann } 272428d5df1SDaniel Borkmann 273428d5df1SDaniel Borkmann static int emit_call(u8 **pprog, void *func, void *ip) 274428d5df1SDaniel Borkmann { 275428d5df1SDaniel Borkmann return emit_patch(pprog, func, ip, 0xE8); 276428d5df1SDaniel Borkmann } 277428d5df1SDaniel Borkmann 278428d5df1SDaniel Borkmann static int emit_jump(u8 **pprog, void *func, void *ip) 279428d5df1SDaniel Borkmann { 280428d5df1SDaniel Borkmann return emit_patch(pprog, func, ip, 0xE9); 281428d5df1SDaniel Borkmann } 282428d5df1SDaniel Borkmann 283428d5df1SDaniel Borkmann static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, 284428d5df1SDaniel Borkmann void *old_addr, void *new_addr, 285428d5df1SDaniel Borkmann const bool text_live) 286428d5df1SDaniel Borkmann { 287428d5df1SDaniel Borkmann const u8 *nop_insn = ideal_nops[NOP_ATOMIC5]; 288b553a6ecSDaniel Borkmann u8 old_insn[X86_PATCH_SIZE]; 289b553a6ecSDaniel Borkmann u8 new_insn[X86_PATCH_SIZE]; 290428d5df1SDaniel Borkmann u8 *prog; 291428d5df1SDaniel Borkmann int ret; 292428d5df1SDaniel Borkmann 293428d5df1SDaniel Borkmann memcpy(old_insn, nop_insn, X86_PATCH_SIZE); 294b553a6ecSDaniel Borkmann if (old_addr) { 295428d5df1SDaniel Borkmann prog = old_insn; 296b553a6ecSDaniel Borkmann ret = t == BPF_MOD_CALL ? 
297b553a6ecSDaniel Borkmann emit_call(&prog, old_addr, ip) : 298b553a6ecSDaniel Borkmann emit_jump(&prog, old_addr, ip); 299428d5df1SDaniel Borkmann if (ret) 300428d5df1SDaniel Borkmann return ret; 301428d5df1SDaniel Borkmann } 302b553a6ecSDaniel Borkmann 303428d5df1SDaniel Borkmann memcpy(new_insn, nop_insn, X86_PATCH_SIZE); 304b553a6ecSDaniel Borkmann if (new_addr) { 305b553a6ecSDaniel Borkmann prog = new_insn; 306b553a6ecSDaniel Borkmann ret = t == BPF_MOD_CALL ? 307b553a6ecSDaniel Borkmann emit_call(&prog, new_addr, ip) : 308b553a6ecSDaniel Borkmann emit_jump(&prog, new_addr, ip); 309428d5df1SDaniel Borkmann if (ret) 310428d5df1SDaniel Borkmann return ret; 311428d5df1SDaniel Borkmann } 312428d5df1SDaniel Borkmann 313428d5df1SDaniel Borkmann ret = -EBUSY; 314428d5df1SDaniel Borkmann mutex_lock(&text_mutex); 315428d5df1SDaniel Borkmann if (memcmp(ip, old_insn, X86_PATCH_SIZE)) 316428d5df1SDaniel Borkmann goto out; 317b553a6ecSDaniel Borkmann if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { 318428d5df1SDaniel Borkmann if (text_live) 319428d5df1SDaniel Borkmann text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); 320428d5df1SDaniel Borkmann else 321428d5df1SDaniel Borkmann memcpy(ip, new_insn, X86_PATCH_SIZE); 322b553a6ecSDaniel Borkmann } 323428d5df1SDaniel Borkmann ret = 0; 324428d5df1SDaniel Borkmann out: 325428d5df1SDaniel Borkmann mutex_unlock(&text_mutex); 326428d5df1SDaniel Borkmann return ret; 327428d5df1SDaniel Borkmann } 328428d5df1SDaniel Borkmann 329428d5df1SDaniel Borkmann int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, 330428d5df1SDaniel Borkmann void *old_addr, void *new_addr) 331428d5df1SDaniel Borkmann { 332428d5df1SDaniel Borkmann if (!is_kernel_text((long)ip) && 333428d5df1SDaniel Borkmann !is_bpf_text_address((long)ip)) 334428d5df1SDaniel Borkmann /* BPF poking in modules is not supported */ 335428d5df1SDaniel Borkmann return -EINVAL; 336428d5df1SDaniel Borkmann 337428d5df1SDaniel Borkmann return __bpf_arch_text_poke(ip, t, old_addr, 
new_addr, true); 338428d5df1SDaniel Borkmann } 339428d5df1SDaniel Borkmann 340a2c7a983SIngo Molnar /* 341a2c7a983SIngo Molnar * Generate the following code: 342a2c7a983SIngo Molnar * 343b52f00e6SAlexei Starovoitov * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 344b52f00e6SAlexei Starovoitov * if (index >= array->map.max_entries) 345b52f00e6SAlexei Starovoitov * goto out; 346b52f00e6SAlexei Starovoitov * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) 347b52f00e6SAlexei Starovoitov * goto out; 3482a36f0b9SWang Nan * prog = array->ptrs[index]; 349b52f00e6SAlexei Starovoitov * if (prog == NULL) 350b52f00e6SAlexei Starovoitov * goto out; 351b52f00e6SAlexei Starovoitov * goto *(prog->bpf_func + prologue_size); 352b52f00e6SAlexei Starovoitov * out: 353b52f00e6SAlexei Starovoitov */ 354428d5df1SDaniel Borkmann static void emit_bpf_tail_call_indirect(u8 **pprog) 355b52f00e6SAlexei Starovoitov { 356b52f00e6SAlexei Starovoitov u8 *prog = *pprog; 357b52f00e6SAlexei Starovoitov int label1, label2, label3; 358b52f00e6SAlexei Starovoitov int cnt = 0; 359b52f00e6SAlexei Starovoitov 360a2c7a983SIngo Molnar /* 361a2c7a983SIngo Molnar * rdi - pointer to ctx 362b52f00e6SAlexei Starovoitov * rsi - pointer to bpf_array 363b52f00e6SAlexei Starovoitov * rdx - index in bpf_array 364b52f00e6SAlexei Starovoitov */ 365b52f00e6SAlexei Starovoitov 366a2c7a983SIngo Molnar /* 367a2c7a983SIngo Molnar * if (index >= array->map.max_entries) 368b52f00e6SAlexei Starovoitov * goto out; 369b52f00e6SAlexei Starovoitov */ 37090caccddSAlexei Starovoitov EMIT2(0x89, 0xD2); /* mov edx, edx */ 37190caccddSAlexei Starovoitov EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ 372b52f00e6SAlexei Starovoitov offsetof(struct bpf_array, map.max_entries)); 373a2c7a983SIngo Molnar #define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ 374b52f00e6SAlexei Starovoitov EMIT2(X86_JBE, OFFSET1); /* jbe out */ 375b52f00e6SAlexei Starovoitov label1 = cnt; 376b52f00e6SAlexei 
Starovoitov 377a2c7a983SIngo Molnar /* 378a2c7a983SIngo Molnar * if (tail_call_cnt > MAX_TAIL_CALL_CNT) 379b52f00e6SAlexei Starovoitov * goto out; 380b52f00e6SAlexei Starovoitov */ 381fe8d9571SAlexei Starovoitov EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ 382b52f00e6SAlexei Starovoitov EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 383a493a87fSDaniel Borkmann #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) 384b52f00e6SAlexei Starovoitov EMIT2(X86_JA, OFFSET2); /* ja out */ 385b52f00e6SAlexei Starovoitov label2 = cnt; 386b52f00e6SAlexei Starovoitov EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 387fe8d9571SAlexei Starovoitov EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ 388b52f00e6SAlexei Starovoitov 3892a36f0b9SWang Nan /* prog = array->ptrs[index]; */ 39084ccac6eSEric Dumazet EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ 3912a36f0b9SWang Nan offsetof(struct bpf_array, ptrs)); 392b52f00e6SAlexei Starovoitov 393a2c7a983SIngo Molnar /* 394a2c7a983SIngo Molnar * if (prog == NULL) 395b52f00e6SAlexei Starovoitov * goto out; 396b52f00e6SAlexei Starovoitov */ 39784ccac6eSEric Dumazet EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ 398a493a87fSDaniel Borkmann #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) 399b52f00e6SAlexei Starovoitov EMIT2(X86_JE, OFFSET3); /* je out */ 400b52f00e6SAlexei Starovoitov label3 = cnt; 401b52f00e6SAlexei Starovoitov 402b52f00e6SAlexei Starovoitov /* goto *(prog->bpf_func + prologue_size); */ 403b52f00e6SAlexei Starovoitov EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */ 404b52f00e6SAlexei Starovoitov offsetof(struct bpf_prog, bpf_func)); 405b52f00e6SAlexei Starovoitov EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ 406b52f00e6SAlexei Starovoitov 407a2c7a983SIngo Molnar /* 408a2c7a983SIngo Molnar * Wow we're ready to jump into next BPF program 409b52f00e6SAlexei Starovoitov * rdi == 
ctx (1st arg) 410b52f00e6SAlexei Starovoitov * rax == prog->bpf_func + prologue_size 411b52f00e6SAlexei Starovoitov */ 412a493a87fSDaniel Borkmann RETPOLINE_RAX_BPF_JIT(); 413b52f00e6SAlexei Starovoitov 414b52f00e6SAlexei Starovoitov /* out: */ 415b52f00e6SAlexei Starovoitov BUILD_BUG_ON(cnt - label1 != OFFSET1); 416b52f00e6SAlexei Starovoitov BUILD_BUG_ON(cnt - label2 != OFFSET2); 417b52f00e6SAlexei Starovoitov BUILD_BUG_ON(cnt - label3 != OFFSET3); 418b52f00e6SAlexei Starovoitov *pprog = prog; 419b52f00e6SAlexei Starovoitov } 420b52f00e6SAlexei Starovoitov 421428d5df1SDaniel Borkmann static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, 422428d5df1SDaniel Borkmann u8 **pprog, int addr, u8 *image) 423428d5df1SDaniel Borkmann { 424428d5df1SDaniel Borkmann u8 *prog = *pprog; 425428d5df1SDaniel Borkmann int cnt = 0; 426428d5df1SDaniel Borkmann 427428d5df1SDaniel Borkmann /* 428428d5df1SDaniel Borkmann * if (tail_call_cnt > MAX_TAIL_CALL_CNT) 429428d5df1SDaniel Borkmann * goto out; 430428d5df1SDaniel Borkmann */ 431428d5df1SDaniel Borkmann EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ 432428d5df1SDaniel Borkmann EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 433428d5df1SDaniel Borkmann EMIT2(X86_JA, 14); /* ja out */ 434428d5df1SDaniel Borkmann EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 435428d5df1SDaniel Borkmann EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ 436428d5df1SDaniel Borkmann 437428d5df1SDaniel Borkmann poke->ip = image + (addr - X86_PATCH_SIZE); 438428d5df1SDaniel Borkmann poke->adj_off = PROLOGUE_SIZE; 439428d5df1SDaniel Borkmann 440428d5df1SDaniel Borkmann memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); 441428d5df1SDaniel Borkmann prog += X86_PATCH_SIZE; 442428d5df1SDaniel Borkmann /* out: */ 443428d5df1SDaniel Borkmann 444428d5df1SDaniel Borkmann *pprog = prog; 445428d5df1SDaniel Borkmann } 446428d5df1SDaniel Borkmann 
447428d5df1SDaniel Borkmann static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) 448428d5df1SDaniel Borkmann { 449428d5df1SDaniel Borkmann struct bpf_jit_poke_descriptor *poke; 450428d5df1SDaniel Borkmann struct bpf_array *array; 451428d5df1SDaniel Borkmann struct bpf_prog *target; 452428d5df1SDaniel Borkmann int i, ret; 453428d5df1SDaniel Borkmann 454428d5df1SDaniel Borkmann for (i = 0; i < prog->aux->size_poke_tab; i++) { 455428d5df1SDaniel Borkmann poke = &prog->aux->poke_tab[i]; 456428d5df1SDaniel Borkmann WARN_ON_ONCE(READ_ONCE(poke->ip_stable)); 457428d5df1SDaniel Borkmann 458428d5df1SDaniel Borkmann if (poke->reason != BPF_POKE_REASON_TAIL_CALL) 459428d5df1SDaniel Borkmann continue; 460428d5df1SDaniel Borkmann 461428d5df1SDaniel Borkmann array = container_of(poke->tail_call.map, struct bpf_array, map); 462428d5df1SDaniel Borkmann mutex_lock(&array->aux->poke_mutex); 463428d5df1SDaniel Borkmann target = array->ptrs[poke->tail_call.key]; 464428d5df1SDaniel Borkmann if (target) { 465428d5df1SDaniel Borkmann /* Plain memcpy is used when image is not live yet 466428d5df1SDaniel Borkmann * and still not locked as read-only. Once poke 467428d5df1SDaniel Borkmann * location is active (poke->ip_stable), any parallel 468428d5df1SDaniel Borkmann * bpf_arch_text_poke() might occur still on the 469428d5df1SDaniel Borkmann * read-write image until we finally locked it as 470428d5df1SDaniel Borkmann * read-only. Both modifications on the given image 471428d5df1SDaniel Borkmann * are under text_mutex to avoid interference. 
472428d5df1SDaniel Borkmann */ 473b553a6ecSDaniel Borkmann ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL, 474428d5df1SDaniel Borkmann (u8 *)target->bpf_func + 475428d5df1SDaniel Borkmann poke->adj_off, false); 476428d5df1SDaniel Borkmann BUG_ON(ret < 0); 477428d5df1SDaniel Borkmann } 478428d5df1SDaniel Borkmann WRITE_ONCE(poke->ip_stable, true); 479428d5df1SDaniel Borkmann mutex_unlock(&array->aux->poke_mutex); 480428d5df1SDaniel Borkmann } 481428d5df1SDaniel Borkmann } 482428d5df1SDaniel Borkmann 4836fe8b9c1SDaniel Borkmann static void emit_mov_imm32(u8 **pprog, bool sign_propagate, 4846fe8b9c1SDaniel Borkmann u32 dst_reg, const u32 imm32) 4856fe8b9c1SDaniel Borkmann { 4866fe8b9c1SDaniel Borkmann u8 *prog = *pprog; 4876fe8b9c1SDaniel Borkmann u8 b1, b2, b3; 4886fe8b9c1SDaniel Borkmann int cnt = 0; 4896fe8b9c1SDaniel Borkmann 490a2c7a983SIngo Molnar /* 491a2c7a983SIngo Molnar * Optimization: if imm32 is positive, use 'mov %eax, imm32' 4926fe8b9c1SDaniel Borkmann * (which zero-extends imm32) to save 2 bytes. 4936fe8b9c1SDaniel Borkmann */ 4946fe8b9c1SDaniel Borkmann if (sign_propagate && (s32)imm32 < 0) { 4956fe8b9c1SDaniel Borkmann /* 'mov %rax, imm32' sign extends imm32 */ 4966fe8b9c1SDaniel Borkmann b1 = add_1mod(0x48, dst_reg); 4976fe8b9c1SDaniel Borkmann b2 = 0xC7; 4986fe8b9c1SDaniel Borkmann b3 = 0xC0; 4996fe8b9c1SDaniel Borkmann EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); 5006fe8b9c1SDaniel Borkmann goto done; 5016fe8b9c1SDaniel Borkmann } 5026fe8b9c1SDaniel Borkmann 503a2c7a983SIngo Molnar /* 504a2c7a983SIngo Molnar * Optimization: if imm32 is zero, use 'xor %eax, %eax' 5056fe8b9c1SDaniel Borkmann * to save 3 bytes. 
5066fe8b9c1SDaniel Borkmann */ 5076fe8b9c1SDaniel Borkmann if (imm32 == 0) { 5086fe8b9c1SDaniel Borkmann if (is_ereg(dst_reg)) 5096fe8b9c1SDaniel Borkmann EMIT1(add_2mod(0x40, dst_reg, dst_reg)); 5106fe8b9c1SDaniel Borkmann b2 = 0x31; /* xor */ 5116fe8b9c1SDaniel Borkmann b3 = 0xC0; 5126fe8b9c1SDaniel Borkmann EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); 5136fe8b9c1SDaniel Borkmann goto done; 5146fe8b9c1SDaniel Borkmann } 5156fe8b9c1SDaniel Borkmann 5166fe8b9c1SDaniel Borkmann /* mov %eax, imm32 */ 5176fe8b9c1SDaniel Borkmann if (is_ereg(dst_reg)) 5186fe8b9c1SDaniel Borkmann EMIT1(add_1mod(0x40, dst_reg)); 5196fe8b9c1SDaniel Borkmann EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); 5206fe8b9c1SDaniel Borkmann done: 5216fe8b9c1SDaniel Borkmann *pprog = prog; 5226fe8b9c1SDaniel Borkmann } 5236fe8b9c1SDaniel Borkmann 5246fe8b9c1SDaniel Borkmann static void emit_mov_imm64(u8 **pprog, u32 dst_reg, 5256fe8b9c1SDaniel Borkmann const u32 imm32_hi, const u32 imm32_lo) 5266fe8b9c1SDaniel Borkmann { 5276fe8b9c1SDaniel Borkmann u8 *prog = *pprog; 5286fe8b9c1SDaniel Borkmann int cnt = 0; 5296fe8b9c1SDaniel Borkmann 5306fe8b9c1SDaniel Borkmann if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { 531a2c7a983SIngo Molnar /* 532a2c7a983SIngo Molnar * For emitting plain u32, where sign bit must not be 5336fe8b9c1SDaniel Borkmann * propagated LLVM tends to load imm64 over mov32 5346fe8b9c1SDaniel Borkmann * directly, so save couple of bytes by just doing 5356fe8b9c1SDaniel Borkmann * 'mov %eax, imm32' instead. 
	 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

/*
 * Emit a register-to-register move: dst_reg = src_reg.
 * @is64 selects the 64-bit form (REX.W) vs. the 32-bit form; the 32-bit
 * form only needs a REX prefix when either register is an extended one.
 */
static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}

/* LDX: dst_reg = *(u8*)(src_reg + off) */
static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* Opcode selection per access size; the displacement is shared below. */
	switch (size) {
	case BPF_B:
		/* Emit 'movzx rax, byte ptr [rax + off]' */
		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
		break;
	case BPF_H:
		/* Emit 'movzx rax, word ptr [rax + off]' */
		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
		break;
	case BPF_W:
		/* Emit 'mov eax, dword ptr [rax+0x14]' */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
		else
			EMIT1(0x8B);
		break;
	case BPF_DW:
		/* Emit 'mov rax, qword ptr [rax+0x14]' */
		EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
		break;
	}
	/*
	 * If insn->off == 0 we can save one extra byte, but
	 * special case of x86 R13 which always needs an offset
	 * is not worth the hassle
	 */
	if (is_imm8(off))
		/* ModRM mod=01: 8-bit displacement follows */
		EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
	else
		/* ModRM mod=10: 32-bit displacement follows */
		EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
	*pprog = prog;
}

/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	u8 *prog = *pprog;
	int cnt = 0;

	switch (size) {
	case BPF_B:
		/* Emit 'mov byte ptr [rax + off], al' */
		if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
			/* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
		else
			EMIT1(0x88);
		break;
	case BPF_H:
		/* 0x66 operand-size prefix selects the 16-bit store */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
		else
			EMIT2(0x66, 0x89);
		break;
	case BPF_W:
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
		else
			EMIT1(0x89);
		break;
	case BPF_DW:
		EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
		break;
	}
	/* Displacement encoding shared by all sizes, as in emit_ldx() */
	if (is_imm8(off))
		EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
	else
		EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
	*pprog = prog;
}

/*
 * Exception-table fixup handler for BPF_PROBE_MEM loads: the fixup word
 * packs the faulting insn's length in the low 8 bits and the pt_regs
 * byte offset of the destination register in the upper bits.
 */
static bool ex_handler_bpf(const struct exception_table_entry *x,
			   struct pt_regs *regs, int trapnr,
			   unsigned long error_code, unsigned long fault_addr)
{
	u32 reg = x->fixup >> 8;

/* jump over faulting load and clear dest register */ 6503dec541bSAlexei Starovoitov *(unsigned long *)((void *)regs + reg) = 0; 6513dec541bSAlexei Starovoitov regs->ip += x->fixup & 0xff; 6523dec541bSAlexei Starovoitov return true; 6533dec541bSAlexei Starovoitov } 6543dec541bSAlexei Starovoitov 655b52f00e6SAlexei Starovoitov static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, 656b52f00e6SAlexei Starovoitov int oldproglen, struct jit_context *ctx) 657b52f00e6SAlexei Starovoitov { 658b52f00e6SAlexei Starovoitov struct bpf_insn *insn = bpf_prog->insnsi; 659b52f00e6SAlexei Starovoitov int insn_cnt = bpf_prog->len; 660b52f00e6SAlexei Starovoitov bool seen_exit = false; 661b52f00e6SAlexei Starovoitov u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; 6623dec541bSAlexei Starovoitov int i, cnt = 0, excnt = 0; 663b52f00e6SAlexei Starovoitov int proglen = 0; 664b52f00e6SAlexei Starovoitov u8 *prog = temp; 665b52f00e6SAlexei Starovoitov 66608691752SDaniel Borkmann emit_prologue(&prog, bpf_prog->aux->stack_depth, 66708691752SDaniel Borkmann bpf_prog_was_classic(bpf_prog)); 6687c2e988fSAlexei Starovoitov addrs[0] = prog - temp; 669b52f00e6SAlexei Starovoitov 6707c2e988fSAlexei Starovoitov for (i = 1; i <= insn_cnt; i++, insn++) { 671e430f34eSAlexei Starovoitov const s32 imm32 = insn->imm; 672e430f34eSAlexei Starovoitov u32 dst_reg = insn->dst_reg; 673e430f34eSAlexei Starovoitov u32 src_reg = insn->src_reg; 6746fe8b9c1SDaniel Borkmann u8 b2 = 0, b3 = 0; 67562258278SAlexei Starovoitov s64 jmp_offset; 67662258278SAlexei Starovoitov u8 jmp_cond; 67762258278SAlexei Starovoitov int ilen; 67862258278SAlexei Starovoitov u8 *func; 67962258278SAlexei Starovoitov 68062258278SAlexei Starovoitov switch (insn->code) { 68162258278SAlexei Starovoitov /* ALU */ 68262258278SAlexei Starovoitov case BPF_ALU | BPF_ADD | BPF_X: 68362258278SAlexei Starovoitov case BPF_ALU | BPF_SUB | BPF_X: 68462258278SAlexei Starovoitov case BPF_ALU | BPF_AND | BPF_X: 68562258278SAlexei Starovoitov case 
BPF_ALU | BPF_OR | BPF_X: 68662258278SAlexei Starovoitov case BPF_ALU | BPF_XOR | BPF_X: 68762258278SAlexei Starovoitov case BPF_ALU64 | BPF_ADD | BPF_X: 68862258278SAlexei Starovoitov case BPF_ALU64 | BPF_SUB | BPF_X: 68962258278SAlexei Starovoitov case BPF_ALU64 | BPF_AND | BPF_X: 69062258278SAlexei Starovoitov case BPF_ALU64 | BPF_OR | BPF_X: 69162258278SAlexei Starovoitov case BPF_ALU64 | BPF_XOR | BPF_X: 69262258278SAlexei Starovoitov switch (BPF_OP(insn->code)) { 69362258278SAlexei Starovoitov case BPF_ADD: b2 = 0x01; break; 69462258278SAlexei Starovoitov case BPF_SUB: b2 = 0x29; break; 69562258278SAlexei Starovoitov case BPF_AND: b2 = 0x21; break; 69662258278SAlexei Starovoitov case BPF_OR: b2 = 0x09; break; 69762258278SAlexei Starovoitov case BPF_XOR: b2 = 0x31; break; 69862258278SAlexei Starovoitov } 69962258278SAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 700e430f34eSAlexei Starovoitov EMIT1(add_2mod(0x48, dst_reg, src_reg)); 701e430f34eSAlexei Starovoitov else if (is_ereg(dst_reg) || is_ereg(src_reg)) 702e430f34eSAlexei Starovoitov EMIT1(add_2mod(0x40, dst_reg, src_reg)); 703e430f34eSAlexei Starovoitov EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); 7040a14842fSEric Dumazet break; 70562258278SAlexei Starovoitov 70662258278SAlexei Starovoitov case BPF_ALU64 | BPF_MOV | BPF_X: 70762258278SAlexei Starovoitov case BPF_ALU | BPF_MOV | BPF_X: 7084c38e2f3SDaniel Borkmann emit_mov_reg(&prog, 7094c38e2f3SDaniel Borkmann BPF_CLASS(insn->code) == BPF_ALU64, 7104c38e2f3SDaniel Borkmann dst_reg, src_reg); 71162258278SAlexei Starovoitov break; 71262258278SAlexei Starovoitov 713e430f34eSAlexei Starovoitov /* neg dst */ 71462258278SAlexei Starovoitov case BPF_ALU | BPF_NEG: 71562258278SAlexei Starovoitov case BPF_ALU64 | BPF_NEG: 71662258278SAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 717e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 718e430f34eSAlexei Starovoitov else if (is_ereg(dst_reg)) 719e430f34eSAlexei Starovoitov 
EMIT1(add_1mod(0x40, dst_reg)); 720e430f34eSAlexei Starovoitov EMIT2(0xF7, add_1reg(0xD8, dst_reg)); 72162258278SAlexei Starovoitov break; 72262258278SAlexei Starovoitov 72362258278SAlexei Starovoitov case BPF_ALU | BPF_ADD | BPF_K: 72462258278SAlexei Starovoitov case BPF_ALU | BPF_SUB | BPF_K: 72562258278SAlexei Starovoitov case BPF_ALU | BPF_AND | BPF_K: 72662258278SAlexei Starovoitov case BPF_ALU | BPF_OR | BPF_K: 72762258278SAlexei Starovoitov case BPF_ALU | BPF_XOR | BPF_K: 72862258278SAlexei Starovoitov case BPF_ALU64 | BPF_ADD | BPF_K: 72962258278SAlexei Starovoitov case BPF_ALU64 | BPF_SUB | BPF_K: 73062258278SAlexei Starovoitov case BPF_ALU64 | BPF_AND | BPF_K: 73162258278SAlexei Starovoitov case BPF_ALU64 | BPF_OR | BPF_K: 73262258278SAlexei Starovoitov case BPF_ALU64 | BPF_XOR | BPF_K: 73362258278SAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 734e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 735e430f34eSAlexei Starovoitov else if (is_ereg(dst_reg)) 736e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x40, dst_reg)); 73762258278SAlexei Starovoitov 738a2c7a983SIngo Molnar /* 739a2c7a983SIngo Molnar * b3 holds 'normal' opcode, b2 short form only valid 740de0a444dSDaniel Borkmann * in case dst is eax/rax. 
741de0a444dSDaniel Borkmann */ 74262258278SAlexei Starovoitov switch (BPF_OP(insn->code)) { 743de0a444dSDaniel Borkmann case BPF_ADD: 744de0a444dSDaniel Borkmann b3 = 0xC0; 745de0a444dSDaniel Borkmann b2 = 0x05; 746de0a444dSDaniel Borkmann break; 747de0a444dSDaniel Borkmann case BPF_SUB: 748de0a444dSDaniel Borkmann b3 = 0xE8; 749de0a444dSDaniel Borkmann b2 = 0x2D; 750de0a444dSDaniel Borkmann break; 751de0a444dSDaniel Borkmann case BPF_AND: 752de0a444dSDaniel Borkmann b3 = 0xE0; 753de0a444dSDaniel Borkmann b2 = 0x25; 754de0a444dSDaniel Borkmann break; 755de0a444dSDaniel Borkmann case BPF_OR: 756de0a444dSDaniel Borkmann b3 = 0xC8; 757de0a444dSDaniel Borkmann b2 = 0x0D; 758de0a444dSDaniel Borkmann break; 759de0a444dSDaniel Borkmann case BPF_XOR: 760de0a444dSDaniel Borkmann b3 = 0xF0; 761de0a444dSDaniel Borkmann b2 = 0x35; 762de0a444dSDaniel Borkmann break; 76362258278SAlexei Starovoitov } 76462258278SAlexei Starovoitov 765e430f34eSAlexei Starovoitov if (is_imm8(imm32)) 766e430f34eSAlexei Starovoitov EMIT3(0x83, add_1reg(b3, dst_reg), imm32); 767de0a444dSDaniel Borkmann else if (is_axreg(dst_reg)) 768de0a444dSDaniel Borkmann EMIT1_off32(b2, imm32); 76962258278SAlexei Starovoitov else 770e430f34eSAlexei Starovoitov EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); 77162258278SAlexei Starovoitov break; 77262258278SAlexei Starovoitov 77362258278SAlexei Starovoitov case BPF_ALU64 | BPF_MOV | BPF_K: 77462258278SAlexei Starovoitov case BPF_ALU | BPF_MOV | BPF_K: 7756fe8b9c1SDaniel Borkmann emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, 7766fe8b9c1SDaniel Borkmann dst_reg, imm32); 77762258278SAlexei Starovoitov break; 77862258278SAlexei Starovoitov 77902ab695bSAlexei Starovoitov case BPF_LD | BPF_IMM | BPF_DW: 7806fe8b9c1SDaniel Borkmann emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm); 78102ab695bSAlexei Starovoitov insn++; 78202ab695bSAlexei Starovoitov i++; 78302ab695bSAlexei Starovoitov break; 78402ab695bSAlexei Starovoitov 785e430f34eSAlexei Starovoitov 
/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ 78662258278SAlexei Starovoitov case BPF_ALU | BPF_MOD | BPF_X: 78762258278SAlexei Starovoitov case BPF_ALU | BPF_DIV | BPF_X: 78862258278SAlexei Starovoitov case BPF_ALU | BPF_MOD | BPF_K: 78962258278SAlexei Starovoitov case BPF_ALU | BPF_DIV | BPF_K: 79062258278SAlexei Starovoitov case BPF_ALU64 | BPF_MOD | BPF_X: 79162258278SAlexei Starovoitov case BPF_ALU64 | BPF_DIV | BPF_X: 79262258278SAlexei Starovoitov case BPF_ALU64 | BPF_MOD | BPF_K: 79362258278SAlexei Starovoitov case BPF_ALU64 | BPF_DIV | BPF_K: 79462258278SAlexei Starovoitov EMIT1(0x50); /* push rax */ 79562258278SAlexei Starovoitov EMIT1(0x52); /* push rdx */ 79662258278SAlexei Starovoitov 79762258278SAlexei Starovoitov if (BPF_SRC(insn->code) == BPF_X) 798e430f34eSAlexei Starovoitov /* mov r11, src_reg */ 799e430f34eSAlexei Starovoitov EMIT_mov(AUX_REG, src_reg); 80062258278SAlexei Starovoitov else 801e430f34eSAlexei Starovoitov /* mov r11, imm32 */ 802e430f34eSAlexei Starovoitov EMIT3_off32(0x49, 0xC7, 0xC3, imm32); 80362258278SAlexei Starovoitov 804e430f34eSAlexei Starovoitov /* mov rax, dst_reg */ 805e430f34eSAlexei Starovoitov EMIT_mov(BPF_REG_0, dst_reg); 80662258278SAlexei Starovoitov 807a2c7a983SIngo Molnar /* 808a2c7a983SIngo Molnar * xor edx, edx 80962258278SAlexei Starovoitov * equivalent to 'xor rdx, rdx', but one byte less 81062258278SAlexei Starovoitov */ 81162258278SAlexei Starovoitov EMIT2(0x31, 0xd2); 81262258278SAlexei Starovoitov 81362258278SAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 81462258278SAlexei Starovoitov /* div r11 */ 81562258278SAlexei Starovoitov EMIT3(0x49, 0xF7, 0xF3); 81662258278SAlexei Starovoitov else 81762258278SAlexei Starovoitov /* div r11d */ 81862258278SAlexei Starovoitov EMIT3(0x41, 0xF7, 0xF3); 81962258278SAlexei Starovoitov 82062258278SAlexei Starovoitov if (BPF_OP(insn->code) == BPF_MOD) 82162258278SAlexei Starovoitov /* mov r11, rdx */ 82262258278SAlexei Starovoitov EMIT3(0x49, 0x89, 
0xD3); 82362258278SAlexei Starovoitov else 82462258278SAlexei Starovoitov /* mov r11, rax */ 82562258278SAlexei Starovoitov EMIT3(0x49, 0x89, 0xC3); 82662258278SAlexei Starovoitov 82762258278SAlexei Starovoitov EMIT1(0x5A); /* pop rdx */ 82862258278SAlexei Starovoitov EMIT1(0x58); /* pop rax */ 82962258278SAlexei Starovoitov 830e430f34eSAlexei Starovoitov /* mov dst_reg, r11 */ 831e430f34eSAlexei Starovoitov EMIT_mov(dst_reg, AUX_REG); 83262258278SAlexei Starovoitov break; 83362258278SAlexei Starovoitov 83462258278SAlexei Starovoitov case BPF_ALU | BPF_MUL | BPF_K: 83562258278SAlexei Starovoitov case BPF_ALU | BPF_MUL | BPF_X: 83662258278SAlexei Starovoitov case BPF_ALU64 | BPF_MUL | BPF_K: 83762258278SAlexei Starovoitov case BPF_ALU64 | BPF_MUL | BPF_X: 8384c38e2f3SDaniel Borkmann { 8394c38e2f3SDaniel Borkmann bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 8404c38e2f3SDaniel Borkmann 841d806a0cfSDaniel Borkmann if (dst_reg != BPF_REG_0) 84262258278SAlexei Starovoitov EMIT1(0x50); /* push rax */ 843d806a0cfSDaniel Borkmann if (dst_reg != BPF_REG_3) 84462258278SAlexei Starovoitov EMIT1(0x52); /* push rdx */ 84562258278SAlexei Starovoitov 846e430f34eSAlexei Starovoitov /* mov r11, dst_reg */ 847e430f34eSAlexei Starovoitov EMIT_mov(AUX_REG, dst_reg); 84862258278SAlexei Starovoitov 84962258278SAlexei Starovoitov if (BPF_SRC(insn->code) == BPF_X) 8504c38e2f3SDaniel Borkmann emit_mov_reg(&prog, is64, BPF_REG_0, src_reg); 85162258278SAlexei Starovoitov else 8524c38e2f3SDaniel Borkmann emit_mov_imm32(&prog, is64, BPF_REG_0, imm32); 85362258278SAlexei Starovoitov 8544c38e2f3SDaniel Borkmann if (is64) 85562258278SAlexei Starovoitov EMIT1(add_1mod(0x48, AUX_REG)); 85662258278SAlexei Starovoitov else if (is_ereg(AUX_REG)) 85762258278SAlexei Starovoitov EMIT1(add_1mod(0x40, AUX_REG)); 85862258278SAlexei Starovoitov /* mul(q) r11 */ 85962258278SAlexei Starovoitov EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); 86062258278SAlexei Starovoitov 861d806a0cfSDaniel Borkmann if (dst_reg != 
BPF_REG_3) 86262258278SAlexei Starovoitov EMIT1(0x5A); /* pop rdx */ 863d806a0cfSDaniel Borkmann if (dst_reg != BPF_REG_0) { 864d806a0cfSDaniel Borkmann /* mov dst_reg, rax */ 865d806a0cfSDaniel Borkmann EMIT_mov(dst_reg, BPF_REG_0); 86662258278SAlexei Starovoitov EMIT1(0x58); /* pop rax */ 867d806a0cfSDaniel Borkmann } 86862258278SAlexei Starovoitov break; 8694c38e2f3SDaniel Borkmann } 870a2c7a983SIngo Molnar /* Shifts */ 87162258278SAlexei Starovoitov case BPF_ALU | BPF_LSH | BPF_K: 87262258278SAlexei Starovoitov case BPF_ALU | BPF_RSH | BPF_K: 87362258278SAlexei Starovoitov case BPF_ALU | BPF_ARSH | BPF_K: 87462258278SAlexei Starovoitov case BPF_ALU64 | BPF_LSH | BPF_K: 87562258278SAlexei Starovoitov case BPF_ALU64 | BPF_RSH | BPF_K: 87662258278SAlexei Starovoitov case BPF_ALU64 | BPF_ARSH | BPF_K: 87762258278SAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 878e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 879e430f34eSAlexei Starovoitov else if (is_ereg(dst_reg)) 880e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x40, dst_reg)); 88162258278SAlexei Starovoitov 88262258278SAlexei Starovoitov switch (BPF_OP(insn->code)) { 88362258278SAlexei Starovoitov case BPF_LSH: b3 = 0xE0; break; 88462258278SAlexei Starovoitov case BPF_RSH: b3 = 0xE8; break; 88562258278SAlexei Starovoitov case BPF_ARSH: b3 = 0xF8; break; 88662258278SAlexei Starovoitov } 88788e69a1fSDaniel Borkmann 88888e69a1fSDaniel Borkmann if (imm32 == 1) 88988e69a1fSDaniel Borkmann EMIT2(0xD1, add_1reg(b3, dst_reg)); 89088e69a1fSDaniel Borkmann else 891e430f34eSAlexei Starovoitov EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); 89262258278SAlexei Starovoitov break; 89362258278SAlexei Starovoitov 89472b603eeSAlexei Starovoitov case BPF_ALU | BPF_LSH | BPF_X: 89572b603eeSAlexei Starovoitov case BPF_ALU | BPF_RSH | BPF_X: 89672b603eeSAlexei Starovoitov case BPF_ALU | BPF_ARSH | BPF_X: 89772b603eeSAlexei Starovoitov case BPF_ALU64 | BPF_LSH | BPF_X: 89872b603eeSAlexei Starovoitov case BPF_ALU64 | 
BPF_RSH | BPF_X: 89972b603eeSAlexei Starovoitov case BPF_ALU64 | BPF_ARSH | BPF_X: 90072b603eeSAlexei Starovoitov 901a2c7a983SIngo Molnar /* Check for bad case when dst_reg == rcx */ 90272b603eeSAlexei Starovoitov if (dst_reg == BPF_REG_4) { 90372b603eeSAlexei Starovoitov /* mov r11, dst_reg */ 90472b603eeSAlexei Starovoitov EMIT_mov(AUX_REG, dst_reg); 90572b603eeSAlexei Starovoitov dst_reg = AUX_REG; 90672b603eeSAlexei Starovoitov } 90772b603eeSAlexei Starovoitov 90872b603eeSAlexei Starovoitov if (src_reg != BPF_REG_4) { /* common case */ 90972b603eeSAlexei Starovoitov EMIT1(0x51); /* push rcx */ 91072b603eeSAlexei Starovoitov 91172b603eeSAlexei Starovoitov /* mov rcx, src_reg */ 91272b603eeSAlexei Starovoitov EMIT_mov(BPF_REG_4, src_reg); 91372b603eeSAlexei Starovoitov } 91472b603eeSAlexei Starovoitov 91572b603eeSAlexei Starovoitov /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ 91672b603eeSAlexei Starovoitov if (BPF_CLASS(insn->code) == BPF_ALU64) 91772b603eeSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 91872b603eeSAlexei Starovoitov else if (is_ereg(dst_reg)) 91972b603eeSAlexei Starovoitov EMIT1(add_1mod(0x40, dst_reg)); 92072b603eeSAlexei Starovoitov 92172b603eeSAlexei Starovoitov switch (BPF_OP(insn->code)) { 92272b603eeSAlexei Starovoitov case BPF_LSH: b3 = 0xE0; break; 92372b603eeSAlexei Starovoitov case BPF_RSH: b3 = 0xE8; break; 92472b603eeSAlexei Starovoitov case BPF_ARSH: b3 = 0xF8; break; 92572b603eeSAlexei Starovoitov } 92672b603eeSAlexei Starovoitov EMIT2(0xD3, add_1reg(b3, dst_reg)); 92772b603eeSAlexei Starovoitov 92872b603eeSAlexei Starovoitov if (src_reg != BPF_REG_4) 92972b603eeSAlexei Starovoitov EMIT1(0x59); /* pop rcx */ 93072b603eeSAlexei Starovoitov 93172b603eeSAlexei Starovoitov if (insn->dst_reg == BPF_REG_4) 93272b603eeSAlexei Starovoitov /* mov dst_reg, r11 */ 93372b603eeSAlexei Starovoitov EMIT_mov(insn->dst_reg, AUX_REG); 93472b603eeSAlexei Starovoitov break; 93572b603eeSAlexei Starovoitov 93662258278SAlexei Starovoitov case 
BPF_ALU | BPF_END | BPF_FROM_BE: 937e430f34eSAlexei Starovoitov switch (imm32) { 93862258278SAlexei Starovoitov case 16: 939a2c7a983SIngo Molnar /* Emit 'ror %ax, 8' to swap lower 2 bytes */ 94062258278SAlexei Starovoitov EMIT1(0x66); 941e430f34eSAlexei Starovoitov if (is_ereg(dst_reg)) 94262258278SAlexei Starovoitov EMIT1(0x41); 943e430f34eSAlexei Starovoitov EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); 944343f845bSAlexei Starovoitov 945a2c7a983SIngo Molnar /* Emit 'movzwl eax, ax' */ 946343f845bSAlexei Starovoitov if (is_ereg(dst_reg)) 947343f845bSAlexei Starovoitov EMIT3(0x45, 0x0F, 0xB7); 948343f845bSAlexei Starovoitov else 949343f845bSAlexei Starovoitov EMIT2(0x0F, 0xB7); 950343f845bSAlexei Starovoitov EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 95162258278SAlexei Starovoitov break; 95262258278SAlexei Starovoitov case 32: 953a2c7a983SIngo Molnar /* Emit 'bswap eax' to swap lower 4 bytes */ 954e430f34eSAlexei Starovoitov if (is_ereg(dst_reg)) 95562258278SAlexei Starovoitov EMIT2(0x41, 0x0F); 95662258278SAlexei Starovoitov else 95762258278SAlexei Starovoitov EMIT1(0x0F); 958e430f34eSAlexei Starovoitov EMIT1(add_1reg(0xC8, dst_reg)); 95962258278SAlexei Starovoitov break; 96062258278SAlexei Starovoitov case 64: 961a2c7a983SIngo Molnar /* Emit 'bswap rax' to swap 8 bytes */ 962e430f34eSAlexei Starovoitov EMIT3(add_1mod(0x48, dst_reg), 0x0F, 963e430f34eSAlexei Starovoitov add_1reg(0xC8, dst_reg)); 96462258278SAlexei Starovoitov break; 96562258278SAlexei Starovoitov } 96662258278SAlexei Starovoitov break; 96762258278SAlexei Starovoitov 96862258278SAlexei Starovoitov case BPF_ALU | BPF_END | BPF_FROM_LE: 969343f845bSAlexei Starovoitov switch (imm32) { 970343f845bSAlexei Starovoitov case 16: 971a2c7a983SIngo Molnar /* 972a2c7a983SIngo Molnar * Emit 'movzwl eax, ax' to zero extend 16-bit 973343f845bSAlexei Starovoitov * into 64 bit 974343f845bSAlexei Starovoitov */ 975343f845bSAlexei Starovoitov if (is_ereg(dst_reg)) 976343f845bSAlexei Starovoitov EMIT3(0x45, 0x0F, 0xB7); 
977343f845bSAlexei Starovoitov else 978343f845bSAlexei Starovoitov EMIT2(0x0F, 0xB7); 979343f845bSAlexei Starovoitov EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 980343f845bSAlexei Starovoitov break; 981343f845bSAlexei Starovoitov case 32: 982a2c7a983SIngo Molnar /* Emit 'mov eax, eax' to clear upper 32-bits */ 983343f845bSAlexei Starovoitov if (is_ereg(dst_reg)) 984343f845bSAlexei Starovoitov EMIT1(0x45); 985343f845bSAlexei Starovoitov EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); 986343f845bSAlexei Starovoitov break; 987343f845bSAlexei Starovoitov case 64: 988343f845bSAlexei Starovoitov /* nop */ 989343f845bSAlexei Starovoitov break; 990343f845bSAlexei Starovoitov } 99162258278SAlexei Starovoitov break; 99262258278SAlexei Starovoitov 993e430f34eSAlexei Starovoitov /* ST: *(u8*)(dst_reg + off) = imm */ 99462258278SAlexei Starovoitov case BPF_ST | BPF_MEM | BPF_B: 995e430f34eSAlexei Starovoitov if (is_ereg(dst_reg)) 99662258278SAlexei Starovoitov EMIT2(0x41, 0xC6); 99762258278SAlexei Starovoitov else 99862258278SAlexei Starovoitov EMIT1(0xC6); 99962258278SAlexei Starovoitov goto st; 100062258278SAlexei Starovoitov case BPF_ST | BPF_MEM | BPF_H: 1001e430f34eSAlexei Starovoitov if (is_ereg(dst_reg)) 100262258278SAlexei Starovoitov EMIT3(0x66, 0x41, 0xC7); 100362258278SAlexei Starovoitov else 100462258278SAlexei Starovoitov EMIT2(0x66, 0xC7); 100562258278SAlexei Starovoitov goto st; 100662258278SAlexei Starovoitov case BPF_ST | BPF_MEM | BPF_W: 1007e430f34eSAlexei Starovoitov if (is_ereg(dst_reg)) 100862258278SAlexei Starovoitov EMIT2(0x41, 0xC7); 100962258278SAlexei Starovoitov else 101062258278SAlexei Starovoitov EMIT1(0xC7); 101162258278SAlexei Starovoitov goto st; 101262258278SAlexei Starovoitov case BPF_ST | BPF_MEM | BPF_DW: 1013e430f34eSAlexei Starovoitov EMIT2(add_1mod(0x48, dst_reg), 0xC7); 101462258278SAlexei Starovoitov 101562258278SAlexei Starovoitov st: if (is_imm8(insn->off)) 1016e430f34eSAlexei Starovoitov EMIT2(add_1reg(0x40, dst_reg), insn->off); 
101762258278SAlexei Starovoitov else 1018e430f34eSAlexei Starovoitov EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); 101962258278SAlexei Starovoitov 1020e430f34eSAlexei Starovoitov EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); 102162258278SAlexei Starovoitov break; 102262258278SAlexei Starovoitov 1023e430f34eSAlexei Starovoitov /* STX: *(u8*)(dst_reg + off) = src_reg */ 102462258278SAlexei Starovoitov case BPF_STX | BPF_MEM | BPF_B: 102562258278SAlexei Starovoitov case BPF_STX | BPF_MEM | BPF_H: 102662258278SAlexei Starovoitov case BPF_STX | BPF_MEM | BPF_W: 102762258278SAlexei Starovoitov case BPF_STX | BPF_MEM | BPF_DW: 10283b2744e6SAlexei Starovoitov emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 102962258278SAlexei Starovoitov break; 103062258278SAlexei Starovoitov 1031e430f34eSAlexei Starovoitov /* LDX: dst_reg = *(u8*)(src_reg + off) */ 103262258278SAlexei Starovoitov case BPF_LDX | BPF_MEM | BPF_B: 10333dec541bSAlexei Starovoitov case BPF_LDX | BPF_PROBE_MEM | BPF_B: 103462258278SAlexei Starovoitov case BPF_LDX | BPF_MEM | BPF_H: 10353dec541bSAlexei Starovoitov case BPF_LDX | BPF_PROBE_MEM | BPF_H: 103662258278SAlexei Starovoitov case BPF_LDX | BPF_MEM | BPF_W: 10373dec541bSAlexei Starovoitov case BPF_LDX | BPF_PROBE_MEM | BPF_W: 103862258278SAlexei Starovoitov case BPF_LDX | BPF_MEM | BPF_DW: 10393dec541bSAlexei Starovoitov case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 10403b2744e6SAlexei Starovoitov emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 10413dec541bSAlexei Starovoitov if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { 10423dec541bSAlexei Starovoitov struct exception_table_entry *ex; 10433dec541bSAlexei Starovoitov u8 *_insn = image + proglen; 10443dec541bSAlexei Starovoitov s64 delta; 10453dec541bSAlexei Starovoitov 10463dec541bSAlexei Starovoitov if (!bpf_prog->aux->extable) 10473dec541bSAlexei Starovoitov break; 10483dec541bSAlexei Starovoitov 10493dec541bSAlexei Starovoitov if (excnt >= 
bpf_prog->aux->num_exentries) { 10503dec541bSAlexei Starovoitov pr_err("ex gen bug\n"); 10513dec541bSAlexei Starovoitov return -EFAULT; 10523dec541bSAlexei Starovoitov } 10533dec541bSAlexei Starovoitov ex = &bpf_prog->aux->extable[excnt++]; 10543dec541bSAlexei Starovoitov 10553dec541bSAlexei Starovoitov delta = _insn - (u8 *)&ex->insn; 10563dec541bSAlexei Starovoitov if (!is_simm32(delta)) { 10573dec541bSAlexei Starovoitov pr_err("extable->insn doesn't fit into 32-bit\n"); 10583dec541bSAlexei Starovoitov return -EFAULT; 10593dec541bSAlexei Starovoitov } 10603dec541bSAlexei Starovoitov ex->insn = delta; 10613dec541bSAlexei Starovoitov 10623dec541bSAlexei Starovoitov delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; 10633dec541bSAlexei Starovoitov if (!is_simm32(delta)) { 10643dec541bSAlexei Starovoitov pr_err("extable->handler doesn't fit into 32-bit\n"); 10653dec541bSAlexei Starovoitov return -EFAULT; 10663dec541bSAlexei Starovoitov } 10673dec541bSAlexei Starovoitov ex->handler = delta; 10683dec541bSAlexei Starovoitov 10693dec541bSAlexei Starovoitov if (dst_reg > BPF_REG_9) { 10703dec541bSAlexei Starovoitov pr_err("verifier error\n"); 10713dec541bSAlexei Starovoitov return -EFAULT; 10723dec541bSAlexei Starovoitov } 10733dec541bSAlexei Starovoitov /* 10743dec541bSAlexei Starovoitov * Compute size of x86 insn and its target dest x86 register. 10753dec541bSAlexei Starovoitov * ex_handler_bpf() will use lower 8 bits to adjust 10763dec541bSAlexei Starovoitov * pt_regs->ip to jump over this x86 instruction 10773dec541bSAlexei Starovoitov * and upper bits to figure out which pt_regs to zero out. 10783dec541bSAlexei Starovoitov * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" 10793dec541bSAlexei Starovoitov * of 4 bytes will be ignored and rbx will be zero inited. 
10803dec541bSAlexei Starovoitov */ 10813dec541bSAlexei Starovoitov ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); 10823dec541bSAlexei Starovoitov } 108362258278SAlexei Starovoitov break; 108462258278SAlexei Starovoitov 1085e430f34eSAlexei Starovoitov /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ 108662258278SAlexei Starovoitov case BPF_STX | BPF_XADD | BPF_W: 1087a2c7a983SIngo Molnar /* Emit 'lock add dword ptr [rax + off], eax' */ 1088e430f34eSAlexei Starovoitov if (is_ereg(dst_reg) || is_ereg(src_reg)) 1089e430f34eSAlexei Starovoitov EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); 109062258278SAlexei Starovoitov else 109162258278SAlexei Starovoitov EMIT2(0xF0, 0x01); 109262258278SAlexei Starovoitov goto xadd; 109362258278SAlexei Starovoitov case BPF_STX | BPF_XADD | BPF_DW: 1094e430f34eSAlexei Starovoitov EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); 109562258278SAlexei Starovoitov xadd: if (is_imm8(insn->off)) 1096e430f34eSAlexei Starovoitov EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); 109762258278SAlexei Starovoitov else 1098e430f34eSAlexei Starovoitov EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), 109962258278SAlexei Starovoitov insn->off); 110062258278SAlexei Starovoitov break; 110162258278SAlexei Starovoitov 110262258278SAlexei Starovoitov /* call */ 110362258278SAlexei Starovoitov case BPF_JMP | BPF_CALL: 1104e430f34eSAlexei Starovoitov func = (u8 *) __bpf_call_base + imm32; 11053b2744e6SAlexei Starovoitov if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) 1106f3c2af7bSAlexei Starovoitov return -EINVAL; 110762258278SAlexei Starovoitov break; 110862258278SAlexei Starovoitov 110971189fa9SAlexei Starovoitov case BPF_JMP | BPF_TAIL_CALL: 1110428d5df1SDaniel Borkmann if (imm32) 1111428d5df1SDaniel Borkmann emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], 1112428d5df1SDaniel Borkmann &prog, addrs[i], image); 1113428d5df1SDaniel Borkmann else 1114428d5df1SDaniel Borkmann 
emit_bpf_tail_call_indirect(&prog); 1115b52f00e6SAlexei Starovoitov break; 1116b52f00e6SAlexei Starovoitov 111762258278SAlexei Starovoitov /* cond jump */ 111862258278SAlexei Starovoitov case BPF_JMP | BPF_JEQ | BPF_X: 111962258278SAlexei Starovoitov case BPF_JMP | BPF_JNE | BPF_X: 112062258278SAlexei Starovoitov case BPF_JMP | BPF_JGT | BPF_X: 112152afc51eSDaniel Borkmann case BPF_JMP | BPF_JLT | BPF_X: 112262258278SAlexei Starovoitov case BPF_JMP | BPF_JGE | BPF_X: 112352afc51eSDaniel Borkmann case BPF_JMP | BPF_JLE | BPF_X: 112462258278SAlexei Starovoitov case BPF_JMP | BPF_JSGT | BPF_X: 112552afc51eSDaniel Borkmann case BPF_JMP | BPF_JSLT | BPF_X: 112662258278SAlexei Starovoitov case BPF_JMP | BPF_JSGE | BPF_X: 112752afc51eSDaniel Borkmann case BPF_JMP | BPF_JSLE | BPF_X: 11283f5d6525SJiong Wang case BPF_JMP32 | BPF_JEQ | BPF_X: 11293f5d6525SJiong Wang case BPF_JMP32 | BPF_JNE | BPF_X: 11303f5d6525SJiong Wang case BPF_JMP32 | BPF_JGT | BPF_X: 11313f5d6525SJiong Wang case BPF_JMP32 | BPF_JLT | BPF_X: 11323f5d6525SJiong Wang case BPF_JMP32 | BPF_JGE | BPF_X: 11333f5d6525SJiong Wang case BPF_JMP32 | BPF_JLE | BPF_X: 11343f5d6525SJiong Wang case BPF_JMP32 | BPF_JSGT | BPF_X: 11353f5d6525SJiong Wang case BPF_JMP32 | BPF_JSLT | BPF_X: 11363f5d6525SJiong Wang case BPF_JMP32 | BPF_JSGE | BPF_X: 11373f5d6525SJiong Wang case BPF_JMP32 | BPF_JSLE | BPF_X: 1138e430f34eSAlexei Starovoitov /* cmp dst_reg, src_reg */ 11393f5d6525SJiong Wang if (BPF_CLASS(insn->code) == BPF_JMP) 11403f5d6525SJiong Wang EMIT1(add_2mod(0x48, dst_reg, src_reg)); 11413f5d6525SJiong Wang else if (is_ereg(dst_reg) || is_ereg(src_reg)) 11423f5d6525SJiong Wang EMIT1(add_2mod(0x40, dst_reg, src_reg)); 11433f5d6525SJiong Wang EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); 114462258278SAlexei Starovoitov goto emit_cond_jmp; 114562258278SAlexei Starovoitov 114662258278SAlexei Starovoitov case BPF_JMP | BPF_JSET | BPF_X: 11473f5d6525SJiong Wang case BPF_JMP32 | BPF_JSET | BPF_X: 1148e430f34eSAlexei 
Starovoitov /* test dst_reg, src_reg */ 11493f5d6525SJiong Wang if (BPF_CLASS(insn->code) == BPF_JMP) 11503f5d6525SJiong Wang EMIT1(add_2mod(0x48, dst_reg, src_reg)); 11513f5d6525SJiong Wang else if (is_ereg(dst_reg) || is_ereg(src_reg)) 11523f5d6525SJiong Wang EMIT1(add_2mod(0x40, dst_reg, src_reg)); 11533f5d6525SJiong Wang EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); 115462258278SAlexei Starovoitov goto emit_cond_jmp; 115562258278SAlexei Starovoitov 115662258278SAlexei Starovoitov case BPF_JMP | BPF_JSET | BPF_K: 11573f5d6525SJiong Wang case BPF_JMP32 | BPF_JSET | BPF_K: 1158e430f34eSAlexei Starovoitov /* test dst_reg, imm32 */ 11593f5d6525SJiong Wang if (BPF_CLASS(insn->code) == BPF_JMP) 1160e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 11613f5d6525SJiong Wang else if (is_ereg(dst_reg)) 11623f5d6525SJiong Wang EMIT1(add_1mod(0x40, dst_reg)); 1163e430f34eSAlexei Starovoitov EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); 116462258278SAlexei Starovoitov goto emit_cond_jmp; 116562258278SAlexei Starovoitov 116662258278SAlexei Starovoitov case BPF_JMP | BPF_JEQ | BPF_K: 116762258278SAlexei Starovoitov case BPF_JMP | BPF_JNE | BPF_K: 116862258278SAlexei Starovoitov case BPF_JMP | BPF_JGT | BPF_K: 116952afc51eSDaniel Borkmann case BPF_JMP | BPF_JLT | BPF_K: 117062258278SAlexei Starovoitov case BPF_JMP | BPF_JGE | BPF_K: 117152afc51eSDaniel Borkmann case BPF_JMP | BPF_JLE | BPF_K: 117262258278SAlexei Starovoitov case BPF_JMP | BPF_JSGT | BPF_K: 117352afc51eSDaniel Borkmann case BPF_JMP | BPF_JSLT | BPF_K: 117462258278SAlexei Starovoitov case BPF_JMP | BPF_JSGE | BPF_K: 117552afc51eSDaniel Borkmann case BPF_JMP | BPF_JSLE | BPF_K: 11763f5d6525SJiong Wang case BPF_JMP32 | BPF_JEQ | BPF_K: 11773f5d6525SJiong Wang case BPF_JMP32 | BPF_JNE | BPF_K: 11783f5d6525SJiong Wang case BPF_JMP32 | BPF_JGT | BPF_K: 11793f5d6525SJiong Wang case BPF_JMP32 | BPF_JLT | BPF_K: 11803f5d6525SJiong Wang case BPF_JMP32 | BPF_JGE | BPF_K: 11813f5d6525SJiong Wang case 
BPF_JMP32 | BPF_JLE | BPF_K: 11823f5d6525SJiong Wang case BPF_JMP32 | BPF_JSGT | BPF_K: 11833f5d6525SJiong Wang case BPF_JMP32 | BPF_JSLT | BPF_K: 11843f5d6525SJiong Wang case BPF_JMP32 | BPF_JSGE | BPF_K: 11853f5d6525SJiong Wang case BPF_JMP32 | BPF_JSLE | BPF_K: 118638f51c07SDaniel Borkmann /* test dst_reg, dst_reg to save one extra byte */ 118738f51c07SDaniel Borkmann if (imm32 == 0) { 118838f51c07SDaniel Borkmann if (BPF_CLASS(insn->code) == BPF_JMP) 118938f51c07SDaniel Borkmann EMIT1(add_2mod(0x48, dst_reg, dst_reg)); 119038f51c07SDaniel Borkmann else if (is_ereg(dst_reg)) 119138f51c07SDaniel Borkmann EMIT1(add_2mod(0x40, dst_reg, dst_reg)); 119238f51c07SDaniel Borkmann EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); 119338f51c07SDaniel Borkmann goto emit_cond_jmp; 119438f51c07SDaniel Borkmann } 119538f51c07SDaniel Borkmann 1196e430f34eSAlexei Starovoitov /* cmp dst_reg, imm8/32 */ 11973f5d6525SJiong Wang if (BPF_CLASS(insn->code) == BPF_JMP) 1198e430f34eSAlexei Starovoitov EMIT1(add_1mod(0x48, dst_reg)); 11993f5d6525SJiong Wang else if (is_ereg(dst_reg)) 12003f5d6525SJiong Wang EMIT1(add_1mod(0x40, dst_reg)); 120162258278SAlexei Starovoitov 1202e430f34eSAlexei Starovoitov if (is_imm8(imm32)) 1203e430f34eSAlexei Starovoitov EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); 120462258278SAlexei Starovoitov else 1205e430f34eSAlexei Starovoitov EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); 120662258278SAlexei Starovoitov 1207a2c7a983SIngo Molnar emit_cond_jmp: /* Convert BPF opcode to x86 */ 120862258278SAlexei Starovoitov switch (BPF_OP(insn->code)) { 120962258278SAlexei Starovoitov case BPF_JEQ: 121062258278SAlexei Starovoitov jmp_cond = X86_JE; 121162258278SAlexei Starovoitov break; 121262258278SAlexei Starovoitov case BPF_JSET: 121362258278SAlexei Starovoitov case BPF_JNE: 121462258278SAlexei Starovoitov jmp_cond = X86_JNE; 121562258278SAlexei Starovoitov break; 121662258278SAlexei Starovoitov case BPF_JGT: 121762258278SAlexei Starovoitov /* GT is unsigned '>', 
JA in x86 */ 121862258278SAlexei Starovoitov jmp_cond = X86_JA; 121962258278SAlexei Starovoitov break; 122052afc51eSDaniel Borkmann case BPF_JLT: 122152afc51eSDaniel Borkmann /* LT is unsigned '<', JB in x86 */ 122252afc51eSDaniel Borkmann jmp_cond = X86_JB; 122352afc51eSDaniel Borkmann break; 122462258278SAlexei Starovoitov case BPF_JGE: 122562258278SAlexei Starovoitov /* GE is unsigned '>=', JAE in x86 */ 122662258278SAlexei Starovoitov jmp_cond = X86_JAE; 122762258278SAlexei Starovoitov break; 122852afc51eSDaniel Borkmann case BPF_JLE: 122952afc51eSDaniel Borkmann /* LE is unsigned '<=', JBE in x86 */ 123052afc51eSDaniel Borkmann jmp_cond = X86_JBE; 123152afc51eSDaniel Borkmann break; 123262258278SAlexei Starovoitov case BPF_JSGT: 1233a2c7a983SIngo Molnar /* Signed '>', GT in x86 */ 123462258278SAlexei Starovoitov jmp_cond = X86_JG; 123562258278SAlexei Starovoitov break; 123652afc51eSDaniel Borkmann case BPF_JSLT: 1237a2c7a983SIngo Molnar /* Signed '<', LT in x86 */ 123852afc51eSDaniel Borkmann jmp_cond = X86_JL; 123952afc51eSDaniel Borkmann break; 124062258278SAlexei Starovoitov case BPF_JSGE: 1241a2c7a983SIngo Molnar /* Signed '>=', GE in x86 */ 124262258278SAlexei Starovoitov jmp_cond = X86_JGE; 124362258278SAlexei Starovoitov break; 124452afc51eSDaniel Borkmann case BPF_JSLE: 1245a2c7a983SIngo Molnar /* Signed '<=', LE in x86 */ 124652afc51eSDaniel Borkmann jmp_cond = X86_JLE; 124752afc51eSDaniel Borkmann break; 1248a2c7a983SIngo Molnar default: /* to silence GCC warning */ 124962258278SAlexei Starovoitov return -EFAULT; 125062258278SAlexei Starovoitov } 125162258278SAlexei Starovoitov jmp_offset = addrs[i + insn->off] - addrs[i]; 125262258278SAlexei Starovoitov if (is_imm8(jmp_offset)) { 125362258278SAlexei Starovoitov EMIT2(jmp_cond, jmp_offset); 125462258278SAlexei Starovoitov } else if (is_simm32(jmp_offset)) { 125562258278SAlexei Starovoitov EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); 12563b58908aSEric Dumazet } else { 125762258278SAlexei 
Starovoitov pr_err("cond_jmp gen bug %llx\n", jmp_offset); 125862258278SAlexei Starovoitov return -EFAULT; 12593b58908aSEric Dumazet } 126062258278SAlexei Starovoitov 12613b58908aSEric Dumazet break; 126262258278SAlexei Starovoitov 126362258278SAlexei Starovoitov case BPF_JMP | BPF_JA: 12641612a981SGianluca Borello if (insn->off == -1) 12651612a981SGianluca Borello /* -1 jmp instructions will always jump 12661612a981SGianluca Borello * backwards two bytes. Explicitly handling 12671612a981SGianluca Borello * this case avoids wasting too many passes 12681612a981SGianluca Borello * when there are long sequences of replaced 12691612a981SGianluca Borello * dead code. 12701612a981SGianluca Borello */ 12711612a981SGianluca Borello jmp_offset = -2; 12721612a981SGianluca Borello else 127362258278SAlexei Starovoitov jmp_offset = addrs[i + insn->off] - addrs[i]; 12741612a981SGianluca Borello 127562258278SAlexei Starovoitov if (!jmp_offset) 1276a2c7a983SIngo Molnar /* Optimize out nop jumps */ 127762258278SAlexei Starovoitov break; 127862258278SAlexei Starovoitov emit_jmp: 127962258278SAlexei Starovoitov if (is_imm8(jmp_offset)) { 128062258278SAlexei Starovoitov EMIT2(0xEB, jmp_offset); 128162258278SAlexei Starovoitov } else if (is_simm32(jmp_offset)) { 128262258278SAlexei Starovoitov EMIT1_off32(0xE9, jmp_offset); 128362258278SAlexei Starovoitov } else { 128462258278SAlexei Starovoitov pr_err("jmp gen bug %llx\n", jmp_offset); 128562258278SAlexei Starovoitov return -EFAULT; 12863b58908aSEric Dumazet } 128762258278SAlexei Starovoitov break; 128862258278SAlexei Starovoitov 128962258278SAlexei Starovoitov case BPF_JMP | BPF_EXIT: 1290769e0de6SAlexei Starovoitov if (seen_exit) { 129162258278SAlexei Starovoitov jmp_offset = ctx->cleanup_addr - addrs[i]; 129262258278SAlexei Starovoitov goto emit_jmp; 129362258278SAlexei Starovoitov } 1294769e0de6SAlexei Starovoitov seen_exit = true; 1295a2c7a983SIngo Molnar /* Update cleanup_addr */ 129662258278SAlexei Starovoitov ctx->cleanup_addr 
= proglen; 1297fe8d9571SAlexei Starovoitov if (!bpf_prog_was_classic(bpf_prog)) 1298fe8d9571SAlexei Starovoitov EMIT1(0x5B); /* get rid of tail_call_cnt */ 1299fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x5F); /* pop r15 */ 1300fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x5E); /* pop r14 */ 1301fe8d9571SAlexei Starovoitov EMIT2(0x41, 0x5D); /* pop r13 */ 1302fe8d9571SAlexei Starovoitov EMIT1(0x5B); /* pop rbx */ 130362258278SAlexei Starovoitov EMIT1(0xC9); /* leave */ 130462258278SAlexei Starovoitov EMIT1(0xC3); /* ret */ 13050a14842fSEric Dumazet break; 13060a14842fSEric Dumazet 13070a14842fSEric Dumazet default: 1308a2c7a983SIngo Molnar /* 1309a2c7a983SIngo Molnar * By design x86-64 JIT should support all BPF instructions. 131062258278SAlexei Starovoitov * This error will be seen if new instruction was added 1311a2c7a983SIngo Molnar * to the interpreter, but not to the JIT, or if there is 1312a2c7a983SIngo Molnar * junk in bpf_prog. 131362258278SAlexei Starovoitov */ 131462258278SAlexei Starovoitov pr_err("bpf_jit: unknown opcode %02x\n", insn->code); 1315f3c2af7bSAlexei Starovoitov return -EINVAL; 13160a14842fSEric Dumazet } 131762258278SAlexei Starovoitov 13180a14842fSEric Dumazet ilen = prog - temp; 1319e0ee9c12SAlexei Starovoitov if (ilen > BPF_MAX_INSN_SIZE) { 13209383191dSDaniel Borkmann pr_err("bpf_jit: fatal insn size error\n"); 1321e0ee9c12SAlexei Starovoitov return -EFAULT; 1322e0ee9c12SAlexei Starovoitov } 1323e0ee9c12SAlexei Starovoitov 13240a14842fSEric Dumazet if (image) { 13250a14842fSEric Dumazet if (unlikely(proglen + ilen > oldproglen)) { 13269383191dSDaniel Borkmann pr_err("bpf_jit: fatal error\n"); 1327f3c2af7bSAlexei Starovoitov return -EFAULT; 13280a14842fSEric Dumazet } 13290a14842fSEric Dumazet memcpy(image + proglen, temp, ilen); 13300a14842fSEric Dumazet } 13310a14842fSEric Dumazet proglen += ilen; 13320a14842fSEric Dumazet addrs[i] = proglen; 13330a14842fSEric Dumazet prog = temp; 13340a14842fSEric Dumazet } 13353dec541bSAlexei Starovoitov 
	/* Every LDX|PROBE_MEM insn must have produced an extable entry;
	 * a mismatch means the JIT and verifier disagree on the program.
	 */
	if (image && excnt != bpf_prog->aux->num_exentries) {
		pr_err("extable is not populated\n");
		return -EFAULT;
	}
	return proglen;
}

/* Spill the traced function's register-passed arguments into the
 * trampoline's stack frame so attached BPF programs can read them as ctx.
 * Argument i is stored at [rbp - (stack_size - i*8)].
 */
static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
		      int stack_size)
{
	int i;
	/* Store function arguments to stack.
	 * For a function that accepts two pointers the sequence will be:
	 * mov QWORD PTR [rbp-0x10],rdi
	 * mov QWORD PTR [rbp-0x8],rsi
	 */
	for (i = 0; i < min(nr_args, 6); i++)
		emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
			 BPF_REG_FP,
			 /* i == 5 is special-cased via an x86 register name;
			  * presumably arg6 (r9) has no BPF register alias —
			  * see the reg mapping earlier in this file.
			  */
			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
			 -(stack_size - i * 8));
}

/* Reload the spilled arguments back into their ABI registers, mirroring
 * save_regs(), before calling the original function or returning.
 */
static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
			 int stack_size)
{
	int i;

	/* Restore function arguments from stack.
	 * For a function that accepts two pointers the sequence will be:
	 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
	 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
	 */
	for (i = 0; i < min(nr_args, 6); i++)
		emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
			 BPF_REG_FP,
			 -(stack_size - i * 8));
}

/* Emit the call sequence for one BPF program inside a trampoline:
 * __bpf_prog_enter, the program itself (JITed body or interpreter),
 * then __bpf_prog_exit.  Returns 0 on success, -EINVAL if a call
 * target is out of emit_call()'s range.
 */
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
			   struct bpf_prog *p, int stack_size, bool mod_ret)
{
	u8 *prog = *pprog;
	int cnt = 0;	/* consumed implicitly by the EMIT*() macros */

	if (emit_call(&prog, __bpf_prog_enter, prog))
		return -EINVAL;
	/* remember prog start time returned by __bpf_prog_enter */
	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);

	/* arg1: lea rdi, [rbp - stack_size] — ctx points at the saved args */
	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
	/* arg2: progs[i]->insnsi for interpreter */
	if (!p->jited)
		emit_mov_imm64(&prog, BPF_REG_2,
			       (long) p->insnsi >> 32,
			       (u32) (long) p->insnsi);
	/* call JITed bpf program or interpreter */
	if (emit_call(&prog, p->bpf_func, prog))
		return -EINVAL;
	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
	 * of the previous call which is then passed on the stack to
	 * the next BPF program.
	 */
	if (mod_ret)
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

	/* arg1: mov rdi, progs[i] */
	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
		       (u32) (long) p);
	/* arg2: mov rsi, rbx <- start time in nsec */
	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
	if (emit_call(&prog, __bpf_prog_exit, prog))
		return -EINVAL;

	*pprog = prog;
	return 0;
}

/* Emit 'len' bytes of NOP padding using the kernel's optimal NOP table,
 * in chunks of at most ASM_NOP_MAX bytes each.
 */
static void emit_nops(u8 **pprog, unsigned int len)
{
	unsigned int i, noplen;
	u8 *prog = *pprog;
	int cnt = 0;	/* consumed implicitly by EMIT1() */

	while (len > 0) {
		noplen = len;

		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;

		for (i = 0; i < noplen; i++)
			EMIT1(ideal_nops[noplen][i]);
		len -= noplen;
	}

	*pprog = prog;
}

/* Pad with NOPs until the emit pointer is 'align'-byte aligned. */
static void emit_align(u8 **pprog, u32 align)
{
	u8 *target, *prog = *pprog;

	target = PTR_ALIGN(prog, align);
	if (target != prog)
		emit_nops(&prog, target - prog);

	*pprog = prog;
}

/* Emit a 6-byte near conditional jump (0x0F <cc> rel32) from 'ip' to 'func'.
 * jmp_cond is the short-form Jcc opcode; +0x10 converts it to the near form.
 * Fails with -EINVAL if the displacement does not fit in 32 bits.
 */
static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
	u8 *prog = *pprog;
	int cnt = 0;
	s64 offset;

	/* rel32 is relative to the end of the 2+4 byte instruction */
	offset = func - (ip + 2 + 4);
	if (!is_simm32(offset)) {
		pr_err("Target %p is out of range\n", func);
		return -EINVAL;
	}
	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
	*pprog = prog;
	return 0;
}

/* Emit calls to every program in 'tp' (fentry/fexit sets); none of them
 * may modify the return value (mod_ret == false).
 */
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
		      struct bpf_tramp_progs *tp, int stack_size)
{
	int i;
	u8 *prog = *pprog;

	for (i = 0; i < tp->nr_progs; i++) {
		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
			return -EINVAL;
	}
	*pprog = prog;
	return 0;
}

/* Emit calls to all fmod_ret programs.  After each one, test the value it
 * stored at [rbp - 8] and record a branch site (6 NOP bytes) in 'branches';
 * the caller later patches each site into a jne to do_fexit.
 */
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
			      struct bpf_tramp_progs *tp, int stack_size,
			      u8 **branches)
{
	u8 *prog = *pprog;
	int i, cnt = 0;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
	for (i = 0; i < tp->nr_progs; i++) {
		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
			return -EINVAL;

		/* mod_ret prog stored return value into [rbp - 8]. Emit:
		 * if (*(u64 *)(rbp - 8) !=  0)
		 *	goto do_fexit;
		 */
		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);

		/* Save the location of the branch and Generate 6 nops
		 * (4 bytes for an offset and 2 bytes for the jump) These nops
		 * are replaced with a conditional jump once do_fexit (i.e. the
		 * start of the fexit invocation) is finalized.
		 */
		branches[i] = prog;
		emit_nops(&prog, 4 + 2);
	}

	*pprog = prog;
	return 0;
}

/* Example:
 * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 * its 'struct btf_func_model' will be nr_args=2
 * The assembly code when eth_type_trans is executing after trampoline:
 *
 * push rbp
 * mov rbp, rsp
 * sub rsp, 16                     // space for skb and dev
 * push rbx                        // temp regs to pass start time
 * mov qword ptr [rbp - 16], rdi   // save skb pointer to stack
 * mov qword ptr [rbp - 8], rsi    // save dev pointer to stack
 * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
 * mov rbx, rax                    // remember start time in bpf stats are enabled
 * lea rdi, [rbp - 16]             // R1==ctx of bpf prog
 * call addr_of_jited_FENTRY_prog
 * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
 * mov rsi, rbx                    // prog start time
 * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
 * mov rdi, qword ptr [rbp - 16]   // restore skb pointer from stack
 * mov rsi, qword ptr [rbp - 8]    // restore dev pointer from stack
 * pop rbx
 * leave
 * ret
 *
 * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
 * replaced with 'call generated_bpf_trampoline'. When it returns
 * eth_type_trans will continue executing with original skb and dev pointers.
 *
 * The assembly code when eth_type_trans is called from trampoline:
 *
 * push rbp
 * mov rbp, rsp
 * sub rsp, 24                     // space for skb, dev, return value
 * push rbx                        // temp regs to pass start time
 * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
 * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
 * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
 * mov rbx, rax                    // remember start time if bpf stats are enabled
 * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
 * call addr_of_jited_FENTRY_prog  // bpf prog can access skb and dev
 * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
 * mov rsi, rbx                    // prog start time
 * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
 * mov rdi, qword ptr [rbp - 24]   // restore skb pointer from stack
 * mov rsi, qword ptr [rbp - 16]   // restore dev pointer from stack
 * call eth_type_trans+5           // execute body of eth_type_trans
 * mov qword ptr [rbp - 8], rax    // save return value
 * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
 * mov rbx, rax                    // remember start time in bpf stats are enabled
 * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
 * call addr_of_jited_FEXIT_prog   // bpf prog can access skb, dev, return value
 * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
 * mov rsi, rbx                    // prog start time
 * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
 * mov rax, qword ptr [rbp - 8]    // restore eth_type_trans's return value
 * pop rbx
 * leave
 * add rsp, 8                      // skip eth_type_trans's frame
 * ret                             // return to its caller
 */
int arch_prepare_bpf_trampoline(void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_tramp_progs *tprogs,
				void *orig_call)
{
	int ret, i, cnt = 0, nr_args = m->nr_args;
	int stack_size = nr_args * 8;	/* one 8-byte slot per argument */
	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
	u8 **branches = NULL;	/* jne patch sites, one per fmod_ret prog */
	u8 *prog;

	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
	if (nr_args > 6)
		return -ENOTSUPP;

	/* RESTORE_REGS and SKIP_FRAME are mutually exclusive */
	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
	    (flags & BPF_TRAMP_F_SKIP_FRAME))
		return -EINVAL;

	if (flags & BPF_TRAMP_F_CALL_ORIG)
		stack_size += 8; /* room for return value of orig_call */

	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* skip patched call instruction and point orig_call to actual
		 * body of the kernel function.
		 */
		orig_call += X86_PATCH_SIZE;

	prog = image;

	/* Standard frame setup; rbx is callee-saved and carries the
	 * per-prog start time between enter/exit calls.
	 */
	EMIT1(0x55);		 /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
	EMIT1(0x53);		 /* push rbx */

	save_regs(m, &prog, nr_args, stack_size);

	if (fentry->nr_progs)
		if (invoke_bpf(m, &prog, fentry, stack_size))
			return -EINVAL;

	if (fmod_ret->nr_progs) {
		branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
				       branches)) {
			ret = -EINVAL;
			goto cleanup;
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/* BPF progs may have clobbered the argument registers;
		 * reload them from the stack before the original call.
		 */
		if (fentry->nr_progs || fmod_ret->nr_progs)
			restore_regs(m, &prog, nr_args, stack_size);

		/* call original function */
		if (emit_call(&prog, orig_call, prog)) {
			ret = -EINVAL;
			goto cleanup;
		}
		/* remember return value in a stack for bpf prog to access */
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
	}

	if (fmod_ret->nr_progs) {
		/* From Intel 64 and IA-32 Architectures Optimization
		 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
		 * Coding Rule 11: All branch targets should be 16-byte
		 * aligned.
		 */
		emit_align(&prog, 16);
		/* Update the branches saved in invoke_bpf_mod_ret with the
		 * aligned address of do_fexit.
		 */
		for (i = 0; i < fmod_ret->nr_progs; i++)
			emit_cond_near_jump(&branches[i], prog, branches[i],
					    X86_JNE);
	}

	if (fexit->nr_progs)
		if (invoke_bpf(m, &prog, fexit, stack_size)) {
			ret = -EINVAL;
			goto cleanup;
		}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_regs(m, &prog, nr_args, stack_size);

	/* This needs to be done regardless. If there were fmod_ret programs,
	 * the return value is only updated on the stack and still needs to be
	 * restored to R0.
	 */
	if (flags & BPF_TRAMP_F_CALL_ORIG)
		/* restore original return value back into RAX */
		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);

	EMIT1(0x5B); /* pop rbx */
	EMIT1(0xC9); /* leave */
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* skip our return address and return to parent */
		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
	EMIT1(0xC3); /* ret */
	/* Make sure the trampoline generation logic doesn't overflow */
	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
		ret = -EFAULT;
		goto cleanup;
	}
	ret = prog - (u8 *)image;

cleanup:
	kfree(branches);
	return ret;
}

/* Emit the dispatcher's fallthrough path: an indirect jump through rdx,
 * routed via the retpoline thunk when CONFIG_RETPOLINE is enabled.
 */
static int emit_fallback_jump(u8 **pprog)
{
	u8 *prog = *pprog;
	int err = 0;

#ifdef CONFIG_RETPOLINE
	/* Note that this assumes the compiler uses external
	 * thunks for indirect calls. Both clang and GCC use the same
	 * naming convention for external thunks.
	 */
	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
#else
	int cnt = 0;

	EMIT2(0xFF, 0xE2);	/* jmp rdx */
#endif
	*pprog = prog;
	return err;
}

/* Recursively emit a binary search over progs[a..b] that compares the
 * target address in rdx against each pivot and direct-jumps to the
 * matching program, falling back to an indirect jump when none matches.
 * progs[] entries must fit in a sign-extended imm32 (cmp rdx, imm32).
 */
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
	u8 *jg_reloc, *prog = *pprog;
	int pivot, err, jg_bytes = 1, cnt = 0;
	s64 jg_offset;

	if (a == b) {
		/* Leaf node of recursion, i.e. not a range of indices
		 * anymore.
		 */
		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
		if (!is_simm32(progs[a]))
			return -1;
		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
			    progs[a]);
		err = emit_cond_near_jump(&prog,	/* je func */
					  (void *)progs[a], prog,
					  X86_JE);
		if (err)
			return err;

		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
		if (err)
			return err;

		*pprog = prog;
		return 0;
	}

	/* Not a leaf node, so we pivot, and recursively descend into
	 * the lower and upper ranges.
	 */
	pivot = (b - a) / 2;
	EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
	if (!is_simm32(progs[a + pivot]))
		return -1;
	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);

	if (pivot > 2) {	/* jg upper_part */
		/* Require near jump.
		 */
		jg_bytes = 4;
		EMIT2_off32(0x0F, X86_JG + 0x10, 0);	/* placeholder rel32 */
	} else {
		EMIT2(X86_JG, 0);	/* short jg, placeholder rel8 */
	}
	jg_reloc = prog;

	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
				  progs);
	if (err)
		return err;

	/* From Intel 64 and IA-32 Architectures Optimization
	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
	 * Coding Rule 11: All branch targets should be 16-byte
	 * aligned.
	 */
	emit_align(&prog, 16);
	/* Backpatch the jg placeholder now that the upper half's start
	 * address is known.
	 */
	jg_offset = prog - jg_reloc;
	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);

	err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */
				  b, progs);
	if (err)
		return err;

	*pprog = prog;
	return 0;
}

/* sort() comparator: ascending order of program addresses (s64). */
static int cmp_ips(const void *a, const void *b)
{
	const s64 *ipa = a;
	const s64 *ipb = b;

	if (*ipa > *ipb)
		return 1;
	if (*ipa < *ipb)
		return -1;
	return 0;
}

/* Emit a BPF dispatcher into 'image': sort the target addresses, then
 * generate the binary-search jump table over them.
 * NOTE(review): assumes num_funcs >= 1 — presumably guaranteed by the
 * caller; verify before reuse.
 */
int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
{
	u8 *prog = image;

	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
}

/* Per-program JIT state carried between the initial passes and the
 * extra pass (subprog address fixups) of bpf_int_jit_compile().
 */
struct x64_jit_data {
	struct bpf_binary_header *header;	/* allocated executable image */
	int *addrs;		/* byte offset of each BPF insn in the image */
	u8 *image;		/* start of emitted machine code */
	int proglen;		/* emitted code length in bytes */
	struct jit_context ctx;
};

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct x64_jit_data *jit_data;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	bool extra_pass = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;
1832959a7579SDaniel Borkmann */ 1833959a7579SDaniel Borkmann if (IS_ERR(tmp)) 1834959a7579SDaniel Borkmann return orig_prog; 1835959a7579SDaniel Borkmann if (tmp != prog) { 1836959a7579SDaniel Borkmann tmp_blinded = true; 1837959a7579SDaniel Borkmann prog = tmp; 1838959a7579SDaniel Borkmann } 1839f3c2af7bSAlexei Starovoitov 18401c2a088aSAlexei Starovoitov jit_data = prog->aux->jit_data; 18411c2a088aSAlexei Starovoitov if (!jit_data) { 18421c2a088aSAlexei Starovoitov jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); 18431c2a088aSAlexei Starovoitov if (!jit_data) { 18441c2a088aSAlexei Starovoitov prog = orig_prog; 18451c2a088aSAlexei Starovoitov goto out; 18461c2a088aSAlexei Starovoitov } 18471c2a088aSAlexei Starovoitov prog->aux->jit_data = jit_data; 18481c2a088aSAlexei Starovoitov } 18491c2a088aSAlexei Starovoitov addrs = jit_data->addrs; 18501c2a088aSAlexei Starovoitov if (addrs) { 18511c2a088aSAlexei Starovoitov ctx = jit_data->ctx; 18521c2a088aSAlexei Starovoitov oldproglen = jit_data->proglen; 18531c2a088aSAlexei Starovoitov image = jit_data->image; 18541c2a088aSAlexei Starovoitov header = jit_data->header; 18551c2a088aSAlexei Starovoitov extra_pass = true; 18561c2a088aSAlexei Starovoitov goto skip_init_addrs; 18571c2a088aSAlexei Starovoitov } 18587c2e988fSAlexei Starovoitov addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); 1859959a7579SDaniel Borkmann if (!addrs) { 1860959a7579SDaniel Borkmann prog = orig_prog; 18611c2a088aSAlexei Starovoitov goto out_addrs; 1862959a7579SDaniel Borkmann } 1863f3c2af7bSAlexei Starovoitov 1864a2c7a983SIngo Molnar /* 1865a2c7a983SIngo Molnar * Before first pass, make a rough estimation of addrs[] 1866a2c7a983SIngo Molnar * each BPF instruction is translated to less than 64 bytes 1867f3c2af7bSAlexei Starovoitov */ 18687c2e988fSAlexei Starovoitov for (proglen = 0, i = 0; i <= prog->len; i++) { 1869f3c2af7bSAlexei Starovoitov proglen += 64; 1870f3c2af7bSAlexei Starovoitov addrs[i] = proglen; 1871f3c2af7bSAlexei 
Starovoitov } 1872f3c2af7bSAlexei Starovoitov ctx.cleanup_addr = proglen; 18731c2a088aSAlexei Starovoitov skip_init_addrs: 1874f3c2af7bSAlexei Starovoitov 1875a2c7a983SIngo Molnar /* 1876a2c7a983SIngo Molnar * JITed image shrinks with every pass and the loop iterates 1877a2c7a983SIngo Molnar * until the image stops shrinking. Very large BPF programs 18783f7352bfSAlexei Starovoitov * may converge on the last pass. In such case do one more 1879a2c7a983SIngo Molnar * pass to emit the final image. 18803f7352bfSAlexei Starovoitov */ 18816007b080SDaniel Borkmann for (pass = 0; pass < 20 || image; pass++) { 1882f3c2af7bSAlexei Starovoitov proglen = do_jit(prog, addrs, image, oldproglen, &ctx); 1883f3c2af7bSAlexei Starovoitov if (proglen <= 0) { 18843aab8884SDaniel Borkmann out_image: 1885f3c2af7bSAlexei Starovoitov image = NULL; 1886f3c2af7bSAlexei Starovoitov if (header) 1887738cbe72SDaniel Borkmann bpf_jit_binary_free(header); 1888959a7579SDaniel Borkmann prog = orig_prog; 1889959a7579SDaniel Borkmann goto out_addrs; 1890f3c2af7bSAlexei Starovoitov } 18910a14842fSEric Dumazet if (image) { 1892e0ee9c12SAlexei Starovoitov if (proglen != oldproglen) { 1893f3c2af7bSAlexei Starovoitov pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", 1894f3c2af7bSAlexei Starovoitov proglen, oldproglen); 18953aab8884SDaniel Borkmann goto out_image; 1896e0ee9c12SAlexei Starovoitov } 18970a14842fSEric Dumazet break; 18980a14842fSEric Dumazet } 18990a14842fSEric Dumazet if (proglen == oldproglen) { 19003dec541bSAlexei Starovoitov /* 19013dec541bSAlexei Starovoitov * The number of entries in extable is the number of BPF_LDX 19023dec541bSAlexei Starovoitov * insns that access kernel memory via "pointer to BTF type". 19033dec541bSAlexei Starovoitov * The verifier changed their opcode from LDX|MEM|size 19043dec541bSAlexei Starovoitov * to LDX|PROBE_MEM|size to make JITing easier. 
19053dec541bSAlexei Starovoitov */ 19063dec541bSAlexei Starovoitov u32 align = __alignof__(struct exception_table_entry); 19073dec541bSAlexei Starovoitov u32 extable_size = prog->aux->num_exentries * 19083dec541bSAlexei Starovoitov sizeof(struct exception_table_entry); 19093dec541bSAlexei Starovoitov 19103dec541bSAlexei Starovoitov /* allocate module memory for x86 insns and extable */ 19113dec541bSAlexei Starovoitov header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, 19123dec541bSAlexei Starovoitov &image, align, jit_fill_hole); 1913959a7579SDaniel Borkmann if (!header) { 1914959a7579SDaniel Borkmann prog = orig_prog; 1915959a7579SDaniel Borkmann goto out_addrs; 1916959a7579SDaniel Borkmann } 19173dec541bSAlexei Starovoitov prog->aux->extable = (void *) image + roundup(proglen, align); 19180a14842fSEric Dumazet } 19190a14842fSEric Dumazet oldproglen = proglen; 19206007b080SDaniel Borkmann cond_resched(); 19210a14842fSEric Dumazet } 192279617801SDaniel Borkmann 19230a14842fSEric Dumazet if (bpf_jit_enable > 1) 1924485d6511SDaniel Borkmann bpf_jit_dump(prog->len, proglen, pass + 1, image); 19250a14842fSEric Dumazet 19260a14842fSEric Dumazet if (image) { 19271c2a088aSAlexei Starovoitov if (!prog->is_func || extra_pass) { 1928428d5df1SDaniel Borkmann bpf_tail_call_direct_fixup(prog); 19299d876e79SDaniel Borkmann bpf_jit_binary_lock_ro(header); 19301c2a088aSAlexei Starovoitov } else { 19311c2a088aSAlexei Starovoitov jit_data->addrs = addrs; 19321c2a088aSAlexei Starovoitov jit_data->ctx = ctx; 19331c2a088aSAlexei Starovoitov jit_data->proglen = proglen; 19341c2a088aSAlexei Starovoitov jit_data->image = image; 19351c2a088aSAlexei Starovoitov jit_data->header = header; 19361c2a088aSAlexei Starovoitov } 1937f3c2af7bSAlexei Starovoitov prog->bpf_func = (void *)image; 1938a91263d5SDaniel Borkmann prog->jited = 1; 1939783d28ddSMartin KaFai Lau prog->jited_len = proglen; 19409d5ecb09SDaniel Borkmann } else { 19419d5ecb09SDaniel Borkmann prog = orig_prog; 
19420a14842fSEric Dumazet } 1943959a7579SDaniel Borkmann 194439f56ca9SDaniel Borkmann if (!image || !prog->is_func || extra_pass) { 1945c454a46bSMartin KaFai Lau if (image) 19467c2e988fSAlexei Starovoitov bpf_prog_fill_jited_linfo(prog, addrs + 1); 1947959a7579SDaniel Borkmann out_addrs: 19480a14842fSEric Dumazet kfree(addrs); 19491c2a088aSAlexei Starovoitov kfree(jit_data); 19501c2a088aSAlexei Starovoitov prog->aux->jit_data = NULL; 19511c2a088aSAlexei Starovoitov } 1952959a7579SDaniel Borkmann out: 1953959a7579SDaniel Borkmann if (tmp_blinded) 1954959a7579SDaniel Borkmann bpf_jit_prog_release_other(prog, prog == orig_prog ? 1955959a7579SDaniel Borkmann tmp : orig_prog); 1956d1c55ab5SDaniel Borkmann return prog; 19570a14842fSEric Dumazet } 1958