xref: /openbmc/linux/arch/x86/net/bpf_jit_comp.c (revision aee194b1)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * bpf_jit_comp.c: BPF JIT compiler
 *
 * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
 * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/asm-prototypes.h>

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

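/*
 * For example, EMIT3(0x48, 0x89, 0xE5) packs its arguments into the u32
 * 0x00E58948, which emit_code() stores little-endian as the byte sequence
 * 48 89 E5, i.e. 'mov rbp, rsp'. The u32 store touches a fourth (zero)
 * byte, but ptr only advances by len, so the next EMIT() overwrites it.
 */
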
static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64)(s32)value;
}

static bool is_uimm32(u64 value)
{
	return value == (u64)(u32)value;
}

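/*
 * is_simm32() tells whether a value survives a sign-extending 32-bit
 * truncation, is_uimm32() a zero-extending one. For example:
 *   is_simm32(0x7fffffff) == true,  is_simm32(0x80000000LL) == false
 *   is_uimm32(0x80000000) == true,  is_uimm32(1ULL << 32)   == false
 */
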
/* mov dst, src */
#define EMIT_mov(DST, SRC)								 \
	do {										 \
		if (DST != SRC)								 \
			EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
	} while (0)

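/*
 * For example, EMIT_mov(BPF_REG_1, BPF_REG_2) emits 48 89 F7, i.e.
 * 'mov rdi, rsi': REX.W prefix, opcode 0x89, ModRM 0xC0 + rdi(7) +
 * (rsi(6) << 3). When DST == SRC, nothing is emitted at all.
 */
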
static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

/*
 * List of x86 conditional jump opcodes (. + s8).
 * Add 0x10 (and an extra 0x0f prefix byte) to generate far jumps (. + s32).
 */
#define X86_JB  0x72
#define X86_JAE 0x73
#define X86_JE  0x74
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA  0x77
#define X86_JL  0x7C
#define X86_JGE 0x7D
#define X86_JLE 0x7E
#define X86_JG  0x7F

/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)

/*
 * The following table maps BPF registers to x86-64 registers.
 *
 * x86-64 register R12 is unused: when used as the base address register
 * in load/store instructions it always needs an extra byte of encoding,
 * and it is callee-saved.
 *
 * x86-64 register R9 is not used by BPF programs, but can be used by the
 * BPF trampoline. x86-64 register R10 is used for blinding (if enabled).
 */
static const int reg2hex[] = {
	[BPF_REG_0] = 0,  /* RAX */
	[BPF_REG_1] = 7,  /* RDI */
	[BPF_REG_2] = 6,  /* RSI */
	[BPF_REG_3] = 2,  /* RDX */
	[BPF_REG_4] = 1,  /* RCX */
	[BPF_REG_5] = 0,  /* R8  */
	[BPF_REG_6] = 3,  /* RBX callee saved */
	[BPF_REG_7] = 5,  /* R13 callee saved */
	[BPF_REG_8] = 6,  /* R14 callee saved */
	[BPF_REG_9] = 7,  /* R15 callee saved */
	[BPF_REG_FP] = 5, /* RBP readonly */
	[BPF_REG_AX] = 2, /* R10 temp register */
	[AUX_REG] = 3,    /* R11 temp register */
	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
};

static const int reg2pt_regs[] = {
	[BPF_REG_0] = offsetof(struct pt_regs, ax),
	[BPF_REG_1] = offsetof(struct pt_regs, di),
	[BPF_REG_2] = offsetof(struct pt_regs, si),
	[BPF_REG_3] = offsetof(struct pt_regs, dx),
	[BPF_REG_4] = offsetof(struct pt_regs, cx),
	[BPF_REG_5] = offsetof(struct pt_regs, r8),
	[BPF_REG_6] = offsetof(struct pt_regs, bx),
	[BPF_REG_7] = offsetof(struct pt_regs, r13),
	[BPF_REG_8] = offsetof(struct pt_regs, r14),
	[BPF_REG_9] = offsetof(struct pt_regs, r15),
};

/*
 * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15,
 * which need an extra byte of encoding.
 * rax,rcx,...,rbp have simpler encodings.
 */
static bool is_ereg(u32 reg)
{
	return (1 << reg) & (BIT(BPF_REG_5) |
			     BIT(AUX_REG) |
			     BIT(BPF_REG_7) |
			     BIT(BPF_REG_8) |
			     BIT(BPF_REG_9) |
			     BIT(X86_REG_R9) |
			     BIT(BPF_REG_AX));
}

/*
 * is_ereg_8l() == true if BPF register 'reg' is mapped to access the x86-64
 * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need an extra byte
 * of encoding. al,cl,dl,bl have simpler encodings.
 */
static bool is_ereg_8l(u32 reg)
{
	return is_ereg(reg) ||
	    (1 << reg) & (BIT(BPF_REG_1) |
			  BIT(BPF_REG_2) |
			  BIT(BPF_REG_FP));
}

static bool is_axreg(u32 reg)
{
	return reg == BPF_REG_0;
}

/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
	if (is_ereg(reg))
		byte |= 1;
	return byte;
}

static u8 add_2mod(u8 byte, u32 r1, u32 r2)
{
	if (is_ereg(r1))
		byte |= 1;
	if (is_ereg(r2))
		byte |= 4;
	return byte;
}

/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + reg2hex[dst_reg];
}

/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
}

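/*
 * reg2hex[] supplies the low three ModRM bits and add_*mod() the
 * REX.B/REX.R bits. For example, for 'mov rax, r11':
 * add_2mod(0x48, BPF_REG_0, AUX_REG) == 0x4C (REX.W|REX.R, since r11 is
 * an extended register) and add_2reg(0xC0, BPF_REG_0, AUX_REG) == 0xD8,
 * so EMIT_mov(BPF_REG_0, AUX_REG) emits 4C 89 D8.
 */
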
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with INT3 instructions */
	memset(area, 0xcc, size);
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE		5

#define PROLOGUE_SIZE		25

/*
 * Emit x86-64 prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program
 */
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
	u8 *prog = *pprog;
	int cnt = X86_PATCH_SIZE;

	/* BPF trampoline can be made to work without these nops,
	 * but let's waste 5 bytes for now and optimize later
	 */
	memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
	prog += cnt;
	EMIT1(0x55);             /* push rbp */
	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
	/* sub rsp, rounded_stack_depth */
	EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
	EMIT1(0x53);             /* push rbx */
	EMIT2(0x41, 0x55);       /* push r13 */
	EMIT2(0x41, 0x56);       /* push r14 */
	EMIT2(0x41, 0x57);       /* push r15 */
	if (!ebpf_from_cbpf) {
		/* zero init tail_call_cnt */
		EMIT2(0x6a, 0x00);
		BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	}
	*pprog = prog;
}

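/*
 * PROLOGUE_SIZE == 25 is the sum of the bytes emitted above: 5 (atomic
 * NOP) + 1 (push rbp) + 3 (mov rbp, rsp) + 7 (sub rsp, imm32) + 1
 * (push rbx) + 2 + 2 + 2 (push r13/r14/r15) + 2 (push 0 to zero-init
 * tail_call_cnt), as verified by the BUILD_BUG_ON() above.
 */
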
static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
{
	u8 *prog = *pprog;
	int cnt = 0;
	s64 offset;

	offset = func - (ip + X86_PATCH_SIZE);
	if (!is_simm32(offset)) {
		pr_err("Target call %p is out of range\n", func);
		return -ERANGE;
	}
	EMIT1_off32(opcode, offset);
	*pprog = prog;
	return 0;
}

static int emit_call(u8 **pprog, void *func, void *ip)
{
	return emit_patch(pprog, func, ip, 0xE8);
}

static int emit_jump(u8 **pprog, void *func, void *ip)
{
	return emit_patch(pprog, func, ip, 0xE9);
}

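/*
 * emit_patch() generates a 5-byte rel32 call (0xE8) or jump (0xE9). The
 * displacement is relative to the end of the instruction, hence
 * 'func - (ip + X86_PATCH_SIZE)'; e.g. for func == ip + 5 a call is
 * encoded as E8 00 00 00 00.
 */
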
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
				void *old_addr, void *new_addr,
				const bool text_live)
{
	const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
	u8 old_insn[X86_PATCH_SIZE];
	u8 new_insn[X86_PATCH_SIZE];
	u8 *prog;
	int ret;

	memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
	if (old_addr) {
		prog = old_insn;
		ret = t == BPF_MOD_CALL ?
		      emit_call(&prog, old_addr, ip) :
		      emit_jump(&prog, old_addr, ip);
		if (ret)
			return ret;
	}

	memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
	if (new_addr) {
		prog = new_insn;
		ret = t == BPF_MOD_CALL ?
		      emit_call(&prog, new_addr, ip) :
		      emit_jump(&prog, new_addr, ip);
		if (ret)
			return ret;
	}

	ret = -EBUSY;
	mutex_lock(&text_mutex);
	if (memcmp(ip, old_insn, X86_PATCH_SIZE))
		goto out;
	if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
		if (text_live)
			text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
		else
			memcpy(ip, new_insn, X86_PATCH_SIZE);
	}
	ret = 0;
out:
	mutex_unlock(&text_mutex);
	return ret;
}

int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *old_addr, void *new_addr)
{
	if (!is_kernel_text((long)ip) &&
	    !is_bpf_text_address((long)ip))
		/* BPF poking in modules is not supported */
		return -EINVAL;

	return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}

/*
 * Generate the following code:
 *
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call_indirect(u8 **pprog)
{
	u8 *prog = *pprog;
	int label1, label2, label3;
	int cnt = 0;

	/*
	 * rdi - pointer to ctx
	 * rsi - pointer to bpf_array
	 * rdx - index in bpf_array
	 */

	/*
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
	      offsetof(struct bpf_array, map.max_entries));
#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
	label1 = cnt;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JA, OFFSET2);                   /* ja out */
	label2 = cnt;
	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */

	/* prog = array->ptrs[index]; */
	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
		    offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *	goto out;
	 */
	EMIT3(0x48, 0x85, 0xC0);		  /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
	EMIT2(X86_JE, OFFSET3);                   /* je out */
	label3 = cnt;

	/* goto *(prog->bpf_func + prologue_size); */
	EMIT4(0x48, 0x8B, 0x40,                   /* mov rax, qword ptr [rax + 32] */
	      offsetof(struct bpf_prog, bpf_func));
	EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE);   /* add rax, prologue_size */

	/*
	 * Now we're ready to jump into the next BPF program:
	 * rdi == ctx (1st arg)
	 * rax == prog->bpf_func + prologue_size
	 */
	RETPOLINE_RAX_BPF_JIT();

	/* out: */
	BUILD_BUG_ON(cnt - label1 != OFFSET1);
	BUILD_BUG_ON(cnt - label2 != OFFSET2);
	BUILD_BUG_ON(cnt - label3 != OFFSET3);
	*pprog = prog;
}

static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
				      u8 **pprog, int addr, u8 *image)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *	goto out;
	 */
	EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);         /* cmp eax, MAX_TAIL_CALL_CNT */
	EMIT2(X86_JA, 14);                            /* ja out */
	EMIT3(0x83, 0xC0, 0x01);                      /* add eax, 1 */
	EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */

	poke->ip = image + (addr - X86_PATCH_SIZE);
	poke->adj_off = PROLOGUE_SIZE;

	memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
	prog += X86_PATCH_SIZE;
	/* out: */

	*pprog = prog;
}

static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
{
	struct bpf_jit_poke_descriptor *poke;
	struct bpf_array *array;
	struct bpf_prog *target;
	int i, ret;

	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		poke = &prog->aux->poke_tab[i];
		WARN_ON_ONCE(READ_ONCE(poke->ip_stable));

		if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
			continue;

		array = container_of(poke->tail_call.map, struct bpf_array, map);
		mutex_lock(&array->aux->poke_mutex);
		target = array->ptrs[poke->tail_call.key];
		if (target) {
			/* Plain memcpy is used when the image is not live yet
			 * and still not locked read-only. Once the poke
			 * location is active (poke->ip_stable), any parallel
			 * bpf_arch_text_poke() might still occur on the
			 * read-write image until we have finally locked it
			 * read-only. Both modifications of the given image
			 * are done under text_mutex to avoid interference.
			 */
			ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
						   (u8 *)target->bpf_func +
						   poke->adj_off, false);
			BUG_ON(ret < 0);
		}
		WRITE_ONCE(poke->ip_stable, true);
		mutex_unlock(&array->aux->poke_mutex);
	}
}

static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
			   u32 dst_reg, const u32 imm32)
{
	u8 *prog = *pprog;
	u8 b1, b2, b3;
	int cnt = 0;

	/*
	 * Optimization: if imm32 is positive, use 'mov %eax, imm32'
	 * (which zero-extends imm32) to save 2 bytes.
	 */
	if (sign_propagate && (s32)imm32 < 0) {
		/* 'mov %rax, imm32' sign extends imm32 */
		b1 = add_1mod(0x48, dst_reg);
		b2 = 0xC7;
		b3 = 0xC0;
		EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
		goto done;
	}

	/*
	 * Optimization: if imm32 is zero, use 'xor %eax, %eax'
	 * to save 3 bytes.
	 */
	if (imm32 == 0) {
		if (is_ereg(dst_reg))
			EMIT1(add_2mod(0x40, dst_reg, dst_reg));
		b2 = 0x31; /* xor */
		b3 = 0xC0;
		EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
		goto done;
	}

	/* mov %eax, imm32 */
	if (is_ereg(dst_reg))
		EMIT1(add_1mod(0x40, dst_reg));
	EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
done:
	*pprog = prog;
}

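/*
 * For dst_reg == BPF_REG_0 (rax), the three paths above emit:
 *   imm32 == 0                 -> 31 C0            xor eax, eax (2 bytes)
 *   imm32 < 0, sign_propagate  -> 48 C7 C0 <imm32> mov rax, imm32 (7 bytes)
 *   otherwise                  -> B8 <imm32>       mov eax, imm32 (5 bytes)
 */
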
static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
			   const u32 imm32_hi, const u32 imm32_lo)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
		/*
		 * For emitting a plain u32 where the sign bit must not be
		 * propagated, LLVM tends to load imm64 over mov32 directly,
		 * so save a couple of bytes by just doing 'mov %eax, imm32'
		 * instead.
		 */
		emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
	} else {
		/* movabsq %rax, imm64 */
		EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
		EMIT(imm32_lo, 4);
		EMIT(imm32_hi, 4);
	}

	*pprog = prog;
}

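/*
 * A value that really needs all 64 bits is emitted as the 10-byte
 * 'movabsq' (e.g. 48 B8 plus eight little-endian immediate bytes for
 * rax); anything that fits a zero-extended u32 falls back to the
 * shorter emit_mov_imm32() paths above.
 */
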
static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (is64) {
		/* mov dst, src */
		EMIT_mov(dst_reg, src_reg);
	} else {
		/* mov32 dst, src */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT1(add_2mod(0x40, dst_reg, src_reg));
		EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
	}

	*pprog = prog;
}

/* LDX: dst_reg = *(u8*)(src_reg + off) */
static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	u8 *prog = *pprog;
	int cnt = 0;

	switch (size) {
	case BPF_B:
		/* Emit 'movzx rax, byte ptr [rax + off]' */
		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
		break;
	case BPF_H:
		/* Emit 'movzx rax, word ptr [rax + off]' */
		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
		break;
	case BPF_W:
		/* Emit 'mov eax, dword ptr [rax+0x14]' */
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
		else
			EMIT1(0x8B);
		break;
	case BPF_DW:
		/* Emit 'mov rax, qword ptr [rax+0x14]' */
		EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
		break;
	}
	/*
	 * If insn->off == 0 we could save one extra byte, but the
	 * special case of x86 R13, which always needs an offset,
	 * makes it not worth the hassle.
	 */
	if (is_imm8(off))
		EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
	else
		EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
	*pprog = prog;
}

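/*
 * For example, emit_ldx(&prog, BPF_W, BPF_REG_0, BPF_REG_1, 0x14) emits
 * 8B 47 14, i.e. 'mov eax, dword ptr [rdi + 0x14]': opcode 0x8B, ModRM
 * 0x40 + rdi(7) + (rax(0) << 3), disp8.
 */
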
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
	u8 *prog = *pprog;
	int cnt = 0;

	switch (size) {
	case BPF_B:
		/* Emit 'mov byte ptr [rax + off], al' */
		if (is_ereg(dst_reg) || is_ereg_8l(src_reg))
			/* Add extra byte for eregs or SIL,DIL,BPL in src_reg */
			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
		else
			EMIT1(0x88);
		break;
	case BPF_H:
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
		else
			EMIT2(0x66, 0x89);
		break;
	case BPF_W:
		if (is_ereg(dst_reg) || is_ereg(src_reg))
			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
		else
			EMIT1(0x89);
		break;
	case BPF_DW:
		EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
		break;
	}
	if (is_imm8(off))
		EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
	else
		EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
	*pprog = prog;
}

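/*
 * For example, emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, -8) emits
 * 48 89 47 F8, i.e. 'mov qword ptr [rdi - 8], rax': REX.W, opcode 0x89,
 * ModRM 0x47, disp8 0xF8.
 */
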
static bool ex_handler_bpf(const struct exception_table_entry *x,
			   struct pt_regs *regs, int trapnr,
			   unsigned long error_code, unsigned long fault_addr)
{
	u32 reg = x->fixup >> 8;

	/* jump over faulting load and clear dest register */
	*(unsigned long *)((void *)regs + reg) = 0;
	regs->ip += x->fixup & 0xff;
	return true;
}

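/*
 * ex->fixup packs the x86 insn length into the low 8 bits and the
 * pt_regs offset of the destination register into the upper bits (see
 * the BPF_PROBE_MEM handling in do_jit()). E.g. for the 4-byte
 * 'mov rbx, qword ptr [rax + 0x14]' the fixup is
 * 4 | (offsetof(struct pt_regs, bx) << 8).
 */
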
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0, excnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth,
		      bpf_prog_was_classic(bpf_prog));
	addrs[0] = prog - temp;

	for (i = 1; i <= insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		u32 dst_reg = insn->dst_reg;
		u32 src_reg = insn->src_reg;
		u8 b2 = 0, b3 = 0;
		s64 jmp_offset;
		u8 jmp_cond;
		int ilen;
		u8 *func;

		switch (insn->code) {
			/* ALU */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_OP(insn->code)) {
			case BPF_ADD: b2 = 0x01; break;
			case BPF_SUB: b2 = 0x29; break;
			case BPF_AND: b2 = 0x21; break;
			case BPF_OR: b2 = 0x09; break;
			case BPF_XOR: b2 = 0x31; break;
			}
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_2mod(0x48, dst_reg, src_reg));
			else if (is_ereg(dst_reg) || is_ereg(src_reg))
				EMIT1(add_2mod(0x40, dst_reg, src_reg));
			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
			break;

		case BPF_ALU64 | BPF_MOV | BPF_X:
		case BPF_ALU | BPF_MOV | BPF_X:
			emit_mov_reg(&prog,
				     BPF_CLASS(insn->code) == BPF_ALU64,
				     dst_reg, src_reg);
			break;

			/* neg dst */
		case BPF_ALU | BPF_NEG:
		case BPF_ALU64 | BPF_NEG:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));
			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
			break;

		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			/*
			 * b3 holds the 'normal' opcode; b2 is the short
			 * form, only valid when dst is eax/rax.
			 */
			switch (BPF_OP(insn->code)) {
			case BPF_ADD:
				b3 = 0xC0;
				b2 = 0x05;
				break;
			case BPF_SUB:
				b3 = 0xE8;
				b2 = 0x2D;
				break;
			case BPF_AND:
				b3 = 0xE0;
				b2 = 0x25;
				break;
			case BPF_OR:
				b3 = 0xC8;
				b2 = 0x0D;
				break;
			case BPF_XOR:
				b3 = 0xF0;
				b2 = 0x35;
				break;
			}

			if (is_imm8(imm32))
				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
			else if (is_axreg(dst_reg))
				EMIT1_off32(b2, imm32);
			else
				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_K:
			emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
				       dst_reg, imm32);
			break;

		case BPF_LD | BPF_IMM | BPF_DW:
			emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
			insn++;
			i++;
			break;

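			/*
			 * BPF_LD | BPF_IMM | BPF_DW above spans two BPF
			 * instructions: insn[0].imm holds the low and
			 * insn[1].imm the high 32 bits of the constant,
			 * which is why insn and i advance an extra step.
			 */
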
			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_K:
			EMIT1(0x50); /* push rax */
			EMIT1(0x52); /* push rdx */

			if (BPF_SRC(insn->code) == BPF_X)
				/* mov r11, src_reg */
				EMIT_mov(AUX_REG, src_reg);
			else
				/* mov r11, imm32 */
				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

			/* mov rax, dst_reg */
			EMIT_mov(BPF_REG_0, dst_reg);

			/*
			 * xor edx, edx
			 * equivalent to 'xor rdx, rdx', but one byte less
			 */
			EMIT2(0x31, 0xd2);

			if (BPF_CLASS(insn->code) == BPF_ALU64)
				/* div r11 */
				EMIT3(0x49, 0xF7, 0xF3);
			else
				/* div r11d */
				EMIT3(0x41, 0xF7, 0xF3);

			if (BPF_OP(insn->code) == BPF_MOD)
				/* mov r11, rdx */
				EMIT3(0x49, 0x89, 0xD3);
			else
				/* mov r11, rax */
				EMIT3(0x49, 0x89, 0xC3);

			EMIT1(0x5A); /* pop rdx */
			EMIT1(0x58); /* pop rax */

			/* mov dst_reg, r11 */
			EMIT_mov(dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
		case BPF_ALU64 | BPF_MUL | BPF_X:
		{
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;

			if (dst_reg != BPF_REG_0)
				EMIT1(0x50); /* push rax */
			if (dst_reg != BPF_REG_3)
				EMIT1(0x52); /* push rdx */

			/* mov r11, dst_reg */
			EMIT_mov(AUX_REG, dst_reg);

			if (BPF_SRC(insn->code) == BPF_X)
				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
			else
				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);

			if (is64)
				EMIT1(add_1mod(0x48, AUX_REG));
			else if (is_ereg(AUX_REG))
				EMIT1(add_1mod(0x40, AUX_REG));
			/* mul(q) r11 */
			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

			if (dst_reg != BPF_REG_3)
				EMIT1(0x5A); /* pop rdx */
			if (dst_reg != BPF_REG_0) {
				/* mov dst_reg, rax */
				EMIT_mov(dst_reg, BPF_REG_0);
				EMIT1(0x58); /* pop rax */
			}
			break;
		}
			/* Shifts */
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU64 | BPF_LSH | BPF_K:
		case BPF_ALU64 | BPF_RSH | BPF_K:
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}

			if (imm32 == 1)
				EMIT2(0xD1, add_1reg(b3, dst_reg));
			else
				EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
			break;

		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_X:
		case BPF_ALU64 | BPF_LSH | BPF_X:
		case BPF_ALU64 | BPF_RSH | BPF_X:
		case BPF_ALU64 | BPF_ARSH | BPF_X:

			/* Check for bad case when dst_reg == rcx */
			if (dst_reg == BPF_REG_4) {
				/* mov r11, dst_reg */
				EMIT_mov(AUX_REG, dst_reg);
				dst_reg = AUX_REG;
			}

			if (src_reg != BPF_REG_4) { /* common case */
				EMIT1(0x51); /* push rcx */

				/* mov rcx, src_reg */
				EMIT_mov(BPF_REG_4, src_reg);
			}

			/* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
			if (BPF_CLASS(insn->code) == BPF_ALU64)
				EMIT1(add_1mod(0x48, dst_reg));
			else if (is_ereg(dst_reg))
				EMIT1(add_1mod(0x40, dst_reg));

			switch (BPF_OP(insn->code)) {
			case BPF_LSH: b3 = 0xE0; break;
			case BPF_RSH: b3 = 0xE8; break;
			case BPF_ARSH: b3 = 0xF8; break;
			}
			EMIT2(0xD3, add_1reg(b3, dst_reg));

			if (src_reg != BPF_REG_4)
				EMIT1(0x59); /* pop rcx */

			if (insn->dst_reg == BPF_REG_4)
				/* mov dst_reg, r11 */
				EMIT_mov(insn->dst_reg, AUX_REG);
			break;

		case BPF_ALU | BPF_END | BPF_FROM_BE:
			switch (imm32) {
			case 16:
				/* Emit 'ror %ax, 8' to swap lower 2 bytes */
				EMIT1(0x66);
				if (is_ereg(dst_reg))
					EMIT1(0x41);
				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

				/* Emit 'movzwl eax, ax' */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'bswap eax' to swap lower 4 bytes */
				if (is_ereg(dst_reg))
					EMIT2(0x41, 0x0F);
				else
					EMIT1(0x0F);
				EMIT1(add_1reg(0xC8, dst_reg));
				break;
			case 64:
				/* Emit 'bswap rax' to swap 8 bytes */
				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
				      add_1reg(0xC8, dst_reg));
				break;
			}
			break;

		case BPF_ALU | BPF_END | BPF_FROM_LE:
			switch (imm32) {
			case 16:
				/*
				 * Emit 'movzwl eax, ax' to zero extend 16-bit
				 * into 64 bit
				 */
				if (is_ereg(dst_reg))
					EMIT3(0x45, 0x0F, 0xB7);
				else
					EMIT2(0x0F, 0xB7);
				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 32:
				/* Emit 'mov eax, eax' to clear upper 32-bits */
				if (is_ereg(dst_reg))
					EMIT1(0x45);
				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
				break;
			case 64:
				/* nop */
				break;
			}
			break;

			/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_B:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC6);
			else
				EMIT1(0xC6);
			goto st;
		case BPF_ST | BPF_MEM | BPF_H:
			if (is_ereg(dst_reg))
				EMIT3(0x66, 0x41, 0xC7);
			else
				EMIT2(0x66, 0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_W:
			if (is_ereg(dst_reg))
				EMIT2(0x41, 0xC7);
			else
				EMIT1(0xC7);
			goto st;
		case BPF_ST | BPF_MEM | BPF_DW:
			EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, dst_reg), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
			break;

			/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
		case BPF_STX | BPF_MEM | BPF_H:
		case BPF_STX | BPF_MEM | BPF_W:
		case BPF_STX | BPF_MEM | BPF_DW:
			emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
			break;

			/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
		case BPF_LDX | BPF_MEM | BPF_H:
		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
		case BPF_LDX | BPF_MEM | BPF_W:
		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
		case BPF_LDX | BPF_MEM | BPF_DW:
		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
			emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
			if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
				struct exception_table_entry *ex;
				u8 *_insn = image + proglen;
				s64 delta;

				if (!bpf_prog->aux->extable)
					break;

				if (excnt >= bpf_prog->aux->num_exentries) {
					pr_err("ex gen bug\n");
					return -EFAULT;
				}
				ex = &bpf_prog->aux->extable[excnt++];

				delta = _insn - (u8 *)&ex->insn;
				if (!is_simm32(delta)) {
					pr_err("extable->insn doesn't fit into 32-bit\n");
					return -EFAULT;
				}
				ex->insn = delta;

				delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
				if (!is_simm32(delta)) {
					pr_err("extable->handler doesn't fit into 32-bit\n");
					return -EFAULT;
				}
				ex->handler = delta;

				if (dst_reg > BPF_REG_9) {
					pr_err("verifier error\n");
					return -EFAULT;
				}
10733dec541bSAlexei Starovoitov 				/*
10743dec541bSAlexei Starovoitov 				 * Compute the size of the x86 insn and its target dest x86 register.
10753dec541bSAlexei Starovoitov 				 * ex_handler_bpf() will use the lower 8 bits to adjust
10763dec541bSAlexei Starovoitov 				 * pt_regs->ip to jump over this x86 instruction
10773dec541bSAlexei Starovoitov 				 * and the upper bits to figure out which pt_regs field to zero out.
10783dec541bSAlexei Starovoitov 				 * End result: the 4-byte x86 insn "mov rbx, qword ptr [rax+0x14]"
10793dec541bSAlexei Starovoitov 				 * will be skipped and rbx will be zero-initialized.
10803dec541bSAlexei Starovoitov 				 */
10813dec541bSAlexei Starovoitov 				ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
10823dec541bSAlexei Starovoitov 			}
108362258278SAlexei Starovoitov 			break;
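			/*
			 * The fixup word packs everything the fault handler
			 * needs: bits 0-7 hold the length of the faulting x86
			 * insn (to skip it) and bits 8+ hold the offset of the
			 * destination register within struct pt_regs (to zero
			 * it). A rough sketch of the decode side, not the exact
			 * ex_handler_bpf():
			 *
			 *	u32 insn_len = ex->fixup & 0xff;
			 *	u32 reg_off  = ex->fixup >> 8;
			 *
			 *	*(unsigned long *)((void *)regs + reg_off) = 0;
			 *	regs->ip += insn_len;
			 */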
108462258278SAlexei Starovoitov 
1085e430f34eSAlexei Starovoitov 			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
108662258278SAlexei Starovoitov 		case BPF_STX | BPF_XADD | BPF_W:
1087a2c7a983SIngo Molnar 			/* Emit 'lock add dword ptr [rax + off], eax' */
1088e430f34eSAlexei Starovoitov 			if (is_ereg(dst_reg) || is_ereg(src_reg))
1089e430f34eSAlexei Starovoitov 				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
109062258278SAlexei Starovoitov 			else
109162258278SAlexei Starovoitov 				EMIT2(0xF0, 0x01);
109262258278SAlexei Starovoitov 			goto xadd;
109362258278SAlexei Starovoitov 		case BPF_STX | BPF_XADD | BPF_DW:
1094e430f34eSAlexei Starovoitov 			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
109562258278SAlexei Starovoitov xadd:			if (is_imm8(insn->off))
1096e430f34eSAlexei Starovoitov 				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
109762258278SAlexei Starovoitov 			else
1098e430f34eSAlexei Starovoitov 				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
109962258278SAlexei Starovoitov 					    insn->off);
110062258278SAlexei Starovoitov 			break;
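			/*
			 * Byte-level sketch of the XADD encodings, assuming
			 * dst_reg in rdi and src_reg in rsi as an example:
			 *
			 *	F0 01 77 10	lock add dword ptr [rdi+0x10], esi
			 *	F0 48 01 77 10	lock add qword ptr [rdi+0x10], rsi
			 *
			 * 0xF0 is the lock prefix; 0x48 is the REX.W prefix that
			 * widens the add to 64 bits.
			 */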
110162258278SAlexei Starovoitov 
110262258278SAlexei Starovoitov 			/* call */
110362258278SAlexei Starovoitov 		case BPF_JMP | BPF_CALL:
1104e430f34eSAlexei Starovoitov 			func = (u8 *) __bpf_call_base + imm32;
11053b2744e6SAlexei Starovoitov 			if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
1106f3c2af7bSAlexei Starovoitov 				return -EINVAL;
110762258278SAlexei Starovoitov 			break;
110862258278SAlexei Starovoitov 
110971189fa9SAlexei Starovoitov 		case BPF_JMP | BPF_TAIL_CALL:
1110428d5df1SDaniel Borkmann 			if (imm32)
1111428d5df1SDaniel Borkmann 				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
1112428d5df1SDaniel Borkmann 							  &prog, addrs[i], image);
1113428d5df1SDaniel Borkmann 			else
1114428d5df1SDaniel Borkmann 				emit_bpf_tail_call_indirect(&prog);
1115b52f00e6SAlexei Starovoitov 			break;
1116b52f00e6SAlexei Starovoitov 
111762258278SAlexei Starovoitov 			/* cond jump */
111862258278SAlexei Starovoitov 		case BPF_JMP | BPF_JEQ | BPF_X:
111962258278SAlexei Starovoitov 		case BPF_JMP | BPF_JNE | BPF_X:
112062258278SAlexei Starovoitov 		case BPF_JMP | BPF_JGT | BPF_X:
112152afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JLT | BPF_X:
112262258278SAlexei Starovoitov 		case BPF_JMP | BPF_JGE | BPF_X:
112352afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JLE | BPF_X:
112462258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSGT | BPF_X:
112552afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JSLT | BPF_X:
112662258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSGE | BPF_X:
112752afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JSLE | BPF_X:
11283f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JEQ | BPF_X:
11293f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JNE | BPF_X:
11303f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JGT | BPF_X:
11313f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JLT | BPF_X:
11323f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JGE | BPF_X:
11333f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JLE | BPF_X:
11343f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSGT | BPF_X:
11353f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSLT | BPF_X:
11363f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSGE | BPF_X:
11373f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSLE | BPF_X:
1138e430f34eSAlexei Starovoitov 			/* cmp dst_reg, src_reg */
11393f5d6525SJiong Wang 			if (BPF_CLASS(insn->code) == BPF_JMP)
11403f5d6525SJiong Wang 				EMIT1(add_2mod(0x48, dst_reg, src_reg));
11413f5d6525SJiong Wang 			else if (is_ereg(dst_reg) || is_ereg(src_reg))
11423f5d6525SJiong Wang 				EMIT1(add_2mod(0x40, dst_reg, src_reg));
11433f5d6525SJiong Wang 			EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg));
114462258278SAlexei Starovoitov 			goto emit_cond_jmp;
114562258278SAlexei Starovoitov 
114662258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSET | BPF_X:
11473f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSET | BPF_X:
1148e430f34eSAlexei Starovoitov 			/* test dst_reg, src_reg */
11493f5d6525SJiong Wang 			if (BPF_CLASS(insn->code) == BPF_JMP)
11503f5d6525SJiong Wang 				EMIT1(add_2mod(0x48, dst_reg, src_reg));
11513f5d6525SJiong Wang 			else if (is_ereg(dst_reg) || is_ereg(src_reg))
11523f5d6525SJiong Wang 				EMIT1(add_2mod(0x40, dst_reg, src_reg));
11533f5d6525SJiong Wang 			EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg));
115462258278SAlexei Starovoitov 			goto emit_cond_jmp;
115562258278SAlexei Starovoitov 
115662258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSET | BPF_K:
11573f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSET | BPF_K:
1158e430f34eSAlexei Starovoitov 			/* test dst_reg, imm32 */
11593f5d6525SJiong Wang 			if (BPF_CLASS(insn->code) == BPF_JMP)
1160e430f34eSAlexei Starovoitov 				EMIT1(add_1mod(0x48, dst_reg));
11613f5d6525SJiong Wang 			else if (is_ereg(dst_reg))
11623f5d6525SJiong Wang 				EMIT1(add_1mod(0x40, dst_reg));
1163e430f34eSAlexei Starovoitov 			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
116462258278SAlexei Starovoitov 			goto emit_cond_jmp;
116562258278SAlexei Starovoitov 
116662258278SAlexei Starovoitov 		case BPF_JMP | BPF_JEQ | BPF_K:
116762258278SAlexei Starovoitov 		case BPF_JMP | BPF_JNE | BPF_K:
116862258278SAlexei Starovoitov 		case BPF_JMP | BPF_JGT | BPF_K:
116952afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JLT | BPF_K:
117062258278SAlexei Starovoitov 		case BPF_JMP | BPF_JGE | BPF_K:
117152afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JLE | BPF_K:
117262258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSGT | BPF_K:
117352afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JSLT | BPF_K:
117462258278SAlexei Starovoitov 		case BPF_JMP | BPF_JSGE | BPF_K:
117552afc51eSDaniel Borkmann 		case BPF_JMP | BPF_JSLE | BPF_K:
11763f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JEQ | BPF_K:
11773f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JNE | BPF_K:
11783f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JGT | BPF_K:
11793f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JLT | BPF_K:
11803f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JGE | BPF_K:
11813f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JLE | BPF_K:
11823f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSGT | BPF_K:
11833f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSLT | BPF_K:
11843f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSGE | BPF_K:
11853f5d6525SJiong Wang 		case BPF_JMP32 | BPF_JSLE | BPF_K:
118638f51c07SDaniel Borkmann 			/* test dst_reg, dst_reg to save one extra byte */
118738f51c07SDaniel Borkmann 			if (imm32 == 0) {
118838f51c07SDaniel Borkmann 				if (BPF_CLASS(insn->code) == BPF_JMP)
118938f51c07SDaniel Borkmann 					EMIT1(add_2mod(0x48, dst_reg, dst_reg));
119038f51c07SDaniel Borkmann 				else if (is_ereg(dst_reg))
119138f51c07SDaniel Borkmann 					EMIT1(add_2mod(0x40, dst_reg, dst_reg));
119238f51c07SDaniel Borkmann 				EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
119338f51c07SDaniel Borkmann 				goto emit_cond_jmp;
119438f51c07SDaniel Borkmann 			}
119538f51c07SDaniel Borkmann 
1196e430f34eSAlexei Starovoitov 			/* cmp dst_reg, imm8/32 */
11973f5d6525SJiong Wang 			if (BPF_CLASS(insn->code) == BPF_JMP)
1198e430f34eSAlexei Starovoitov 				EMIT1(add_1mod(0x48, dst_reg));
11993f5d6525SJiong Wang 			else if (is_ereg(dst_reg))
12003f5d6525SJiong Wang 				EMIT1(add_1mod(0x40, dst_reg));
120162258278SAlexei Starovoitov 
1202e430f34eSAlexei Starovoitov 			if (is_imm8(imm32))
1203e430f34eSAlexei Starovoitov 				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
120462258278SAlexei Starovoitov 			else
1205e430f34eSAlexei Starovoitov 				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
120662258278SAlexei Starovoitov 
1207a2c7a983SIngo Molnar emit_cond_jmp:		/* Convert BPF opcode to x86 */
120862258278SAlexei Starovoitov 			switch (BPF_OP(insn->code)) {
120962258278SAlexei Starovoitov 			case BPF_JEQ:
121062258278SAlexei Starovoitov 				jmp_cond = X86_JE;
121162258278SAlexei Starovoitov 				break;
121262258278SAlexei Starovoitov 			case BPF_JSET:
121362258278SAlexei Starovoitov 			case BPF_JNE:
121462258278SAlexei Starovoitov 				jmp_cond = X86_JNE;
121562258278SAlexei Starovoitov 				break;
121662258278SAlexei Starovoitov 			case BPF_JGT:
121762258278SAlexei Starovoitov 				/* GT is unsigned '>', JA in x86 */
121862258278SAlexei Starovoitov 				jmp_cond = X86_JA;
121962258278SAlexei Starovoitov 				break;
122052afc51eSDaniel Borkmann 			case BPF_JLT:
122152afc51eSDaniel Borkmann 				/* LT is unsigned '<', JB in x86 */
122252afc51eSDaniel Borkmann 				jmp_cond = X86_JB;
122352afc51eSDaniel Borkmann 				break;
122462258278SAlexei Starovoitov 			case BPF_JGE:
122562258278SAlexei Starovoitov 				/* GE is unsigned '>=', JAE in x86 */
122662258278SAlexei Starovoitov 				jmp_cond = X86_JAE;
122762258278SAlexei Starovoitov 				break;
122852afc51eSDaniel Borkmann 			case BPF_JLE:
122952afc51eSDaniel Borkmann 				/* LE is unsigned '<=', JBE in x86 */
123052afc51eSDaniel Borkmann 				jmp_cond = X86_JBE;
123152afc51eSDaniel Borkmann 				break;
123262258278SAlexei Starovoitov 			case BPF_JSGT:
1233a2c7a983SIngo Molnar 				/* Signed '>', GT in x86 */
123462258278SAlexei Starovoitov 				jmp_cond = X86_JG;
123562258278SAlexei Starovoitov 				break;
123652afc51eSDaniel Borkmann 			case BPF_JSLT:
1237a2c7a983SIngo Molnar 				/* Signed '<', LT in x86 */
123852afc51eSDaniel Borkmann 				jmp_cond = X86_JL;
123952afc51eSDaniel Borkmann 				break;
124062258278SAlexei Starovoitov 			case BPF_JSGE:
1241a2c7a983SIngo Molnar 				/* Signed '>=', GE in x86 */
124262258278SAlexei Starovoitov 				jmp_cond = X86_JGE;
124362258278SAlexei Starovoitov 				break;
124452afc51eSDaniel Borkmann 			case BPF_JSLE:
1245a2c7a983SIngo Molnar 				/* Signed '<=', LE in x86 */
124652afc51eSDaniel Borkmann 				jmp_cond = X86_JLE;
124752afc51eSDaniel Borkmann 				break;
1248a2c7a983SIngo Molnar 			default: /* to silence GCC warning */
124962258278SAlexei Starovoitov 				return -EFAULT;
125062258278SAlexei Starovoitov 			}
125162258278SAlexei Starovoitov 			jmp_offset = addrs[i + insn->off] - addrs[i];
125262258278SAlexei Starovoitov 			if (is_imm8(jmp_offset)) {
125362258278SAlexei Starovoitov 				EMIT2(jmp_cond, jmp_offset);
125462258278SAlexei Starovoitov 			} else if (is_simm32(jmp_offset)) {
125562258278SAlexei Starovoitov 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
12563b58908aSEric Dumazet 			} else {
125762258278SAlexei Starovoitov 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
125862258278SAlexei Starovoitov 				return -EFAULT;
12593b58908aSEric Dumazet 			}
126062258278SAlexei Starovoitov 
12613b58908aSEric Dumazet 			break;
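			/*
			 * jmp_cond holds the short-form Jcc opcode (e.g.
			 * X86_JE == 0x74); adding 0x10 yields the second byte of
			 * the 0x0F-prefixed near form. Sketch, taking JE as an
			 * assumed example:
			 *
			 *	74 05			je +5		; rel8
			 *	0F 84 00 10 00 00	je +0x1000	; rel32
			 */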
126262258278SAlexei Starovoitov 
126362258278SAlexei Starovoitov 		case BPF_JMP | BPF_JA:
12641612a981SGianluca Borello 			if (insn->off == -1)
12651612a981SGianluca Borello 				/* -1 jmp instructions will always jump
12661612a981SGianluca Borello 				 * backwards two bytes. Explicitly handling
12671612a981SGianluca Borello 				 * this case avoids wasting too many passes
12681612a981SGianluca Borello 				 * when there are long sequences of replaced
12691612a981SGianluca Borello 				 * dead code.
12701612a981SGianluca Borello 				 */
12711612a981SGianluca Borello 				jmp_offset = -2;
12721612a981SGianluca Borello 			else
127362258278SAlexei Starovoitov 				jmp_offset = addrs[i + insn->off] - addrs[i];
12741612a981SGianluca Borello 
127562258278SAlexei Starovoitov 			if (!jmp_offset)
1276a2c7a983SIngo Molnar 				/* Optimize out nop jumps */
127762258278SAlexei Starovoitov 				break;
127862258278SAlexei Starovoitov emit_jmp:
127962258278SAlexei Starovoitov 			if (is_imm8(jmp_offset)) {
128062258278SAlexei Starovoitov 				EMIT2(0xEB, jmp_offset);
128162258278SAlexei Starovoitov 			} else if (is_simm32(jmp_offset)) {
128262258278SAlexei Starovoitov 				EMIT1_off32(0xE9, jmp_offset);
128362258278SAlexei Starovoitov 			} else {
128462258278SAlexei Starovoitov 				pr_err("jmp gen bug %llx\n", jmp_offset);
128562258278SAlexei Starovoitov 				return -EFAULT;
12863b58908aSEric Dumazet 			}
128762258278SAlexei Starovoitov 			break;
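			/*
			 * The off == -1 case above always encodes as the 2-byte
			 * self-jump EB FE (jmp -2, i.e. back to its own first
			 * byte). Pinning its size keeps addrs[] stable across
			 * passes even when the verifier has replaced long runs
			 * of dead code with such jumps.
			 */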
128862258278SAlexei Starovoitov 
128962258278SAlexei Starovoitov 		case BPF_JMP | BPF_EXIT:
1290769e0de6SAlexei Starovoitov 			if (seen_exit) {
129162258278SAlexei Starovoitov 				jmp_offset = ctx->cleanup_addr - addrs[i];
129262258278SAlexei Starovoitov 				goto emit_jmp;
129362258278SAlexei Starovoitov 			}
1294769e0de6SAlexei Starovoitov 			seen_exit = true;
1295a2c7a983SIngo Molnar 			/* Update cleanup_addr */
129662258278SAlexei Starovoitov 			ctx->cleanup_addr = proglen;
1297fe8d9571SAlexei Starovoitov 			if (!bpf_prog_was_classic(bpf_prog))
1298fe8d9571SAlexei Starovoitov 				EMIT1(0x5B); /* get rid of tail_call_cnt */
1299fe8d9571SAlexei Starovoitov 			EMIT2(0x41, 0x5F);   /* pop r15 */
1300fe8d9571SAlexei Starovoitov 			EMIT2(0x41, 0x5E);   /* pop r14 */
1301fe8d9571SAlexei Starovoitov 			EMIT2(0x41, 0x5D);   /* pop r13 */
1302fe8d9571SAlexei Starovoitov 			EMIT1(0x5B);         /* pop rbx */
130362258278SAlexei Starovoitov 			EMIT1(0xC9);         /* leave */
130462258278SAlexei Starovoitov 			EMIT1(0xC3);         /* ret */
13050a14842fSEric Dumazet 			break;
13060a14842fSEric Dumazet 
13070a14842fSEric Dumazet 		default:
1308a2c7a983SIngo Molnar 			/*
1309a2c7a983SIngo Molnar 			 * By design the x86-64 JIT should support all BPF instructions.
131062258278SAlexei Starovoitov 			 * This error will be seen if a new instruction was added
1311a2c7a983SIngo Molnar 			 * to the interpreter, but not to the JIT, or if there is
1312a2c7a983SIngo Molnar 			 * junk in bpf_prog.
131362258278SAlexei Starovoitov 			 */
131462258278SAlexei Starovoitov 			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
1315f3c2af7bSAlexei Starovoitov 			return -EINVAL;
13160a14842fSEric Dumazet 		}
131762258278SAlexei Starovoitov 
13180a14842fSEric Dumazet 		ilen = prog - temp;
1319e0ee9c12SAlexei Starovoitov 		if (ilen > BPF_MAX_INSN_SIZE) {
13209383191dSDaniel Borkmann 			pr_err("bpf_jit: fatal insn size error\n");
1321e0ee9c12SAlexei Starovoitov 			return -EFAULT;
1322e0ee9c12SAlexei Starovoitov 		}
1323e0ee9c12SAlexei Starovoitov 
13240a14842fSEric Dumazet 		if (image) {
13250a14842fSEric Dumazet 			if (unlikely(proglen + ilen > oldproglen)) {
13269383191dSDaniel Borkmann 				pr_err("bpf_jit: fatal error\n");
1327f3c2af7bSAlexei Starovoitov 				return -EFAULT;
13280a14842fSEric Dumazet 			}
13290a14842fSEric Dumazet 			memcpy(image + proglen, temp, ilen);
13300a14842fSEric Dumazet 		}
13310a14842fSEric Dumazet 		proglen += ilen;
13320a14842fSEric Dumazet 		addrs[i] = proglen;
13330a14842fSEric Dumazet 		prog = temp;
13340a14842fSEric Dumazet 	}
13353dec541bSAlexei Starovoitov 
13363dec541bSAlexei Starovoitov 	if (image && excnt != bpf_prog->aux->num_exentries) {
13373dec541bSAlexei Starovoitov 		pr_err("extable is not populated\n");
13383dec541bSAlexei Starovoitov 		return -EFAULT;
13393dec541bSAlexei Starovoitov 	}
1340f3c2af7bSAlexei Starovoitov 	return proglen;
1341f3c2af7bSAlexei Starovoitov }
1342f3c2af7bSAlexei Starovoitov 
134385d33df3SMartin KaFai Lau static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
1344fec56f58SAlexei Starovoitov 		      int stack_size)
1345fec56f58SAlexei Starovoitov {
1346fec56f58SAlexei Starovoitov 	int i;
1347fec56f58SAlexei Starovoitov 	/* Store function arguments to stack.
1348fec56f58SAlexei Starovoitov 	 * For a function that accepts two pointers the sequence will be:
1349fec56f58SAlexei Starovoitov 	 * mov QWORD PTR [rbp-0x10],rdi
1350fec56f58SAlexei Starovoitov 	 * mov QWORD PTR [rbp-0x8],rsi
1351fec56f58SAlexei Starovoitov 	 */
1352fec56f58SAlexei Starovoitov 	for (i = 0; i < min(nr_args, 6); i++)
1353fec56f58SAlexei Starovoitov 		emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
1354fec56f58SAlexei Starovoitov 			 BPF_REG_FP,
1355fec56f58SAlexei Starovoitov 			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1356fec56f58SAlexei Starovoitov 			 -(stack_size - i * 8));
1357fec56f58SAlexei Starovoitov }
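/* Sketch of the resulting frame for an assumed three-argument function
 * (nr_args == 3, stack_size == 24), following the x86-64 SysV argument
 * order rdi, rsi, rdx, rcx, r8, r9:
 *
 *	mov QWORD PTR [rbp-0x18], rdi	// arg1
 *	mov QWORD PTR [rbp-0x10], rsi	// arg2
 *	mov QWORD PTR [rbp-0x8],  rdx	// arg3
 *
 * The i == 5 special case exists because BPF has only five argument
 * registers (R1-R5), so the sixth x86 argument register (r9) has no BPF
 * alias and is addressed directly as X86_REG_R9.
 */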
1358fec56f58SAlexei Starovoitov 
135985d33df3SMartin KaFai Lau static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
1360fec56f58SAlexei Starovoitov 			 int stack_size)
1361fec56f58SAlexei Starovoitov {
1362fec56f58SAlexei Starovoitov 	int i;
1363fec56f58SAlexei Starovoitov 
1364fec56f58SAlexei Starovoitov 	/* Restore function arguments from stack.
1365fec56f58SAlexei Starovoitov 	 * For a function that accepts two pointers the sequence will be:
1366fec56f58SAlexei Starovoitov 	 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
1367fec56f58SAlexei Starovoitov 	 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
1368fec56f58SAlexei Starovoitov 	 */
1369fec56f58SAlexei Starovoitov 	for (i = 0; i < min(nr_args, 6); i++)
1370fec56f58SAlexei Starovoitov 		emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
1371fec56f58SAlexei Starovoitov 			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
1372fec56f58SAlexei Starovoitov 			 BPF_REG_FP,
1373fec56f58SAlexei Starovoitov 			 -(stack_size - i * 8));
1374fec56f58SAlexei Starovoitov }
1375fec56f58SAlexei Starovoitov 
13767e639208SKP Singh static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
1377ae240823SKP Singh 			   struct bpf_prog *p, int stack_size, bool mod_ret)
1378fec56f58SAlexei Starovoitov {
1379fec56f58SAlexei Starovoitov 	u8 *prog = *pprog;
13807e639208SKP Singh 	int cnt = 0;
1381fec56f58SAlexei Starovoitov 
1382fec56f58SAlexei Starovoitov 	if (emit_call(&prog, __bpf_prog_enter, prog))
1383fec56f58SAlexei Starovoitov 		return -EINVAL;
1384fec56f58SAlexei Starovoitov 	/* remember prog start time returned by __bpf_prog_enter */
1385fec56f58SAlexei Starovoitov 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
1386fec56f58SAlexei Starovoitov 
1387fec56f58SAlexei Starovoitov 	/* arg1: lea rdi, [rbp - stack_size] */
1388fec56f58SAlexei Starovoitov 	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
1389fec56f58SAlexei Starovoitov 	/* arg2: progs[i]->insnsi for interpreter */
13907e639208SKP Singh 	if (!p->jited)
1391fec56f58SAlexei Starovoitov 		emit_mov_imm64(&prog, BPF_REG_2,
13927e639208SKP Singh 			       (long) p->insnsi >> 32,
13937e639208SKP Singh 			       (u32) (long) p->insnsi);
1394fec56f58SAlexei Starovoitov 	/* call JITed bpf program or interpreter */
13957e639208SKP Singh 	if (emit_call(&prog, p->bpf_func, prog))
1396fec56f58SAlexei Starovoitov 		return -EINVAL;
1397fec56f58SAlexei Starovoitov 
1398ae240823SKP Singh 	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
1399ae240823SKP Singh 	 * value of the previous call, which is then passed on the stack to
1400ae240823SKP Singh 	 * the next BPF program.
1401ae240823SKP Singh 	 */
1402ae240823SKP Singh 	if (mod_ret)
1403ae240823SKP Singh 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
1404ae240823SKP Singh 
1405fec56f58SAlexei Starovoitov 	/* arg1: mov rdi, progs[i] */
14067e639208SKP Singh 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
14077e639208SKP Singh 		       (u32) (long) p);
1408fec56f58SAlexei Starovoitov 	/* arg2: mov rsi, rbx <- start time in nsec */
1409fec56f58SAlexei Starovoitov 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
1410fec56f58SAlexei Starovoitov 	if (emit_call(&prog, __bpf_prog_exit, prog))
1411fec56f58SAlexei Starovoitov 		return -EINVAL;
14127e639208SKP Singh 
14137e639208SKP Singh 	*pprog = prog;
14147e639208SKP Singh 	return 0;
14157e639208SKP Singh }
14167e639208SKP Singh 
14177e639208SKP Singh static void emit_nops(u8 **pprog, unsigned int len)
14187e639208SKP Singh {
14197e639208SKP Singh 	unsigned int i, noplen;
14207e639208SKP Singh 	u8 *prog = *pprog;
14217e639208SKP Singh 	int cnt = 0;
14227e639208SKP Singh 
14237e639208SKP Singh 	while (len > 0) {
14247e639208SKP Singh 		noplen = len;
14257e639208SKP Singh 
14267e639208SKP Singh 		if (noplen > ASM_NOP_MAX)
14277e639208SKP Singh 			noplen = ASM_NOP_MAX;
14287e639208SKP Singh 
14297e639208SKP Singh 		for (i = 0; i < noplen; i++)
14307e639208SKP Singh 			EMIT1(ideal_nops[noplen][i]);
14317e639208SKP Singh 		len -= noplen;
14327e639208SKP Singh 	}
14337e639208SKP Singh 
14347e639208SKP Singh 	*pprog = prog;
14357e639208SKP Singh }
14367e639208SKP Singh 
14377e639208SKP Singh static void emit_align(u8 **pprog, u32 align)
14387e639208SKP Singh {
14397e639208SKP Singh 	u8 *target, *prog = *pprog;
14407e639208SKP Singh 
14417e639208SKP Singh 	target = PTR_ALIGN(prog, align);
14427e639208SKP Singh 	if (target != prog)
14437e639208SKP Singh 		emit_nops(&prog, target - prog);
14447e639208SKP Singh 
14457e639208SKP Singh 	*pprog = prog;
14467e639208SKP Singh }
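/* Worked example, assuming prog currently points at address 0x1009:
 * PTR_ALIGN(prog, 16) yields 0x1010, so emit_nops() pads with 7 bytes
 * of nops; if prog is already 16-byte aligned, nothing is emitted.
 */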
14477e639208SKP Singh 
14487e639208SKP Singh static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
14497e639208SKP Singh {
14507e639208SKP Singh 	u8 *prog = *pprog;
14517e639208SKP Singh 	int cnt = 0;
14527e639208SKP Singh 	s64 offset;
14537e639208SKP Singh 
14547e639208SKP Singh 	offset = func - (ip + 2 + 4);
14557e639208SKP Singh 	if (!is_simm32(offset)) {
14567e639208SKP Singh 		pr_err("Target %p is out of range\n", func);
14577e639208SKP Singh 		return -EINVAL;
14587e639208SKP Singh 	}
14597e639208SKP Singh 	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
14607e639208SKP Singh 	*pprog = prog;
14617e639208SKP Singh 	return 0;
14627e639208SKP Singh }
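/* The "+ 2 + 4" accounts for the near Jcc being six bytes long (two
 * opcode bytes plus the rel32), since the rel32 displacement is
 * relative to the end of the instruction. Worked example with assumed
 * addresses: ip == 0x1000 and func == 0x2000 give
 * offset == 0x2000 - 0x1006 == 0xffa.
 */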
14637e639208SKP Singh 
14647e639208SKP Singh static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
14657e639208SKP Singh 		      struct bpf_tramp_progs *tp, int stack_size)
14667e639208SKP Singh {
14677e639208SKP Singh 	int i;
14687e639208SKP Singh 	u8 *prog = *pprog;
14697e639208SKP Singh 
14707e639208SKP Singh 	for (i = 0; i < tp->nr_progs; i++) {
1471ae240823SKP Singh 		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
14727e639208SKP Singh 			return -EINVAL;
1473fec56f58SAlexei Starovoitov 	}
1474fec56f58SAlexei Starovoitov 	*pprog = prog;
1475fec56f58SAlexei Starovoitov 	return 0;
1476fec56f58SAlexei Starovoitov }
1477fec56f58SAlexei Starovoitov 
1478ae240823SKP Singh static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
1479ae240823SKP Singh 			      struct bpf_tramp_progs *tp, int stack_size,
1480ae240823SKP Singh 			      u8 **branches)
1481ae240823SKP Singh {
1482ae240823SKP Singh 	u8 *prog = *pprog;
148313fac1d8SAlexei Starovoitov 	int i, cnt = 0;
1484ae240823SKP Singh 
1485ae240823SKP Singh 	/* The first fmod_ret program will receive a garbage return value.
1486ae240823SKP Singh 	 * Set this to 0 to avoid confusing the program.
1487ae240823SKP Singh 	 */
1488ae240823SKP Singh 	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
1489ae240823SKP Singh 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
1490ae240823SKP Singh 	for (i = 0; i < tp->nr_progs; i++) {
1491ae240823SKP Singh 		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
1492ae240823SKP Singh 			return -EINVAL;
1493ae240823SKP Singh 
149413fac1d8SAlexei Starovoitov 		/* The mod_ret prog stored its return value into [rbp - 8]. Emit:
149513fac1d8SAlexei Starovoitov 		 * if (*(u64 *)(rbp - 8) != 0)
1496ae240823SKP Singh 		 *	goto do_fexit;
1497ae240823SKP Singh 		 */
149813fac1d8SAlexei Starovoitov 		/* cmp QWORD PTR [rbp - 0x8], 0x0 */
149913fac1d8SAlexei Starovoitov 		EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00);
1500ae240823SKP Singh 
1501ae240823SKP Singh 		/* Save the location of the branch and generate 6 nops
1502ae240823SKP Singh 		 * (4 bytes for an offset and 2 bytes for the jump). These nops
1503ae240823SKP Singh 		 * are replaced with a conditional jump once do_fexit (i.e. the
1504ae240823SKP Singh 		 * start of the fexit invocation) is finalized.
1505ae240823SKP Singh 		 */
1506ae240823SKP Singh 		branches[i] = prog;
1507ae240823SKP Singh 		emit_nops(&prog, 4 + 2);
1508ae240823SKP Singh 	}
1509ae240823SKP Singh 
1510ae240823SKP Singh 	*pprog = prog;
1511ae240823SKP Singh 	return 0;
1512ae240823SKP Singh }
1513ae240823SKP Singh 
1514fec56f58SAlexei Starovoitov /* Example:
1515fec56f58SAlexei Starovoitov  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
1516fec56f58SAlexei Starovoitov  * its 'struct btf_func_model' will have nr_args=2.
1517fec56f58SAlexei Starovoitov  * The assembly code when eth_type_trans is executing after trampoline:
1518fec56f58SAlexei Starovoitov  *
1519fec56f58SAlexei Starovoitov  * push rbp
1520fec56f58SAlexei Starovoitov  * mov rbp, rsp
1521fec56f58SAlexei Starovoitov  * sub rsp, 16                     // space for skb and dev
1522fec56f58SAlexei Starovoitov  * push rbx                        // temp regs to pass start time
1523fec56f58SAlexei Starovoitov  * mov qword ptr [rbp - 16], rdi   // save skb pointer to stack
1524fec56f58SAlexei Starovoitov  * mov qword ptr [rbp - 8], rsi    // save dev pointer to stack
1525fec56f58SAlexei Starovoitov  * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
1526fec56f58SAlexei Starovoitov  * mov rbx, rax                    // remember start time if bpf stats are enabled
1527fec56f58SAlexei Starovoitov  * lea rdi, [rbp - 16]             // R1==ctx of bpf prog
1528fec56f58SAlexei Starovoitov  * call addr_of_jited_FENTRY_prog
1529fec56f58SAlexei Starovoitov  * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
1530fec56f58SAlexei Starovoitov  * mov rsi, rbx                    // prog start time
1531fec56f58SAlexei Starovoitov  * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
1532fec56f58SAlexei Starovoitov  * mov rdi, qword ptr [rbp - 16]   // restore skb pointer from stack
1533fec56f58SAlexei Starovoitov  * mov rsi, qword ptr [rbp - 8]    // restore dev pointer from stack
1534fec56f58SAlexei Starovoitov  * pop rbx
1535fec56f58SAlexei Starovoitov  * leave
1536fec56f58SAlexei Starovoitov  * ret
1537fec56f58SAlexei Starovoitov  *
1538fec56f58SAlexei Starovoitov  * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
1539fec56f58SAlexei Starovoitov  * replaced with 'call generated_bpf_trampoline'. When it returns
1540fec56f58SAlexei Starovoitov  * eth_type_trans will continue executing with original skb and dev pointers.
1541fec56f58SAlexei Starovoitov  *
1542fec56f58SAlexei Starovoitov  * The assembly code when eth_type_trans is called from trampoline:
1543fec56f58SAlexei Starovoitov  *
1544fec56f58SAlexei Starovoitov  * push rbp
1545fec56f58SAlexei Starovoitov  * mov rbp, rsp
1546fec56f58SAlexei Starovoitov  * sub rsp, 24                     // space for skb, dev, return value
1547fec56f58SAlexei Starovoitov  * push rbx                        // temp regs to pass start time
1548fec56f58SAlexei Starovoitov  * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
1549fec56f58SAlexei Starovoitov  * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
1550fec56f58SAlexei Starovoitov  * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
1551fec56f58SAlexei Starovoitov  * mov rbx, rax                    // remember start time if bpf stats are enabled
1552fec56f58SAlexei Starovoitov  * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
1553fec56f58SAlexei Starovoitov  * call addr_of_jited_FENTRY_prog  // bpf prog can access skb and dev
1554fec56f58SAlexei Starovoitov  * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
1555fec56f58SAlexei Starovoitov  * mov rsi, rbx                    // prog start time
1556fec56f58SAlexei Starovoitov  * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
1557fec56f58SAlexei Starovoitov  * mov rdi, qword ptr [rbp - 24]   // restore skb pointer from stack
1558fec56f58SAlexei Starovoitov  * mov rsi, qword ptr [rbp - 16]   // restore dev pointer from stack
1559fec56f58SAlexei Starovoitov  * call eth_type_trans+5           // execute body of eth_type_trans
1560fec56f58SAlexei Starovoitov  * mov qword ptr [rbp - 8], rax    // save return value
1561fec56f58SAlexei Starovoitov  * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
1562fec56f58SAlexei Starovoitov  * mov rbx, rax                    // remember start time if bpf stats are enabled
1563fec56f58SAlexei Starovoitov  * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
1564fec56f58SAlexei Starovoitov  * call addr_of_jited_FEXIT_prog   // bpf prog can access skb, dev, return value
1565fec56f58SAlexei Starovoitov  * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
1566fec56f58SAlexei Starovoitov  * mov rsi, rbx                    // prog start time
1567fec56f58SAlexei Starovoitov  * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
1568fec56f58SAlexei Starovoitov  * mov rax, qword ptr [rbp - 8]    // restore eth_type_trans's return value
1569fec56f58SAlexei Starovoitov  * pop rbx
1570fec56f58SAlexei Starovoitov  * leave
1571fec56f58SAlexei Starovoitov  * add rsp, 8                      // skip eth_type_trans's frame
1572fec56f58SAlexei Starovoitov  * ret                             // return to its caller
1573fec56f58SAlexei Starovoitov  */
157485d33df3SMartin KaFai Lau int arch_prepare_bpf_trampoline(void *image, void *image_end,
157585d33df3SMartin KaFai Lau 				const struct btf_func_model *m, u32 flags,
157688fd9e53SKP Singh 				struct bpf_tramp_progs *tprogs,
1577fec56f58SAlexei Starovoitov 				void *orig_call)
1578fec56f58SAlexei Starovoitov {
1579ae240823SKP Singh 	int ret, i, cnt = 0, nr_args = m->nr_args;
1580fec56f58SAlexei Starovoitov 	int stack_size = nr_args * 8;
158188fd9e53SKP Singh 	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
158288fd9e53SKP Singh 	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
1583ae240823SKP Singh 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
1584ae240823SKP Singh 	u8 **branches = NULL;
1585fec56f58SAlexei Starovoitov 	u8 *prog;
1586fec56f58SAlexei Starovoitov 
1587fec56f58SAlexei Starovoitov 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
1588fec56f58SAlexei Starovoitov 	if (nr_args > 6)
1589fec56f58SAlexei Starovoitov 		return -ENOTSUPP;
1590fec56f58SAlexei Starovoitov 
1591fec56f58SAlexei Starovoitov 	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
1592fec56f58SAlexei Starovoitov 	    (flags & BPF_TRAMP_F_SKIP_FRAME))
1593fec56f58SAlexei Starovoitov 		return -EINVAL;
1594fec56f58SAlexei Starovoitov 
1595fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_CALL_ORIG)
1596fec56f58SAlexei Starovoitov 		stack_size += 8; /* room for return value of orig_call */
1597fec56f58SAlexei Starovoitov 
1598fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
1599fec56f58SAlexei Starovoitov 		/* skip patched call instruction and point orig_call to actual
1600fec56f58SAlexei Starovoitov 		 * body of the kernel function.
1601fec56f58SAlexei Starovoitov 		 */
16024b3da77bSDaniel Borkmann 		orig_call += X86_PATCH_SIZE;
1603fec56f58SAlexei Starovoitov 
1604fec56f58SAlexei Starovoitov 	prog = image;
1605fec56f58SAlexei Starovoitov 
1606fec56f58SAlexei Starovoitov 	EMIT1(0x55);		 /* push rbp */
1607fec56f58SAlexei Starovoitov 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
1608fec56f58SAlexei Starovoitov 	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
1609fec56f58SAlexei Starovoitov 	EMIT1(0x53);		 /* push rbx */
1610fec56f58SAlexei Starovoitov 
1611fec56f58SAlexei Starovoitov 	save_regs(m, &prog, nr_args, stack_size);
1612fec56f58SAlexei Starovoitov 
161388fd9e53SKP Singh 	if (fentry->nr_progs)
161488fd9e53SKP Singh 		if (invoke_bpf(m, &prog, fentry, stack_size))
1615fec56f58SAlexei Starovoitov 			return -EINVAL;
1616fec56f58SAlexei Starovoitov 
1617ae240823SKP Singh 	if (fmod_ret->nr_progs) {
1618ae240823SKP Singh 		branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
1619ae240823SKP Singh 				   GFP_KERNEL);
1620ae240823SKP Singh 		if (!branches)
1621ae240823SKP Singh 			return -ENOMEM;
1622ae240823SKP Singh 
1623ae240823SKP Singh 		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
1624ae240823SKP Singh 				       branches)) {
1625ae240823SKP Singh 			ret = -EINVAL;
1626ae240823SKP Singh 			goto cleanup;
1627ae240823SKP Singh 		}
1628ae240823SKP Singh 	}
1629ae240823SKP Singh 
1630fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
1631ae240823SKP Singh 		if (fentry->nr_progs || fmod_ret->nr_progs)
1632fec56f58SAlexei Starovoitov 			restore_regs(m, &prog, nr_args, stack_size);
1633fec56f58SAlexei Starovoitov 
1634fec56f58SAlexei Starovoitov 		/* call original function */
1635ae240823SKP Singh 		if (emit_call(&prog, orig_call, prog)) {
1636ae240823SKP Singh 			ret = -EINVAL;
1637ae240823SKP Singh 			goto cleanup;
1638ae240823SKP Singh 		}
1639fec56f58SAlexei Starovoitov 		/* remember the return value on the stack for the bpf prog to access */
1640fec56f58SAlexei Starovoitov 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
1641fec56f58SAlexei Starovoitov 	}
1642fec56f58SAlexei Starovoitov 
1643ae240823SKP Singh 	if (fmod_ret->nr_progs) {
1644ae240823SKP Singh 		/* From Intel 64 and IA-32 Architectures Optimization
1645ae240823SKP Singh 		 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
1646ae240823SKP Singh 		 * Coding Rule 11: All branch targets should be 16-byte
1647ae240823SKP Singh 		 * aligned.
1648ae240823SKP Singh 		 */
1649ae240823SKP Singh 		emit_align(&prog, 16);
1650ae240823SKP Singh 		/* Update the branches saved in invoke_bpf_mod_ret with the
1651ae240823SKP Singh 		 * aligned address of do_fexit.
1652ae240823SKP Singh 		 */
1653ae240823SKP Singh 		for (i = 0; i < fmod_ret->nr_progs; i++)
1654ae240823SKP Singh 			emit_cond_near_jump(&branches[i], prog, branches[i],
1655ae240823SKP Singh 					    X86_JNE);
1656ae240823SKP Singh 	}
1657ae240823SKP Singh 
165888fd9e53SKP Singh 	if (fexit->nr_progs)
1659ae240823SKP Singh 		if (invoke_bpf(m, &prog, fexit, stack_size)) {
1660ae240823SKP Singh 			ret = -EINVAL;
1661ae240823SKP Singh 			goto cleanup;
1662ae240823SKP Singh 		}
1663fec56f58SAlexei Starovoitov 
1664fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
1665fec56f58SAlexei Starovoitov 		restore_regs(m, &prog, nr_args, stack_size);
1666fec56f58SAlexei Starovoitov 
1667ae240823SKP Singh 	/* This needs to be done regardless. If there were fmod_ret programs,
1668ae240823SKP Singh 	 * the return value is only updated on the stack and still needs to be
1669ae240823SKP Singh 	 * restored to R0.
1670ae240823SKP Singh 	 */
1671fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_CALL_ORIG)
1672fec56f58SAlexei Starovoitov 		/* restore original return value back into RAX */
1673fec56f58SAlexei Starovoitov 		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
1674fec56f58SAlexei Starovoitov 
1675fec56f58SAlexei Starovoitov 	EMIT1(0x5B); /* pop rbx */
1676fec56f58SAlexei Starovoitov 	EMIT1(0xC9); /* leave */
1677fec56f58SAlexei Starovoitov 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
1678fec56f58SAlexei Starovoitov 		/* skip our return address and return to parent */
1679fec56f58SAlexei Starovoitov 		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
1680fec56f58SAlexei Starovoitov 	EMIT1(0xC3); /* ret */
168185d33df3SMartin KaFai Lau 	/* Make sure the trampoline generation logic doesn't overflow */
1682ae240823SKP Singh 	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
1683ae240823SKP Singh 		ret = -EFAULT;
1684ae240823SKP Singh 		goto cleanup;
1685ae240823SKP Singh 	}
1686ae240823SKP Singh 	ret = prog - (u8 *)image;
1687ae240823SKP Singh 
1688ae240823SKP Singh cleanup:
1689ae240823SKP Singh 	kfree(branches);
1690ae240823SKP Singh 	return ret;
1691fec56f58SAlexei Starovoitov }
1692fec56f58SAlexei Starovoitov 
169375ccbef6SBjörn Töpel static int emit_fallback_jump(u8 **pprog)
169475ccbef6SBjörn Töpel {
169575ccbef6SBjörn Töpel 	u8 *prog = *pprog;
169675ccbef6SBjörn Töpel 	int err = 0;
169775ccbef6SBjörn Töpel 
169875ccbef6SBjörn Töpel #ifdef CONFIG_RETPOLINE
169975ccbef6SBjörn Töpel 	/* Note that this assumes that the compiler uses external
170075ccbef6SBjörn Töpel 	 * thunks for indirect calls. Both clang and GCC use the same
170175ccbef6SBjörn Töpel 	 * naming convention for external thunks.
170275ccbef6SBjörn Töpel 	 */
170375ccbef6SBjörn Töpel 	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
170475ccbef6SBjörn Töpel #else
170575ccbef6SBjörn Töpel 	int cnt = 0;
170675ccbef6SBjörn Töpel 
170775ccbef6SBjörn Töpel 	EMIT2(0xFF, 0xE2);	/* jmp rdx */
170875ccbef6SBjörn Töpel #endif
170975ccbef6SBjörn Töpel 	*pprog = prog;
171075ccbef6SBjörn Töpel 	return err;
171175ccbef6SBjörn Töpel }
171275ccbef6SBjörn Töpel 
171375ccbef6SBjörn Töpel static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
171475ccbef6SBjörn Töpel {
17157e639208SKP Singh 	u8 *jg_reloc, *prog = *pprog;
171675ccbef6SBjörn Töpel 	int pivot, err, jg_bytes = 1, cnt = 0;
171775ccbef6SBjörn Töpel 	s64 jg_offset;
171875ccbef6SBjörn Töpel 
171975ccbef6SBjörn Töpel 	if (a == b) {
172075ccbef6SBjörn Töpel 		/* Leaf node of recursion, i.e. not a range of indices
172175ccbef6SBjörn Töpel 		 * anymore.
172275ccbef6SBjörn Töpel 		 */
172375ccbef6SBjörn Töpel 		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
172475ccbef6SBjörn Töpel 		if (!is_simm32(progs[a]))
172575ccbef6SBjörn Töpel 			return -1;
172675ccbef6SBjörn Töpel 		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
172775ccbef6SBjörn Töpel 			    progs[a]);
172875ccbef6SBjörn Töpel 		err = emit_cond_near_jump(&prog,	/* je func */
172975ccbef6SBjörn Töpel 					  (void *)progs[a], prog,
173075ccbef6SBjörn Töpel 					  X86_JE);
173175ccbef6SBjörn Töpel 		if (err)
173275ccbef6SBjörn Töpel 			return err;
173375ccbef6SBjörn Töpel 
173475ccbef6SBjörn Töpel 		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
173575ccbef6SBjörn Töpel 		if (err)
173675ccbef6SBjörn Töpel 			return err;
173775ccbef6SBjörn Töpel 
173875ccbef6SBjörn Töpel 		*pprog = prog;
173975ccbef6SBjörn Töpel 		return 0;
174075ccbef6SBjörn Töpel 	}
174175ccbef6SBjörn Töpel 
174275ccbef6SBjörn Töpel 	/* Not a leaf node, so we pivot, and recursively descend into
174375ccbef6SBjörn Töpel 	 * the lower and upper ranges.
174475ccbef6SBjörn Töpel 	 */
174575ccbef6SBjörn Töpel 	pivot = (b - a) / 2;
174675ccbef6SBjörn Töpel 	EMIT1(add_1mod(0x48, BPF_REG_3));		/* cmp rdx,func */
174775ccbef6SBjörn Töpel 	if (!is_simm32(progs[a + pivot]))
174875ccbef6SBjörn Töpel 		return -1;
174975ccbef6SBjörn Töpel 	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
175075ccbef6SBjörn Töpel 
175175ccbef6SBjörn Töpel 	if (pivot > 2) {				/* jg upper_part */
175275ccbef6SBjörn Töpel 		/* Require near jump. */
175375ccbef6SBjörn Töpel 		jg_bytes = 4;
175475ccbef6SBjörn Töpel 		EMIT2_off32(0x0F, X86_JG + 0x10, 0);
175575ccbef6SBjörn Töpel 	} else {
175675ccbef6SBjörn Töpel 		EMIT2(X86_JG, 0);
175775ccbef6SBjörn Töpel 	}
175875ccbef6SBjörn Töpel 	jg_reloc = prog;
175975ccbef6SBjörn Töpel 
176075ccbef6SBjörn Töpel 	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
176175ccbef6SBjörn Töpel 				  progs);
176275ccbef6SBjörn Töpel 	if (err)
176375ccbef6SBjörn Töpel 		return err;
176475ccbef6SBjörn Töpel 
1765116eb788SBjörn Töpel 	/* From Intel 64 and IA-32 Architectures Optimization
1766116eb788SBjörn Töpel 	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
1767116eb788SBjörn Töpel 	 * Coding Rule 11: All branch targets should be 16-byte
1768116eb788SBjörn Töpel 	 * aligned.
1769116eb788SBjörn Töpel 	 */
17707e639208SKP Singh 	emit_align(&prog, 16);
177175ccbef6SBjörn Töpel 	jg_offset = prog - jg_reloc;
177275ccbef6SBjörn Töpel 	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
177375ccbef6SBjörn Töpel 
177475ccbef6SBjörn Töpel 	err = emit_bpf_dispatcher(&prog, a + pivot + 1,	/* emit upper_part */
177575ccbef6SBjörn Töpel 				  b, progs);
177675ccbef6SBjörn Töpel 	if (err)
177775ccbef6SBjörn Töpel 		return err;
177875ccbef6SBjörn Töpel 
177975ccbef6SBjörn Töpel 	*pprog = prog;
178075ccbef6SBjörn Töpel 	return 0;
178175ccbef6SBjörn Töpel }
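/* Shape of the code this recursion emits for an assumed sorted set of
 * four program addresses {A, B, C, D}, with the target in rdx:
 *
 *	cmp rdx, B		// pivot of {A, B, C, D}
 *	jg  upper
 *	cmp rdx, A		// pivot of {A, B}
 *	jg  1f
 *	cmp rdx, A		// leaf: A
 *	je  A
 *	jmp fallback
 * 1:	cmp rdx, B		// leaf: B
 *	je  B
 *	jmp fallback
 * upper:
 *	...			// same pattern for {C, D}
 *
 * i.e. a binary search over the sorted addresses, ending in the
 * fallback (retpoline thunk or plain jmp rdx) when nothing matches.
 */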
178275ccbef6SBjörn Töpel 
178375ccbef6SBjörn Töpel static int cmp_ips(const void *a, const void *b)
178475ccbef6SBjörn Töpel {
178575ccbef6SBjörn Töpel 	const s64 *ipa = a;
178675ccbef6SBjörn Töpel 	const s64 *ipb = b;
178775ccbef6SBjörn Töpel 
178875ccbef6SBjörn Töpel 	if (*ipa > *ipb)
178975ccbef6SBjörn Töpel 		return 1;
179075ccbef6SBjörn Töpel 	if (*ipa < *ipb)
179175ccbef6SBjörn Töpel 		return -1;
179275ccbef6SBjörn Töpel 	return 0;
179375ccbef6SBjörn Töpel }
179475ccbef6SBjörn Töpel 
179575ccbef6SBjörn Töpel int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
179675ccbef6SBjörn Töpel {
179775ccbef6SBjörn Töpel 	u8 *prog = image;
179875ccbef6SBjörn Töpel 
179975ccbef6SBjörn Töpel 	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
180075ccbef6SBjörn Töpel 	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
180175ccbef6SBjörn Töpel }
180275ccbef6SBjörn Töpel 
18031c2a088aSAlexei Starovoitov struct x64_jit_data {
18041c2a088aSAlexei Starovoitov 	struct bpf_binary_header *header;
18051c2a088aSAlexei Starovoitov 	int *addrs;
18061c2a088aSAlexei Starovoitov 	u8 *image;
18071c2a088aSAlexei Starovoitov 	int proglen;
18081c2a088aSAlexei Starovoitov 	struct jit_context ctx;
18091c2a088aSAlexei Starovoitov };
18101c2a088aSAlexei Starovoitov 
1811d1c55ab5SDaniel Borkmann struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
181262258278SAlexei Starovoitov {
1813f3c2af7bSAlexei Starovoitov 	struct bpf_binary_header *header = NULL;
1814959a7579SDaniel Borkmann 	struct bpf_prog *tmp, *orig_prog = prog;
18151c2a088aSAlexei Starovoitov 	struct x64_jit_data *jit_data;
1816f3c2af7bSAlexei Starovoitov 	int proglen, oldproglen = 0;
1817f3c2af7bSAlexei Starovoitov 	struct jit_context ctx = {};
1818959a7579SDaniel Borkmann 	bool tmp_blinded = false;
18191c2a088aSAlexei Starovoitov 	bool extra_pass = false;
1820f3c2af7bSAlexei Starovoitov 	u8 *image = NULL;
1821f3c2af7bSAlexei Starovoitov 	int *addrs;
1822f3c2af7bSAlexei Starovoitov 	int pass;
1823f3c2af7bSAlexei Starovoitov 	int i;
1824f3c2af7bSAlexei Starovoitov 
182560b58afcSAlexei Starovoitov 	if (!prog->jit_requested)
1826959a7579SDaniel Borkmann 		return orig_prog;
1827959a7579SDaniel Borkmann 
1828959a7579SDaniel Borkmann 	tmp = bpf_jit_blind_constants(prog);
1829a2c7a983SIngo Molnar 	/*
1830a2c7a983SIngo Molnar 	 * If blinding was requested and we failed during blinding,
1831959a7579SDaniel Borkmann 	 * we must fall back to the interpreter.
1832959a7579SDaniel Borkmann 	 */
1833959a7579SDaniel Borkmann 	if (IS_ERR(tmp))
1834959a7579SDaniel Borkmann 		return orig_prog;
1835959a7579SDaniel Borkmann 	if (tmp != prog) {
1836959a7579SDaniel Borkmann 		tmp_blinded = true;
1837959a7579SDaniel Borkmann 		prog = tmp;
1838959a7579SDaniel Borkmann 	}
1839f3c2af7bSAlexei Starovoitov 
18401c2a088aSAlexei Starovoitov 	jit_data = prog->aux->jit_data;
18411c2a088aSAlexei Starovoitov 	if (!jit_data) {
18421c2a088aSAlexei Starovoitov 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
18431c2a088aSAlexei Starovoitov 		if (!jit_data) {
18441c2a088aSAlexei Starovoitov 			prog = orig_prog;
18451c2a088aSAlexei Starovoitov 			goto out;
18461c2a088aSAlexei Starovoitov 		}
18471c2a088aSAlexei Starovoitov 		prog->aux->jit_data = jit_data;
18481c2a088aSAlexei Starovoitov 	}
18491c2a088aSAlexei Starovoitov 	addrs = jit_data->addrs;
18501c2a088aSAlexei Starovoitov 	if (addrs) {
18511c2a088aSAlexei Starovoitov 		ctx = jit_data->ctx;
18521c2a088aSAlexei Starovoitov 		oldproglen = jit_data->proglen;
18531c2a088aSAlexei Starovoitov 		image = jit_data->image;
18541c2a088aSAlexei Starovoitov 		header = jit_data->header;
18551c2a088aSAlexei Starovoitov 		extra_pass = true;
18561c2a088aSAlexei Starovoitov 		goto skip_init_addrs;
18571c2a088aSAlexei Starovoitov 	}
18587c2e988fSAlexei Starovoitov 	addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
1859959a7579SDaniel Borkmann 	if (!addrs) {
1860959a7579SDaniel Borkmann 		prog = orig_prog;
18611c2a088aSAlexei Starovoitov 		goto out_addrs;
1862959a7579SDaniel Borkmann 	}
1863f3c2af7bSAlexei Starovoitov 
1864a2c7a983SIngo Molnar 	/*
1865a2c7a983SIngo Molnar 	 * Before the first pass, make a rough estimate of addrs[]:
1866a2c7a983SIngo Molnar 	 * each BPF instruction is translated to less than 64 bytes.
1867f3c2af7bSAlexei Starovoitov 	 */
18687c2e988fSAlexei Starovoitov 	for (proglen = 0, i = 0; i <= prog->len; i++) {
1869f3c2af7bSAlexei Starovoitov 		proglen += 64;
1870f3c2af7bSAlexei Starovoitov 		addrs[i] = proglen;
1871f3c2af7bSAlexei Starovoitov 	}
1872f3c2af7bSAlexei Starovoitov 	ctx.cleanup_addr = proglen;
18731c2a088aSAlexei Starovoitov skip_init_addrs:
1874f3c2af7bSAlexei Starovoitov 
1875a2c7a983SIngo Molnar 	/*
1876a2c7a983SIngo Molnar 	 * JITed image shrinks with every pass and the loop iterates
1877a2c7a983SIngo Molnar 	 * until the image stops shrinking. Very large BPF programs
18783f7352bfSAlexei Starovoitov 	 * may converge on the last pass. In such a case, do one more
1879a2c7a983SIngo Molnar 	 * pass to emit the final image.
18803f7352bfSAlexei Starovoitov 	 */
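	/*
	 * Convergence sketch: addrs[] starts from the pessimistic
	 * 64-bytes-per-insn estimate, so most jump offsets look wide and
	 * get rel32 encodings. As passes shrink the distances, some jumps
	 * fit in rel8 and the image shrinks further, until proglen ==
	 * oldproglen; the layout is then stable, memory is allocated and
	 * one final pass writes the image.
	 */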
18816007b080SDaniel Borkmann 	for (pass = 0; pass < 20 || image; pass++) {
1882f3c2af7bSAlexei Starovoitov 		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
1883f3c2af7bSAlexei Starovoitov 		if (proglen <= 0) {
18843aab8884SDaniel Borkmann out_image:
1885f3c2af7bSAlexei Starovoitov 			image = NULL;
1886f3c2af7bSAlexei Starovoitov 			if (header)
1887738cbe72SDaniel Borkmann 				bpf_jit_binary_free(header);
1888959a7579SDaniel Borkmann 			prog = orig_prog;
1889959a7579SDaniel Borkmann 			goto out_addrs;
1890f3c2af7bSAlexei Starovoitov 		}
18910a14842fSEric Dumazet 		if (image) {
1892e0ee9c12SAlexei Starovoitov 			if (proglen != oldproglen) {
1893f3c2af7bSAlexei Starovoitov 				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
1894f3c2af7bSAlexei Starovoitov 				       proglen, oldproglen);
18953aab8884SDaniel Borkmann 				goto out_image;
1896e0ee9c12SAlexei Starovoitov 			}
18970a14842fSEric Dumazet 			break;
18980a14842fSEric Dumazet 		}
18990a14842fSEric Dumazet 		if (proglen == oldproglen) {
19003dec541bSAlexei Starovoitov 			/*
19013dec541bSAlexei Starovoitov 			 * The number of entries in extable is the number of BPF_LDX
19023dec541bSAlexei Starovoitov 			 * insns that access kernel memory via "pointer to BTF type".
19033dec541bSAlexei Starovoitov 			 * The verifier changed their opcode from LDX|MEM|size
19043dec541bSAlexei Starovoitov 			 * to LDX|PROBE_MEM|size to make JITing easier.
19053dec541bSAlexei Starovoitov 			 */
19063dec541bSAlexei Starovoitov 			u32 align = __alignof__(struct exception_table_entry);
19073dec541bSAlexei Starovoitov 			u32 extable_size = prog->aux->num_exentries *
19083dec541bSAlexei Starovoitov 				sizeof(struct exception_table_entry);
19093dec541bSAlexei Starovoitov 
19103dec541bSAlexei Starovoitov 			/* allocate module memory for x86 insns and extable */
19113dec541bSAlexei Starovoitov 			header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
19123dec541bSAlexei Starovoitov 						      &image, align, jit_fill_hole);
1913959a7579SDaniel Borkmann 			if (!header) {
1914959a7579SDaniel Borkmann 				prog = orig_prog;
1915959a7579SDaniel Borkmann 				goto out_addrs;
1916959a7579SDaniel Borkmann 			}
19173dec541bSAlexei Starovoitov 			prog->aux->extable = (void *) image + roundup(proglen, align);
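			/*
			 * Resulting layout of the allocated image:
			 *
			 *	image ........... x86 insns (proglen bytes)
			 *	................. padding up to roundup(proglen, align)
			 *	aux->extable .... num_exentries exception entries
			 */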
19180a14842fSEric Dumazet 		}
19190a14842fSEric Dumazet 		oldproglen = proglen;
19206007b080SDaniel Borkmann 		cond_resched();
19210a14842fSEric Dumazet 	}
192279617801SDaniel Borkmann 
19230a14842fSEric Dumazet 	if (bpf_jit_enable > 1)
1924485d6511SDaniel Borkmann 		bpf_jit_dump(prog->len, proglen, pass + 1, image);
19250a14842fSEric Dumazet 
19260a14842fSEric Dumazet 	if (image) {
19271c2a088aSAlexei Starovoitov 		if (!prog->is_func || extra_pass) {
1928428d5df1SDaniel Borkmann 			bpf_tail_call_direct_fixup(prog);
19299d876e79SDaniel Borkmann 			bpf_jit_binary_lock_ro(header);
19301c2a088aSAlexei Starovoitov 		} else {
19311c2a088aSAlexei Starovoitov 			jit_data->addrs = addrs;
19321c2a088aSAlexei Starovoitov 			jit_data->ctx = ctx;
19331c2a088aSAlexei Starovoitov 			jit_data->proglen = proglen;
19341c2a088aSAlexei Starovoitov 			jit_data->image = image;
19351c2a088aSAlexei Starovoitov 			jit_data->header = header;
19361c2a088aSAlexei Starovoitov 		}
1937f3c2af7bSAlexei Starovoitov 		prog->bpf_func = (void *)image;
1938a91263d5SDaniel Borkmann 		prog->jited = 1;
1939783d28ddSMartin KaFai Lau 		prog->jited_len = proglen;
19409d5ecb09SDaniel Borkmann 	} else {
19419d5ecb09SDaniel Borkmann 		prog = orig_prog;
19420a14842fSEric Dumazet 	}
1943959a7579SDaniel Borkmann 
194439f56ca9SDaniel Borkmann 	if (!image || !prog->is_func || extra_pass) {
1945c454a46bSMartin KaFai Lau 		if (image)
19467c2e988fSAlexei Starovoitov 			bpf_prog_fill_jited_linfo(prog, addrs + 1);
1947959a7579SDaniel Borkmann out_addrs:
19480a14842fSEric Dumazet 		kfree(addrs);
19491c2a088aSAlexei Starovoitov 		kfree(jit_data);
19501c2a088aSAlexei Starovoitov 		prog->aux->jit_data = NULL;
19511c2a088aSAlexei Starovoitov 	}
1952959a7579SDaniel Borkmann out:
1953959a7579SDaniel Borkmann 	if (tmp_blinded)
1954959a7579SDaniel Borkmann 		bpf_jit_prog_release_other(prog, prog == orig_prog ?
1955959a7579SDaniel Borkmann 					   tmp : orig_prog);
1956d1c55ab5SDaniel Borkmann 	return prog;
19570a14842fSEric Dumazet }
1958