xref: /openbmc/linux/arch/x86/net/bpf_jit_comp32.c (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
4  *
5  * Author: Wang YanQing (udknight@gmail.com)
6  * The code based on code and ideas from:
7  * Eric Dumazet (eric.dumazet@gmail.com)
8  * and from:
9  * Shubham Bansal <illusionist.neo@gmail.com>
10  */
11 
12 #include <linux/netdevice.h>
13 #include <linux/filter.h>
14 #include <linux/if_vlan.h>
15 #include <asm/cacheflush.h>
16 #include <asm/set_memory.h>
17 #include <asm/nospec-branch.h>
18 #include <linux/bpf.h>
19 
20 /*
21  * eBPF prog stack layout:
22  *
23  *                         high
24  * original ESP =>        +-----+
25  *                        |     | callee saved registers
26  *                        +-----+
27  *                        | ... | eBPF JIT scratch space
28  * BPF_FP,IA32_EBP  =>    +-----+
29  *                        | ... | eBPF prog stack
30  *                        +-----+
31  *                        |RSVD | JIT scratchpad
32  * current ESP =>         +-----+
33  *                        |     |
34  *                        | ... | Function call stack
35  *                        |     |
36  *                        +-----+
37  *                          low
38  *
39  * The callee saved registers:
40  *
41  *                                high
42  * original ESP =>        +------------------+ \
43  *                        |        ebp       | |
44  * current EBP =>         +------------------+ } callee saved registers
45  *                        |    ebx,esi,edi   | |
46  *                        +------------------+ /
47  *                                low
48  */
49 
50 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
51 {
52 	if (len == 1)
53 		*ptr = bytes;
54 	else if (len == 2)
55 		*(u16 *)ptr = bytes;
56 	else {
57 		*(u32 *)ptr = bytes;
58 		barrier();
59 	}
60 	return ptr + len;
61 }
62 
/*
 * Emit helpers.
 *
 * All of them expect two locals in the calling function: 'prog', the u8 *
 * output cursor, and 'cnt', an int that accumulates the number of bytes
 * emitted so far by that function.
 *
 * The EMITn() forms pack their byte arguments into one u32 (b1 in the low
 * byte). Packing is done in unsigned arithmetic so that negative arguments
 * (jump displacements, signed immediates) and values with the top bit set
 * never left-shift a signed int; the resulting bit pattern is the same.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += (len); } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((u32)(b1) + ((u32)(b2) << 8), 2)
#define EMIT3(b1, b2, b3)	\
	EMIT((u32)(b1) + ((u32)(b2) << 8) + ((u32)(b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   \
	EMIT((u32)(b1) + ((u32)(b2) << 8) + ((u32)(b3) << 16) + \
	     ((u32)(b4) << 24), 4)

/* Opcode byte(s) followed by a 32-bit offset or immediate. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

/*
 * Relative displacement from the end of a jump instruction of length
 * 'jmp_insn_len', emitted at the current 'cnt', to the byte offset 'label'.
 */
#define jmp_label(label, jmp_insn_len) ((label) - cnt - (jmp_insn_len))
82 
/* Return true when 'value' fits in a signed 8-bit immediate. */
static bool is_imm8(int value)
{
	if (value < -128)
		return false;

	return value <= 127;
}
87 
88 static bool is_simm32(s64 value)
89 {
90 	return value == (s64) (s32) value;
91 }
92 
/* Scratch-space offsets are used unchanged. */
#define STACK_OFFSET(k)	(k)
/* Index of the tail call count entry in bpf2ia32[] */
#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)

/* IA32 register numbers as they appear in opcode and ModRM encodings */
#define IA32_EAX	(0x0)
#define IA32_ECX	(0x1)
#define IA32_EDX	(0x2)
#define IA32_EBX	(0x3)
#define IA32_ESP	(0x4)
#define IA32_EBP	(0x5)
#define IA32_ESI	(0x6)
#define IA32_EDI	(0x7)

/*
 * Opcodes of the short x86 conditional jumps (. + s8).
 * The far form (. + s32) is a 0x0f byte followed by (opcode + 0x10).
 */
#define IA32_JB  0x72
#define IA32_JAE 0x73
#define IA32_JE  0x74
#define IA32_JNE 0x75
#define IA32_JBE 0x76
#define IA32_JA  0x77
#define IA32_JL  0x7C
#define IA32_JGE 0x7D
#define IA32_JLE 0x7E
#define IA32_JG  0x7F

#define COND_JMP_OPCODE_INVALID	(0xFF)
121 
122 /*
123  * Map eBPF registers to IA32 32bit registers or stack scratch space.
124  *
125  * 1. All the registers, R0-R10, are mapped to scratch space on stack.
126  * 2. We need two 64 bit temp registers to do complex operations on eBPF
127  *    registers.
 * 3. For performance reasons, BPF_REG_AX (used for constant blinding) is
 *    mapped to a real hardware register pair, IA32_ESI and IA32_EDI.
130  *
 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
 * registers, we have to map each eBPF register to two IA32 32 bit registers
 * or to scratch memory space, and build the eBPF 64 bit register from those.
134  *
135  * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
136  */
137 static const u8 bpf2ia32[][2] = {
138 	/* Return value from in-kernel function, and exit value from eBPF */
139 	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
140 
141 	/* The arguments from eBPF program to in-kernel function */
142 	/* Stored on stack scratch space */
143 	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
144 	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
145 	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
146 	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
147 	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
148 
149 	/* Callee saved registers that in-kernel function will preserve */
150 	/* Stored on stack scratch space */
151 	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
152 	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
153 	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
154 	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
155 
156 	/* Read only Frame Pointer to access Stack */
157 	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
158 
159 	/* Temporary register for blinding constants. */
160 	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},
161 
162 	/* Tail call count. Stored on stack scratch space. */
163 	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
164 };
165 
/* Low and high 32-bit halves of a {lo, hi} register pair */
#define dst_lo	dst[0]
#define dst_hi	dst[1]
#define src_lo	src[0]
#define src_hi	src[1]

#define STACK_ALIGNMENT	8
/*
 * Size of the stack scratch space: one 8-byte slot each for
 * BPF_REG_0 ... BPF_REG_9, BPF_REG_FP and the tail call count.
 */
#define SCRATCH_SIZE 96

/* Stack used by JITed code: the eBPF program stack plus the scratch space */
#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)

/* The same, rounded up to STACK_ALIGNMENT */
#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Offset of an eBPF register half stored in the scratch space */
#define STACK_VAR(off) (off)
186 
187 /* Encode 'dst_reg' register into IA32 opcode 'byte' */
188 static u8 add_1reg(u8 byte, u32 dst_reg)
189 {
190 	return byte + dst_reg;
191 }
192 
193 /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
194 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
195 {
196 	return byte + dst_reg + (src_reg << 3);
197 }
198 
/* Fill 'size' bytes starting at 'area' with int3 (0xcc) instructions. */
static void jit_fill_hole(void *area, unsigned int size)
{
	const int int3_opcode = 0xcc;

	memset(area, int3_opcode, size);
}
204 
205 static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
206 				   u8 **pprog)
207 {
208 	u8 *prog = *pprog;
209 	int cnt = 0;
210 
211 	if (dstk) {
212 		if (val == 0) {
213 			/* xor eax,eax */
214 			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
215 			/* mov dword ptr [ebp+off],eax */
216 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
217 			      STACK_VAR(dst));
218 		} else {
219 			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
220 				    STACK_VAR(dst), val);
221 		}
222 	} else {
223 		if (val == 0)
224 			EMIT2(0x33, add_2reg(0xC0, dst, dst));
225 		else
226 			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
227 				    val);
228 	}
229 	*pprog = prog;
230 }
231 
232 /* dst = imm (4 bytes)*/
233 static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
234 				   bool sstk, u8 **pprog)
235 {
236 	u8 *prog = *pprog;
237 	int cnt = 0;
238 	u8 sreg = sstk ? IA32_EAX : src;
239 
240 	if (sstk)
241 		/* mov eax,dword ptr [ebp+off] */
242 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
243 	if (dstk)
244 		/* mov dword ptr [ebp+off],eax */
245 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
246 	else
247 		/* mov dst,sreg */
248 		EMIT2(0x89, add_2reg(0xC0, dst, sreg));
249 
250 	*pprog = prog;
251 }
252 
253 /* dst = src */
254 static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
255 				     const u8 src[], bool dstk,
256 				     bool sstk, u8 **pprog)
257 {
258 	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
259 	if (is64)
260 		/* complete 8 byte move */
261 		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
262 	else
263 		/* zero out high 4 bytes */
264 		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
265 }
266 
267 /* Sign extended move */
268 static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
269 				     const u32 val, bool dstk, u8 **pprog)
270 {
271 	u32 hi = 0;
272 
273 	if (is64 && (val & (1<<31)))
274 		hi = (u32)~0;
275 	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
276 	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
277 }
278 
279 /*
280  * ALU operation (32 bit)
281  * dst = dst * src
282  */
283 static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
284 				   bool sstk, u8 **pprog)
285 {
286 	u8 *prog = *pprog;
287 	int cnt = 0;
288 	u8 sreg = sstk ? IA32_ECX : src;
289 
290 	if (sstk)
291 		/* mov ecx,dword ptr [ebp+off] */
292 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
293 
294 	if (dstk)
295 		/* mov eax,dword ptr [ebp+off] */
296 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
297 	else
298 		/* mov eax,dst */
299 		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
300 
301 
302 	EMIT2(0xF7, add_1reg(0xE0, sreg));
303 
304 	if (dstk)
305 		/* mov dword ptr [ebp+off],eax */
306 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
307 		      STACK_VAR(dst));
308 	else
309 		/* mov dst,eax */
310 		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
311 
312 	*pprog = prog;
313 }
314 
315 static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
316 					 bool dstk, u8 **pprog)
317 {
318 	u8 *prog = *pprog;
319 	int cnt = 0;
320 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
321 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
322 
323 	if (dstk && val != 64) {
324 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
325 		      STACK_VAR(dst_lo));
326 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
327 		      STACK_VAR(dst_hi));
328 	}
329 	switch (val) {
330 	case 16:
331 		/*
332 		 * Emit 'movzwl eax,ax' to zero extend 16-bit
333 		 * into 64 bit
334 		 */
335 		EMIT2(0x0F, 0xB7);
336 		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
337 		/* xor dreg_hi,dreg_hi */
338 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
339 		break;
340 	case 32:
341 		/* xor dreg_hi,dreg_hi */
342 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
343 		break;
344 	case 64:
345 		/* nop */
346 		break;
347 	}
348 
349 	if (dstk && val != 64) {
350 		/* mov dword ptr [ebp+off],dreg_lo */
351 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
352 		      STACK_VAR(dst_lo));
353 		/* mov dword ptr [ebp+off],dreg_hi */
354 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
355 		      STACK_VAR(dst_hi));
356 	}
357 	*pprog = prog;
358 }
359 
360 static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
361 				       bool dstk, u8 **pprog)
362 {
363 	u8 *prog = *pprog;
364 	int cnt = 0;
365 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
366 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
367 
368 	if (dstk) {
369 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
370 		      STACK_VAR(dst_lo));
371 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
372 		      STACK_VAR(dst_hi));
373 	}
374 	switch (val) {
375 	case 16:
376 		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
377 		EMIT1(0x66);
378 		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
379 
380 		EMIT2(0x0F, 0xB7);
381 		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
382 
383 		/* xor dreg_hi,dreg_hi */
384 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
385 		break;
386 	case 32:
387 		/* Emit 'bswap eax' to swap lower 4 bytes */
388 		EMIT1(0x0F);
389 		EMIT1(add_1reg(0xC8, dreg_lo));
390 
391 		/* xor dreg_hi,dreg_hi */
392 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
393 		break;
394 	case 64:
395 		/* Emit 'bswap eax' to swap lower 4 bytes */
396 		EMIT1(0x0F);
397 		EMIT1(add_1reg(0xC8, dreg_lo));
398 
399 		/* Emit 'bswap edx' to swap lower 4 bytes */
400 		EMIT1(0x0F);
401 		EMIT1(add_1reg(0xC8, dreg_hi));
402 
403 		/* mov ecx,dreg_hi */
404 		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
405 		/* mov dreg_hi,dreg_lo */
406 		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
407 		/* mov dreg_lo,ecx */
408 		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
409 
410 		break;
411 	}
412 	if (dstk) {
413 		/* mov dword ptr [ebp+off],dreg_lo */
414 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
415 		      STACK_VAR(dst_lo));
416 		/* mov dword ptr [ebp+off],dreg_hi */
417 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
418 		      STACK_VAR(dst_hi));
419 	}
420 	*pprog = prog;
421 }
422 
423 /*
424  * ALU operation (32 bit)
425  * dst = dst (div|mod) src
426  */
427 static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
428 				       bool dstk, bool sstk, u8 **pprog)
429 {
430 	u8 *prog = *pprog;
431 	int cnt = 0;
432 
433 	if (sstk)
434 		/* mov ecx,dword ptr [ebp+off] */
435 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
436 		      STACK_VAR(src));
437 	else if (src != IA32_ECX)
438 		/* mov ecx,src */
439 		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
440 
441 	if (dstk)
442 		/* mov eax,dword ptr [ebp+off] */
443 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
444 		      STACK_VAR(dst));
445 	else
446 		/* mov eax,dst */
447 		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
448 
449 	/* xor edx,edx */
450 	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
451 	/* div ecx */
452 	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
453 
454 	if (op == BPF_MOD) {
455 		if (dstk)
456 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
457 			      STACK_VAR(dst));
458 		else
459 			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
460 	} else {
461 		if (dstk)
462 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
463 			      STACK_VAR(dst));
464 		else
465 			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
466 	}
467 	*pprog = prog;
468 }
469 
470 /*
471  * ALU operation (32 bit)
472  * dst = dst (shift) src
473  */
474 static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
475 				     bool dstk, bool sstk, u8 **pprog)
476 {
477 	u8 *prog = *pprog;
478 	int cnt = 0;
479 	u8 dreg = dstk ? IA32_EAX : dst;
480 	u8 b2;
481 
482 	if (dstk)
483 		/* mov eax,dword ptr [ebp+off] */
484 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
485 
486 	if (sstk)
487 		/* mov ecx,dword ptr [ebp+off] */
488 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
489 	else if (src != IA32_ECX)
490 		/* mov ecx,src */
491 		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
492 
493 	switch (op) {
494 	case BPF_LSH:
495 		b2 = 0xE0; break;
496 	case BPF_RSH:
497 		b2 = 0xE8; break;
498 	case BPF_ARSH:
499 		b2 = 0xF8; break;
500 	default:
501 		return;
502 	}
503 	EMIT2(0xD3, add_1reg(b2, dreg));
504 
505 	if (dstk)
506 		/* mov dword ptr [ebp+off],dreg */
507 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
508 	*pprog = prog;
509 }
510 
511 /*
512  * ALU operation (32 bit)
513  * dst = dst (op) src
514  */
515 static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
516 				   const u8 dst, const u8 src, bool dstk,
517 				   bool sstk, u8 **pprog)
518 {
519 	u8 *prog = *pprog;
520 	int cnt = 0;
521 	u8 sreg = sstk ? IA32_EAX : src;
522 	u8 dreg = dstk ? IA32_EDX : dst;
523 
524 	if (sstk)
525 		/* mov eax,dword ptr [ebp+off] */
526 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
527 
528 	if (dstk)
529 		/* mov eax,dword ptr [ebp+off] */
530 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
531 
532 	switch (BPF_OP(op)) {
533 	/* dst = dst + src */
534 	case BPF_ADD:
535 		if (hi && is64)
536 			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
537 		else
538 			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
539 		break;
540 	/* dst = dst - src */
541 	case BPF_SUB:
542 		if (hi && is64)
543 			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
544 		else
545 			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
546 		break;
547 	/* dst = dst | src */
548 	case BPF_OR:
549 		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
550 		break;
551 	/* dst = dst & src */
552 	case BPF_AND:
553 		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
554 		break;
555 	/* dst = dst ^ src */
556 	case BPF_XOR:
557 		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
558 		break;
559 	}
560 
561 	if (dstk)
562 		/* mov dword ptr [ebp+off],dreg */
563 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
564 		      STACK_VAR(dst));
565 	*pprog = prog;
566 }
567 
568 /* ALU operation (64 bit) */
569 static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
570 				     const u8 dst[], const u8 src[],
571 				     bool dstk,  bool sstk,
572 				     u8 **pprog)
573 {
574 	u8 *prog = *pprog;
575 
576 	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
577 	if (is64)
578 		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
579 				&prog);
580 	else
581 		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
582 	*pprog = prog;
583 }
584 
585 /*
586  * ALU operation (32 bit)
587  * dst = dst (op) val
588  */
589 static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
590 				   const u8 dst, const s32 val, bool dstk,
591 				   u8 **pprog)
592 {
593 	u8 *prog = *pprog;
594 	int cnt = 0;
595 	u8 dreg = dstk ? IA32_EAX : dst;
596 	u8 sreg = IA32_EDX;
597 
598 	if (dstk)
599 		/* mov eax,dword ptr [ebp+off] */
600 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
601 
602 	if (!is_imm8(val))
603 		/* mov edx,imm32*/
604 		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
605 
606 	switch (op) {
607 	/* dst = dst + val */
608 	case BPF_ADD:
609 		if (hi && is64) {
610 			if (is_imm8(val))
611 				EMIT3(0x83, add_1reg(0xD0, dreg), val);
612 			else
613 				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
614 		} else {
615 			if (is_imm8(val))
616 				EMIT3(0x83, add_1reg(0xC0, dreg), val);
617 			else
618 				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
619 		}
620 		break;
621 	/* dst = dst - val */
622 	case BPF_SUB:
623 		if (hi && is64) {
624 			if (is_imm8(val))
625 				EMIT3(0x83, add_1reg(0xD8, dreg), val);
626 			else
627 				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
628 		} else {
629 			if (is_imm8(val))
630 				EMIT3(0x83, add_1reg(0xE8, dreg), val);
631 			else
632 				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
633 		}
634 		break;
635 	/* dst = dst | val */
636 	case BPF_OR:
637 		if (is_imm8(val))
638 			EMIT3(0x83, add_1reg(0xC8, dreg), val);
639 		else
640 			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
641 		break;
642 	/* dst = dst & val */
643 	case BPF_AND:
644 		if (is_imm8(val))
645 			EMIT3(0x83, add_1reg(0xE0, dreg), val);
646 		else
647 			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
648 		break;
649 	/* dst = dst ^ val */
650 	case BPF_XOR:
651 		if (is_imm8(val))
652 			EMIT3(0x83, add_1reg(0xF0, dreg), val);
653 		else
654 			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
655 		break;
656 	case BPF_NEG:
657 		EMIT2(0xF7, add_1reg(0xD8, dreg));
658 		break;
659 	}
660 
661 	if (dstk)
662 		/* mov dword ptr [ebp+off],dreg */
663 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
664 		      STACK_VAR(dst));
665 	*pprog = prog;
666 }
667 
668 /* ALU operation (64 bit) */
669 static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
670 				     const u8 dst[], const u32 val,
671 				     bool dstk, u8 **pprog)
672 {
673 	u8 *prog = *pprog;
674 	u32 hi = 0;
675 
676 	if (is64 && (val & (1<<31)))
677 		hi = (u32)~0;
678 
679 	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
680 	if (is64)
681 		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
682 	else
683 		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
684 
685 	*pprog = prog;
686 }
687 
688 /* dst = ~dst (64 bit) */
689 static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
690 {
691 	u8 *prog = *pprog;
692 	int cnt = 0;
693 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
694 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
695 
696 	if (dstk) {
697 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
698 		      STACK_VAR(dst_lo));
699 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
700 		      STACK_VAR(dst_hi));
701 	}
702 
703 	/* neg dreg_lo */
704 	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
705 	/* adc dreg_hi,0x0 */
706 	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
707 	/* neg dreg_hi */
708 	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
709 
710 	if (dstk) {
711 		/* mov dword ptr [ebp+off],dreg_lo */
712 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
713 		      STACK_VAR(dst_lo));
714 		/* mov dword ptr [ebp+off],dreg_hi */
715 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
716 		      STACK_VAR(dst_hi));
717 	}
718 	*pprog = prog;
719 }
720 
721 /* dst = dst << src */
722 static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
723 				     bool dstk, bool sstk, u8 **pprog)
724 {
725 	u8 *prog = *pprog;
726 	int cnt = 0;
727 	static int jmp_label1 = -1;
728 	static int jmp_label2 = -1;
729 	static int jmp_label3 = -1;
730 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
731 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
732 
733 	if (dstk) {
734 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
735 		      STACK_VAR(dst_lo));
736 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
737 		      STACK_VAR(dst_hi));
738 	}
739 
740 	if (sstk)
741 		/* mov ecx,dword ptr [ebp+off] */
742 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
743 		      STACK_VAR(src_lo));
744 	else
745 		/* mov ecx,src_lo */
746 		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
747 
748 	/* cmp ecx,32 */
749 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
750 	/* Jumps when >= 32 */
751 	if (is_imm8(jmp_label(jmp_label1, 2)))
752 		EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
753 	else
754 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
755 
756 	/* < 32 */
757 	/* shl dreg_hi,cl */
758 	EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
759 	/* mov ebx,dreg_lo */
760 	EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
761 	/* shl dreg_lo,cl */
762 	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
763 
764 	/* IA32_ECX = -IA32_ECX + 32 */
765 	/* neg ecx */
766 	EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
767 	/* add ecx,32 */
768 	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
769 
770 	/* shr ebx,cl */
771 	EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
772 	/* or dreg_hi,ebx */
773 	EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
774 
775 	/* goto out; */
776 	if (is_imm8(jmp_label(jmp_label3, 2)))
777 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
778 	else
779 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
780 
781 	/* >= 32 */
782 	if (jmp_label1 == -1)
783 		jmp_label1 = cnt;
784 
785 	/* cmp ecx,64 */
786 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
787 	/* Jumps when >= 64 */
788 	if (is_imm8(jmp_label(jmp_label2, 2)))
789 		EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
790 	else
791 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
792 
793 	/* >= 32 && < 64 */
794 	/* sub ecx,32 */
795 	EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
796 	/* shl dreg_lo,cl */
797 	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
798 	/* mov dreg_hi,dreg_lo */
799 	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
800 
801 	/* xor dreg_lo,dreg_lo */
802 	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
803 
804 	/* goto out; */
805 	if (is_imm8(jmp_label(jmp_label3, 2)))
806 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
807 	else
808 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
809 
810 	/* >= 64 */
811 	if (jmp_label2 == -1)
812 		jmp_label2 = cnt;
813 	/* xor dreg_lo,dreg_lo */
814 	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
815 	/* xor dreg_hi,dreg_hi */
816 	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
817 
818 	if (jmp_label3 == -1)
819 		jmp_label3 = cnt;
820 
821 	if (dstk) {
822 		/* mov dword ptr [ebp+off],dreg_lo */
823 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
824 		      STACK_VAR(dst_lo));
825 		/* mov dword ptr [ebp+off],dreg_hi */
826 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
827 		      STACK_VAR(dst_hi));
828 	}
829 	/* out: */
830 	*pprog = prog;
831 }
832 
833 /* dst = dst >> src (signed)*/
834 static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
835 				      bool dstk, bool sstk, u8 **pprog)
836 {
837 	u8 *prog = *pprog;
838 	int cnt = 0;
839 	static int jmp_label1 = -1;
840 	static int jmp_label2 = -1;
841 	static int jmp_label3 = -1;
842 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
843 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
844 
845 	if (dstk) {
846 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
847 		      STACK_VAR(dst_lo));
848 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
849 		      STACK_VAR(dst_hi));
850 	}
851 
852 	if (sstk)
853 		/* mov ecx,dword ptr [ebp+off] */
854 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
855 		      STACK_VAR(src_lo));
856 	else
857 		/* mov ecx,src_lo */
858 		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
859 
860 	/* cmp ecx,32 */
861 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
862 	/* Jumps when >= 32 */
863 	if (is_imm8(jmp_label(jmp_label1, 2)))
864 		EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
865 	else
866 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
867 
868 	/* < 32 */
869 	/* lshr dreg_lo,cl */
870 	EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
871 	/* mov ebx,dreg_hi */
872 	EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
873 	/* ashr dreg_hi,cl */
874 	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
875 
876 	/* IA32_ECX = -IA32_ECX + 32 */
877 	/* neg ecx */
878 	EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
879 	/* add ecx,32 */
880 	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
881 
882 	/* shl ebx,cl */
883 	EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
884 	/* or dreg_lo,ebx */
885 	EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
886 
887 	/* goto out; */
888 	if (is_imm8(jmp_label(jmp_label3, 2)))
889 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
890 	else
891 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
892 
893 	/* >= 32 */
894 	if (jmp_label1 == -1)
895 		jmp_label1 = cnt;
896 
897 	/* cmp ecx,64 */
898 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
899 	/* Jumps when >= 64 */
900 	if (is_imm8(jmp_label(jmp_label2, 2)))
901 		EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
902 	else
903 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
904 
905 	/* >= 32 && < 64 */
906 	/* sub ecx,32 */
907 	EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
908 	/* ashr dreg_hi,cl */
909 	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
910 	/* mov dreg_lo,dreg_hi */
911 	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
912 
913 	/* ashr dreg_hi,imm8 */
914 	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
915 
916 	/* goto out; */
917 	if (is_imm8(jmp_label(jmp_label3, 2)))
918 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
919 	else
920 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
921 
922 	/* >= 64 */
923 	if (jmp_label2 == -1)
924 		jmp_label2 = cnt;
925 	/* ashr dreg_hi,imm8 */
926 	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
927 	/* mov dreg_lo,dreg_hi */
928 	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
929 
930 	if (jmp_label3 == -1)
931 		jmp_label3 = cnt;
932 
933 	if (dstk) {
934 		/* mov dword ptr [ebp+off],dreg_lo */
935 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
936 		      STACK_VAR(dst_lo));
937 		/* mov dword ptr [ebp+off],dreg_hi */
938 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
939 		      STACK_VAR(dst_hi));
940 	}
941 	/* out: */
942 	*pprog = prog;
943 }
944 
945 /* dst = dst >> src */
946 static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
947 				     bool sstk, u8 **pprog)
948 {
949 	u8 *prog = *pprog;
950 	int cnt = 0;
951 	static int jmp_label1 = -1;
952 	static int jmp_label2 = -1;
953 	static int jmp_label3 = -1;
954 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
955 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
956 
957 	if (dstk) {
958 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
959 		      STACK_VAR(dst_lo));
960 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
961 		      STACK_VAR(dst_hi));
962 	}
963 
964 	if (sstk)
965 		/* mov ecx,dword ptr [ebp+off] */
966 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
967 		      STACK_VAR(src_lo));
968 	else
969 		/* mov ecx,src_lo */
970 		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
971 
972 	/* cmp ecx,32 */
973 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
974 	/* Jumps when >= 32 */
975 	if (is_imm8(jmp_label(jmp_label1, 2)))
976 		EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
977 	else
978 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
979 
980 	/* < 32 */
981 	/* lshr dreg_lo,cl */
982 	EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
983 	/* mov ebx,dreg_hi */
984 	EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
985 	/* shr dreg_hi,cl */
986 	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
987 
988 	/* IA32_ECX = -IA32_ECX + 32 */
989 	/* neg ecx */
990 	EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
991 	/* add ecx,32 */
992 	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
993 
994 	/* shl ebx,cl */
995 	EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
996 	/* or dreg_lo,ebx */
997 	EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
998 
999 	/* goto out; */
1000 	if (is_imm8(jmp_label(jmp_label3, 2)))
1001 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
1002 	else
1003 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1004 
1005 	/* >= 32 */
1006 	if (jmp_label1 == -1)
1007 		jmp_label1 = cnt;
1008 	/* cmp ecx,64 */
1009 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
1010 	/* Jumps when >= 64 */
1011 	if (is_imm8(jmp_label(jmp_label2, 2)))
1012 		EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
1013 	else
1014 		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
1015 
1016 	/* >= 32 && < 64 */
1017 	/* sub ecx,32 */
1018 	EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
1019 	/* shr dreg_hi,cl */
1020 	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
1021 	/* mov dreg_lo,dreg_hi */
1022 	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1023 	/* xor dreg_hi,dreg_hi */
1024 	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1025 
1026 	/* goto out; */
1027 	if (is_imm8(jmp_label(jmp_label3, 2)))
1028 		EMIT2(0xEB, jmp_label(jmp_label3, 2));
1029 	else
1030 		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1031 
1032 	/* >= 64 */
1033 	if (jmp_label2 == -1)
1034 		jmp_label2 = cnt;
1035 	/* xor dreg_lo,dreg_lo */
1036 	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1037 	/* xor dreg_hi,dreg_hi */
1038 	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1039 
1040 	if (jmp_label3 == -1)
1041 		jmp_label3 = cnt;
1042 
1043 	if (dstk) {
1044 		/* mov dword ptr [ebp+off],dreg_lo */
1045 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1046 		      STACK_VAR(dst_lo));
1047 		/* mov dword ptr [ebp+off],dreg_hi */
1048 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1049 		      STACK_VAR(dst_hi));
1050 	}
1051 	/* out: */
1052 	*pprog = prog;
1053 }
1054 
1055 /* dst = dst << val */
1056 static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
1057 				     bool dstk, u8 **pprog)
1058 {
1059 	u8 *prog = *pprog;
1060 	int cnt = 0;
1061 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1062 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1063 
1064 	if (dstk) {
1065 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1066 		      STACK_VAR(dst_lo));
1067 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1068 		      STACK_VAR(dst_hi));
1069 	}
1070 	/* Do LSH operation */
1071 	if (val < 32) {
1072 		/* shl dreg_hi,imm8 */
1073 		EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
1074 		/* mov ebx,dreg_lo */
1075 		EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
1076 		/* shl dreg_lo,imm8 */
1077 		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
1078 
1079 		/* IA32_ECX = 32 - val */
1080 		/* mov ecx,val */
1081 		EMIT2(0xB1, val);
1082 		/* movzx ecx,ecx */
1083 		EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1084 		/* neg ecx */
1085 		EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1086 		/* add ecx,32 */
1087 		EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1088 
1089 		/* shr ebx,cl */
1090 		EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
1091 		/* or dreg_hi,ebx */
1092 		EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
1093 	} else if (val >= 32 && val < 64) {
1094 		u32 value = val - 32;
1095 
1096 		/* shl dreg_lo,imm8 */
1097 		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
1098 		/* mov dreg_hi,dreg_lo */
1099 		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
1100 		/* xor dreg_lo,dreg_lo */
1101 		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1102 	} else {
1103 		/* xor dreg_lo,dreg_lo */
1104 		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1105 		/* xor dreg_hi,dreg_hi */
1106 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1107 	}
1108 
1109 	if (dstk) {
1110 		/* mov dword ptr [ebp+off],dreg_lo */
1111 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1112 		      STACK_VAR(dst_lo));
1113 		/* mov dword ptr [ebp+off],dreg_hi */
1114 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1115 		      STACK_VAR(dst_hi));
1116 	}
1117 	*pprog = prog;
1118 }
1119 
1120 /* dst = dst >> val */
1121 static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
1122 				     bool dstk, u8 **pprog)
1123 {
1124 	u8 *prog = *pprog;
1125 	int cnt = 0;
1126 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1127 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1128 
1129 	if (dstk) {
1130 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1131 		      STACK_VAR(dst_lo));
1132 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1133 		      STACK_VAR(dst_hi));
1134 	}
1135 
1136 	/* Do RSH operation */
1137 	if (val < 32) {
1138 		/* shr dreg_lo,imm8 */
1139 		EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1140 		/* mov ebx,dreg_hi */
1141 		EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1142 		/* shr dreg_hi,imm8 */
1143 		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
1144 
1145 		/* IA32_ECX = 32 - val */
1146 		/* mov ecx,val */
1147 		EMIT2(0xB1, val);
1148 		/* movzx ecx,ecx */
1149 		EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1150 		/* neg ecx */
1151 		EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1152 		/* add ecx,32 */
1153 		EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1154 
1155 		/* shl ebx,cl */
1156 		EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1157 		/* or dreg_lo,ebx */
1158 		EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1159 	} else if (val >= 32 && val < 64) {
1160 		u32 value = val - 32;
1161 
1162 		/* shr dreg_hi,imm8 */
1163 		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
1164 		/* mov dreg_lo,dreg_hi */
1165 		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1166 		/* xor dreg_hi,dreg_hi */
1167 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1168 	} else {
1169 		/* xor dreg_lo,dreg_lo */
1170 		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1171 		/* xor dreg_hi,dreg_hi */
1172 		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1173 	}
1174 
1175 	if (dstk) {
1176 		/* mov dword ptr [ebp+off],dreg_lo */
1177 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1178 		      STACK_VAR(dst_lo));
1179 		/* mov dword ptr [ebp+off],dreg_hi */
1180 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1181 		      STACK_VAR(dst_hi));
1182 	}
1183 	*pprog = prog;
1184 }
1185 
1186 /* dst = dst >> val (signed) */
1187 static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
1188 				      bool dstk, u8 **pprog)
1189 {
1190 	u8 *prog = *pprog;
1191 	int cnt = 0;
1192 	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1193 	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1194 
1195 	if (dstk) {
1196 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1197 		      STACK_VAR(dst_lo));
1198 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1199 		      STACK_VAR(dst_hi));
1200 	}
1201 	/* Do RSH operation */
1202 	if (val < 32) {
1203 		/* shr dreg_lo,imm8 */
1204 		EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1205 		/* mov ebx,dreg_hi */
1206 		EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1207 		/* ashr dreg_hi,imm8 */
1208 		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1209 
1210 		/* IA32_ECX = 32 - val */
1211 		/* mov ecx,val */
1212 		EMIT2(0xB1, val);
1213 		/* movzx ecx,ecx */
1214 		EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1215 		/* neg ecx */
1216 		EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1217 		/* add ecx,32 */
1218 		EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1219 
1220 		/* shl ebx,cl */
1221 		EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1222 		/* or dreg_lo,ebx */
1223 		EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1224 	} else if (val >= 32 && val < 64) {
1225 		u32 value = val - 32;
1226 
1227 		/* ashr dreg_hi,imm8 */
1228 		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1229 		/* mov dreg_lo,dreg_hi */
1230 		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1231 
1232 		/* ashr dreg_hi,imm8 */
1233 		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1234 	} else {
1235 		/* ashr dreg_hi,imm8 */
1236 		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1237 		/* mov dreg_lo,dreg_hi */
1238 		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1239 	}
1240 
1241 	if (dstk) {
1242 		/* mov dword ptr [ebp+off],dreg_lo */
1243 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1244 		      STACK_VAR(dst_lo));
1245 		/* mov dword ptr [ebp+off],dreg_hi */
1246 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1247 		      STACK_VAR(dst_hi));
1248 	}
1249 	*pprog = prog;
1250 }
1251 
1252 static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
1253 				     bool sstk, u8 **pprog)
1254 {
1255 	u8 *prog = *pprog;
1256 	int cnt = 0;
1257 
1258 	if (dstk)
1259 		/* mov eax,dword ptr [ebp+off] */
1260 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1261 		      STACK_VAR(dst_hi));
1262 	else
1263 		/* mov eax,dst_hi */
1264 		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
1265 
1266 	if (sstk)
1267 		/* mul dword ptr [ebp+off] */
1268 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1269 	else
1270 		/* mul src_lo */
1271 		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1272 
1273 	/* mov ecx,eax */
1274 	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1275 
1276 	if (dstk)
1277 		/* mov eax,dword ptr [ebp+off] */
1278 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1279 		      STACK_VAR(dst_lo));
1280 	else
1281 		/* mov eax,dst_lo */
1282 		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1283 
1284 	if (sstk)
1285 		/* mul dword ptr [ebp+off] */
1286 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
1287 	else
1288 		/* mul src_hi */
1289 		EMIT2(0xF7, add_1reg(0xE0, src_hi));
1290 
1291 	/* add eax,eax */
1292 	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1293 
1294 	if (dstk)
1295 		/* mov eax,dword ptr [ebp+off] */
1296 		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1297 		      STACK_VAR(dst_lo));
1298 	else
1299 		/* mov eax,dst_lo */
1300 		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1301 
1302 	if (sstk)
1303 		/* mul dword ptr [ebp+off] */
1304 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1305 	else
1306 		/* mul src_lo */
1307 		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1308 
1309 	/* add ecx,edx */
1310 	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1311 
1312 	if (dstk) {
1313 		/* mov dword ptr [ebp+off],eax */
1314 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1315 		      STACK_VAR(dst_lo));
1316 		/* mov dword ptr [ebp+off],ecx */
1317 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1318 		      STACK_VAR(dst_hi));
1319 	} else {
1320 		/* mov dst_lo,eax */
1321 		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1322 		/* mov dst_hi,ecx */
1323 		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1324 	}
1325 
1326 	*pprog = prog;
1327 }
1328 
1329 static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1330 				     bool dstk, u8 **pprog)
1331 {
1332 	u8 *prog = *pprog;
1333 	int cnt = 0;
1334 	u32 hi;
1335 
1336 	hi = val & (1<<31) ? (u32)~0 : 0;
1337 	/* movl eax,imm32 */
1338 	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1339 	if (dstk)
1340 		/* mul dword ptr [ebp+off] */
1341 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1342 	else
1343 		/* mul dst_hi */
1344 		EMIT2(0xF7, add_1reg(0xE0, dst_hi));
1345 
1346 	/* mov ecx,eax */
1347 	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1348 
1349 	/* movl eax,imm32 */
1350 	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1351 	if (dstk)
1352 		/* mul dword ptr [ebp+off] */
1353 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1354 	else
1355 		/* mul dst_lo */
1356 		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1357 	/* add ecx,eax */
1358 	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1359 
1360 	/* movl eax,imm32 */
1361 	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1362 	if (dstk)
1363 		/* mul dword ptr [ebp+off] */
1364 		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1365 	else
1366 		/* mul dst_lo */
1367 		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1368 
1369 	/* add ecx,edx */
1370 	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1371 
1372 	if (dstk) {
1373 		/* mov dword ptr [ebp+off],eax */
1374 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1375 		      STACK_VAR(dst_lo));
1376 		/* mov dword ptr [ebp+off],ecx */
1377 		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1378 		      STACK_VAR(dst_hi));
1379 	} else {
1380 		/* mov dword ptr [ebp+off],eax */
1381 		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1382 		/* mov dword ptr [ebp+off],ecx */
1383 		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1384 	}
1385 
1386 	*pprog = prog;
1387 }
1388 
1389 static int bpf_size_to_x86_bytes(int bpf_size)
1390 {
1391 	if (bpf_size == BPF_W)
1392 		return 4;
1393 	else if (bpf_size == BPF_H)
1394 		return 2;
1395 	else if (bpf_size == BPF_B)
1396 		return 1;
1397 	else if (bpf_size == BPF_DW)
1398 		return 4; /* imm32 */
1399 	else
1400 		return 0;
1401 }
1402 
/* State handed to do_jit() alongside the program being compiled. */
struct jit_context {
	/* Epilogue code offset */
	int cleanup_addr;
};

/*
 * Maximum number of bytes emitted while JITing one eBPF insn, plus a
 * safety margin; do_jit() sizes its staging buffer as the sum of the two.
 */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

/*
 * Number of bytes emit_prologue() emits (checked there at build time);
 * emit_bpf_tail_call() adds it to bpf_func to enter past the prologue.
 */
#define PROLOGUE_SIZE 35
1412 
1413 /*
1414  * Emit prologue code for BPF program and check it's size.
1415  * bpf_tail_call helper will skip it while jumping into another program.
1416  */
1417 static void emit_prologue(u8 **pprog, u32 stack_depth)
1418 {
1419 	u8 *prog = *pprog;
1420 	int cnt = 0;
1421 	const u8 *r1 = bpf2ia32[BPF_REG_1];
1422 	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1423 	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1424 	const u8 *tcc = bpf2ia32[TCALL_CNT];
1425 
1426 	/* push ebp */
1427 	EMIT1(0x55);
1428 	/* mov ebp,esp */
1429 	EMIT2(0x89, 0xE5);
1430 	/* push edi */
1431 	EMIT1(0x57);
1432 	/* push esi */
1433 	EMIT1(0x56);
1434 	/* push ebx */
1435 	EMIT1(0x53);
1436 
1437 	/* sub esp,STACK_SIZE */
1438 	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1439 	/* sub ebp,SCRATCH_SIZE+12*/
1440 	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
1441 	/* xor ebx,ebx */
1442 	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1443 
1444 	/* Set up BPF prog stack base register */
1445 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1446 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1447 
1448 	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
1449 	/* mov dword ptr [ebp+off],eax */
1450 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1451 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1452 
1453 	/* Initialize Tail Count */
1454 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1455 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1456 
1457 	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1458 	*pprog = prog;
1459 }
1460 
1461 /* Emit epilogue code for BPF program */
1462 static void emit_epilogue(u8 **pprog, u32 stack_depth)
1463 {
1464 	u8 *prog = *pprog;
1465 	const u8 *r0 = bpf2ia32[BPF_REG_0];
1466 	int cnt = 0;
1467 
1468 	/* mov eax,dword ptr [ebp+off]*/
1469 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
1470 	/* mov edx,dword ptr [ebp+off]*/
1471 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1472 
1473 	/* add ebp,SCRATCH_SIZE+12*/
1474 	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
1475 
1476 	/* mov ebx,dword ptr [ebp-12]*/
1477 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
1478 	/* mov esi,dword ptr [ebp-8]*/
1479 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
1480 	/* mov edi,dword ptr [ebp-4]*/
1481 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
1482 
1483 	EMIT1(0xC9); /* leave */
1484 	EMIT1(0xC3); /* ret */
1485 	*pprog = prog;
1486 }
1487 
1488 /*
1489  * Generate the following code:
1490  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1491  *   if (index >= array->map.max_entries)
1492  *     goto out;
1493  *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1494  *     goto out;
1495  *   prog = array->ptrs[index];
1496  *   if (prog == NULL)
1497  *     goto out;
1498  *   goto *(prog->bpf_func + prologue_size);
1499  * out:
1500  */
1501 static void emit_bpf_tail_call(u8 **pprog)
1502 {
1503 	u8 *prog = *pprog;
1504 	int cnt = 0;
1505 	const u8 *r1 = bpf2ia32[BPF_REG_1];
1506 	const u8 *r2 = bpf2ia32[BPF_REG_2];
1507 	const u8 *r3 = bpf2ia32[BPF_REG_3];
1508 	const u8 *tcc = bpf2ia32[TCALL_CNT];
1509 	u32 lo, hi;
1510 	static int jmp_label1 = -1;
1511 
1512 	/*
1513 	 * if (index >= array->map.max_entries)
1514 	 *     goto out;
1515 	 */
1516 	/* mov eax,dword ptr [ebp+off] */
1517 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1518 	/* mov edx,dword ptr [ebp+off] */
1519 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1520 
1521 	/* cmp dword ptr [eax+off],edx */
1522 	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1523 	      offsetof(struct bpf_array, map.max_entries));
1524 	/* jbe out */
1525 	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1526 
1527 	/*
1528 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
1529 	 *     goto out;
1530 	 */
1531 	lo = (u32)MAX_TAIL_CALL_CNT;
1532 	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1533 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1534 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1535 
1536 	/* cmp edx,hi */
1537 	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1538 	EMIT2(IA32_JNE, 3);
1539 	/* cmp ecx,lo */
1540 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1541 
1542 	/* ja out */
1543 	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1544 
1545 	/* add eax,0x1 */
1546 	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1547 	/* adc ebx,0x0 */
1548 	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1549 
1550 	/* mov dword ptr [ebp+off],eax */
1551 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1552 	/* mov dword ptr [ebp+off],edx */
1553 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1554 
1555 	/* prog = array->ptrs[index]; */
1556 	/* mov edx, [eax + edx * 4 + offsetof(...)] */
1557 	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1558 
1559 	/*
1560 	 * if (prog == NULL)
1561 	 *     goto out;
1562 	 */
1563 	/* test edx,edx */
1564 	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1565 	/* je out */
1566 	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1567 
1568 	/* goto *(prog->bpf_func + prologue_size); */
1569 	/* mov edx, dword ptr [edx + 32] */
1570 	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1571 	      offsetof(struct bpf_prog, bpf_func));
1572 	/* add edx,prologue_size */
1573 	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1574 
1575 	/* mov eax,dword ptr [ebp+off] */
1576 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1577 
1578 	/*
1579 	 * Now we're ready to jump into next BPF program:
1580 	 * eax == ctx (1st arg)
1581 	 * edx == prog->bpf_func + prologue_size
1582 	 */
1583 	RETPOLINE_EDX_BPF_JIT();
1584 
1585 	if (jmp_label1 == -1)
1586 		jmp_label1 = cnt;
1587 
1588 	/* out: */
1589 	*pprog = prog;
1590 }
1591 
1592 /* Push the scratch stack register on top of the stack. */
1593 static inline void emit_push_r64(const u8 src[], u8 **pprog)
1594 {
1595 	u8 *prog = *pprog;
1596 	int cnt = 0;
1597 
1598 	/* mov ecx,dword ptr [ebp+off] */
1599 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
1600 	/* push ecx */
1601 	EMIT1(0x51);
1602 
1603 	/* mov ecx,dword ptr [ebp+off] */
1604 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1605 	/* push ecx */
1606 	EMIT1(0x51);
1607 
1608 	*pprog = prog;
1609 }
1610 
1611 static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
1612 {
1613 	u8 jmp_cond;
1614 
1615 	/* Convert BPF opcode to x86 */
1616 	switch (op) {
1617 	case BPF_JEQ:
1618 		jmp_cond = IA32_JE;
1619 		break;
1620 	case BPF_JSET:
1621 	case BPF_JNE:
1622 		jmp_cond = IA32_JNE;
1623 		break;
1624 	case BPF_JGT:
1625 		/* GT is unsigned '>', JA in x86 */
1626 		jmp_cond = IA32_JA;
1627 		break;
1628 	case BPF_JLT:
1629 		/* LT is unsigned '<', JB in x86 */
1630 		jmp_cond = IA32_JB;
1631 		break;
1632 	case BPF_JGE:
1633 		/* GE is unsigned '>=', JAE in x86 */
1634 		jmp_cond = IA32_JAE;
1635 		break;
1636 	case BPF_JLE:
1637 		/* LE is unsigned '<=', JBE in x86 */
1638 		jmp_cond = IA32_JBE;
1639 		break;
1640 	case BPF_JSGT:
1641 		if (!is_cmp_lo)
1642 			/* Signed '>', GT in x86 */
1643 			jmp_cond = IA32_JG;
1644 		else
1645 			/* GT is unsigned '>', JA in x86 */
1646 			jmp_cond = IA32_JA;
1647 		break;
1648 	case BPF_JSLT:
1649 		if (!is_cmp_lo)
1650 			/* Signed '<', LT in x86 */
1651 			jmp_cond = IA32_JL;
1652 		else
1653 			/* LT is unsigned '<', JB in x86 */
1654 			jmp_cond = IA32_JB;
1655 		break;
1656 	case BPF_JSGE:
1657 		if (!is_cmp_lo)
1658 			/* Signed '>=', GE in x86 */
1659 			jmp_cond = IA32_JGE;
1660 		else
1661 			/* GE is unsigned '>=', JAE in x86 */
1662 			jmp_cond = IA32_JAE;
1663 		break;
1664 	case BPF_JSLE:
1665 		if (!is_cmp_lo)
1666 			/* Signed '<=', LE in x86 */
1667 			jmp_cond = IA32_JLE;
1668 		else
1669 			/* LE is unsigned '<=', JBE in x86 */
1670 			jmp_cond = IA32_JBE;
1671 		break;
1672 	default: /* to silence GCC warning */
1673 		jmp_cond = COND_JMP_OPCODE_INVALID;
1674 		break;
1675 	}
1676 
1677 	return jmp_cond;
1678 }
1679 
1680 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1681 		  int oldproglen, struct jit_context *ctx)
1682 {
1683 	struct bpf_insn *insn = bpf_prog->insnsi;
1684 	int insn_cnt = bpf_prog->len;
1685 	bool seen_exit = false;
1686 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1687 	int i, cnt = 0;
1688 	int proglen = 0;
1689 	u8 *prog = temp;
1690 
1691 	emit_prologue(&prog, bpf_prog->aux->stack_depth);
1692 
1693 	for (i = 0; i < insn_cnt; i++, insn++) {
1694 		const s32 imm32 = insn->imm;
1695 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1696 		const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
1697 		const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
1698 		const u8 code = insn->code;
1699 		const u8 *dst = bpf2ia32[insn->dst_reg];
1700 		const u8 *src = bpf2ia32[insn->src_reg];
1701 		const u8 *r0 = bpf2ia32[BPF_REG_0];
1702 		s64 jmp_offset;
1703 		u8 jmp_cond;
1704 		int ilen;
1705 		u8 *func;
1706 
1707 		switch (code) {
1708 		/* ALU operations */
1709 		/* dst = src */
1710 		case BPF_ALU | BPF_MOV | BPF_K:
1711 		case BPF_ALU | BPF_MOV | BPF_X:
1712 		case BPF_ALU64 | BPF_MOV | BPF_K:
1713 		case BPF_ALU64 | BPF_MOV | BPF_X:
1714 			switch (BPF_SRC(code)) {
1715 			case BPF_X:
1716 				emit_ia32_mov_r64(is64, dst, src, dstk,
1717 						  sstk, &prog);
1718 				break;
1719 			case BPF_K:
1720 				/* Sign-extend immediate value to dst reg */
1721 				emit_ia32_mov_i64(is64, dst, imm32,
1722 						  dstk, &prog);
1723 				break;
1724 			}
1725 			break;
1726 		/* dst = dst + src/imm */
1727 		/* dst = dst - src/imm */
1728 		/* dst = dst | src/imm */
1729 		/* dst = dst & src/imm */
1730 		/* dst = dst ^ src/imm */
1731 		/* dst = dst * src/imm */
1732 		/* dst = dst << src */
1733 		/* dst = dst >> src */
1734 		case BPF_ALU | BPF_ADD | BPF_K:
1735 		case BPF_ALU | BPF_ADD | BPF_X:
1736 		case BPF_ALU | BPF_SUB | BPF_K:
1737 		case BPF_ALU | BPF_SUB | BPF_X:
1738 		case BPF_ALU | BPF_OR | BPF_K:
1739 		case BPF_ALU | BPF_OR | BPF_X:
1740 		case BPF_ALU | BPF_AND | BPF_K:
1741 		case BPF_ALU | BPF_AND | BPF_X:
1742 		case BPF_ALU | BPF_XOR | BPF_K:
1743 		case BPF_ALU | BPF_XOR | BPF_X:
1744 		case BPF_ALU64 | BPF_ADD | BPF_K:
1745 		case BPF_ALU64 | BPF_ADD | BPF_X:
1746 		case BPF_ALU64 | BPF_SUB | BPF_K:
1747 		case BPF_ALU64 | BPF_SUB | BPF_X:
1748 		case BPF_ALU64 | BPF_OR | BPF_K:
1749 		case BPF_ALU64 | BPF_OR | BPF_X:
1750 		case BPF_ALU64 | BPF_AND | BPF_K:
1751 		case BPF_ALU64 | BPF_AND | BPF_X:
1752 		case BPF_ALU64 | BPF_XOR | BPF_K:
1753 		case BPF_ALU64 | BPF_XOR | BPF_X:
1754 			switch (BPF_SRC(code)) {
1755 			case BPF_X:
1756 				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1757 						  src, dstk, sstk, &prog);
1758 				break;
1759 			case BPF_K:
1760 				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1761 						  imm32, dstk, &prog);
1762 				break;
1763 			}
1764 			break;
1765 		case BPF_ALU | BPF_MUL | BPF_K:
1766 		case BPF_ALU | BPF_MUL | BPF_X:
1767 			switch (BPF_SRC(code)) {
1768 			case BPF_X:
1769 				emit_ia32_mul_r(dst_lo, src_lo, dstk,
1770 						sstk, &prog);
1771 				break;
1772 			case BPF_K:
1773 				/* mov ecx,imm32*/
1774 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1775 					    imm32);
1776 				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1777 						false, &prog);
1778 				break;
1779 			}
1780 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1781 			break;
1782 		case BPF_ALU | BPF_LSH | BPF_X:
1783 		case BPF_ALU | BPF_RSH | BPF_X:
1784 		case BPF_ALU | BPF_ARSH | BPF_K:
1785 		case BPF_ALU | BPF_ARSH | BPF_X:
1786 			switch (BPF_SRC(code)) {
1787 			case BPF_X:
1788 				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1789 						  dstk, sstk, &prog);
1790 				break;
1791 			case BPF_K:
1792 				/* mov ecx,imm32*/
1793 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1794 					    imm32);
1795 				emit_ia32_shift_r(BPF_OP(code), dst_lo,
1796 						  IA32_ECX, dstk, false,
1797 						  &prog);
1798 				break;
1799 			}
1800 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1801 			break;
1802 		/* dst = dst / src(imm) */
1803 		/* dst = dst % src(imm) */
1804 		case BPF_ALU | BPF_DIV | BPF_K:
1805 		case BPF_ALU | BPF_DIV | BPF_X:
1806 		case BPF_ALU | BPF_MOD | BPF_K:
1807 		case BPF_ALU | BPF_MOD | BPF_X:
1808 			switch (BPF_SRC(code)) {
1809 			case BPF_X:
1810 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1811 						    src_lo, dstk, sstk, &prog);
1812 				break;
1813 			case BPF_K:
1814 				/* mov ecx,imm32*/
1815 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1816 					    imm32);
1817 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1818 						    IA32_ECX, dstk, false,
1819 						    &prog);
1820 				break;
1821 			}
1822 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1823 			break;
1824 		case BPF_ALU64 | BPF_DIV | BPF_K:
1825 		case BPF_ALU64 | BPF_DIV | BPF_X:
1826 		case BPF_ALU64 | BPF_MOD | BPF_K:
1827 		case BPF_ALU64 | BPF_MOD | BPF_X:
1828 			goto notyet;
1829 		/* dst = dst >> imm */
1830 		/* dst = dst << imm */
1831 		case BPF_ALU | BPF_RSH | BPF_K:
1832 		case BPF_ALU | BPF_LSH | BPF_K:
1833 			if (unlikely(imm32 > 31))
1834 				return -EINVAL;
1835 			/* mov ecx,imm32*/
1836 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1837 			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1838 					  false, &prog);
1839 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1840 			break;
1841 		/* dst = dst << imm */
1842 		case BPF_ALU64 | BPF_LSH | BPF_K:
1843 			if (unlikely(imm32 > 63))
1844 				return -EINVAL;
1845 			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1846 			break;
1847 		/* dst = dst >> imm */
1848 		case BPF_ALU64 | BPF_RSH | BPF_K:
1849 			if (unlikely(imm32 > 63))
1850 				return -EINVAL;
1851 			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1852 			break;
1853 		/* dst = dst << src */
1854 		case BPF_ALU64 | BPF_LSH | BPF_X:
1855 			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1856 			break;
1857 		/* dst = dst >> src */
1858 		case BPF_ALU64 | BPF_RSH | BPF_X:
1859 			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1860 			break;
1861 		/* dst = dst >> src (signed) */
1862 		case BPF_ALU64 | BPF_ARSH | BPF_X:
1863 			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1864 			break;
1865 		/* dst = dst >> imm (signed) */
1866 		case BPF_ALU64 | BPF_ARSH | BPF_K:
1867 			if (unlikely(imm32 > 63))
1868 				return -EINVAL;
1869 			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1870 			break;
1871 		/* dst = ~dst */
1872 		case BPF_ALU | BPF_NEG:
1873 			emit_ia32_alu_i(is64, false, BPF_OP(code),
1874 					dst_lo, 0, dstk, &prog);
1875 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1876 			break;
1877 		/* dst = ~dst (64 bit) */
1878 		case BPF_ALU64 | BPF_NEG:
1879 			emit_ia32_neg64(dst, dstk, &prog);
1880 			break;
1881 		/* dst = dst * src/imm */
1882 		case BPF_ALU64 | BPF_MUL | BPF_X:
1883 		case BPF_ALU64 | BPF_MUL | BPF_K:
1884 			switch (BPF_SRC(code)) {
1885 			case BPF_X:
1886 				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1887 				break;
1888 			case BPF_K:
1889 				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1890 				break;
1891 			}
1892 			break;
1893 		/* dst = htole(dst) */
1894 		case BPF_ALU | BPF_END | BPF_FROM_LE:
1895 			emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
1896 			break;
1897 		/* dst = htobe(dst) */
1898 		case BPF_ALU | BPF_END | BPF_FROM_BE:
1899 			emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
1900 			break;
1901 		/* dst = imm64 */
1902 		case BPF_LD | BPF_IMM | BPF_DW: {
1903 			s32 hi, lo = imm32;
1904 
1905 			hi = insn[1].imm;
1906 			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1907 			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1908 			insn++;
1909 			i++;
1910 			break;
1911 		}
1912 		/* ST: *(u8*)(dst_reg + off) = imm */
1913 		case BPF_ST | BPF_MEM | BPF_H:
1914 		case BPF_ST | BPF_MEM | BPF_B:
1915 		case BPF_ST | BPF_MEM | BPF_W:
1916 		case BPF_ST | BPF_MEM | BPF_DW:
1917 			if (dstk)
1918 				/* mov eax,dword ptr [ebp+off] */
1919 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1920 				      STACK_VAR(dst_lo));
1921 			else
1922 				/* mov eax,dst_lo */
1923 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1924 
1925 			switch (BPF_SIZE(code)) {
1926 			case BPF_B:
1927 				EMIT(0xC6, 1); break;
1928 			case BPF_H:
1929 				EMIT2(0x66, 0xC7); break;
1930 			case BPF_W:
1931 			case BPF_DW:
1932 				EMIT(0xC7, 1); break;
1933 			}
1934 
1935 			if (is_imm8(insn->off))
1936 				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1937 			else
1938 				EMIT1_off32(add_1reg(0x80, IA32_EAX),
1939 					    insn->off);
1940 			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1941 
1942 			if (BPF_SIZE(code) == BPF_DW) {
1943 				u32 hi;
1944 
1945 				hi = imm32 & (1<<31) ? (u32)~0 : 0;
1946 				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1947 					    insn->off + 4);
1948 				EMIT(hi, 4);
1949 			}
1950 			break;
1951 
1952 		/* STX: *(u8*)(dst_reg + off) = src_reg */
1953 		case BPF_STX | BPF_MEM | BPF_B:
1954 		case BPF_STX | BPF_MEM | BPF_H:
1955 		case BPF_STX | BPF_MEM | BPF_W:
1956 		case BPF_STX | BPF_MEM | BPF_DW:
1957 			if (dstk)
1958 				/* mov eax,dword ptr [ebp+off] */
1959 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1960 				      STACK_VAR(dst_lo));
1961 			else
1962 				/* mov eax,dst_lo */
1963 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1964 
1965 			if (sstk)
1966 				/* mov edx,dword ptr [ebp+off] */
1967 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1968 				      STACK_VAR(src_lo));
1969 			else
1970 				/* mov edx,src_lo */
1971 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1972 
1973 			switch (BPF_SIZE(code)) {
1974 			case BPF_B:
1975 				EMIT(0x88, 1); break;
1976 			case BPF_H:
1977 				EMIT2(0x66, 0x89); break;
1978 			case BPF_W:
1979 			case BPF_DW:
1980 				EMIT(0x89, 1); break;
1981 			}
1982 
1983 			if (is_imm8(insn->off))
1984 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1985 				      insn->off);
1986 			else
1987 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1988 					    insn->off);
1989 
1990 			if (BPF_SIZE(code) == BPF_DW) {
1991 				if (sstk)
1992 					/* mov edi,dword ptr [ebp+off] */
1993 					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1994 							     IA32_EDX),
1995 					      STACK_VAR(src_hi));
1996 				else
1997 					/* mov edi,src_hi */
1998 					EMIT2(0x8B, add_2reg(0xC0, src_hi,
1999 							     IA32_EDX));
2000 				EMIT1(0x89);
2001 				if (is_imm8(insn->off + 4)) {
2002 					EMIT2(add_2reg(0x40, IA32_EAX,
2003 						       IA32_EDX),
2004 					      insn->off + 4);
2005 				} else {
2006 					EMIT1(add_2reg(0x80, IA32_EAX,
2007 						       IA32_EDX));
2008 					EMIT(insn->off + 4, 4);
2009 				}
2010 			}
2011 			break;
2012 
2013 		/* LDX: dst_reg = *(u8*)(src_reg + off) */
2014 		case BPF_LDX | BPF_MEM | BPF_B:
2015 		case BPF_LDX | BPF_MEM | BPF_H:
2016 		case BPF_LDX | BPF_MEM | BPF_W:
2017 		case BPF_LDX | BPF_MEM | BPF_DW:
2018 			if (sstk)
2019 				/* mov eax,dword ptr [ebp+off] */
2020 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2021 				      STACK_VAR(src_lo));
2022 			else
2023 				/* mov eax,dword ptr [ebp+off] */
2024 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
2025 
2026 			switch (BPF_SIZE(code)) {
2027 			case BPF_B:
2028 				EMIT2(0x0F, 0xB6); break;
2029 			case BPF_H:
2030 				EMIT2(0x0F, 0xB7); break;
2031 			case BPF_W:
2032 			case BPF_DW:
2033 				EMIT(0x8B, 1); break;
2034 			}
2035 
2036 			if (is_imm8(insn->off))
2037 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
2038 				      insn->off);
2039 			else
2040 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
2041 					    insn->off);
2042 
2043 			if (dstk)
2044 				/* mov dword ptr [ebp+off],edx */
2045 				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2046 				      STACK_VAR(dst_lo));
2047 			else
2048 				/* mov dst_lo,edx */
2049 				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
2050 			switch (BPF_SIZE(code)) {
2051 			case BPF_B:
2052 			case BPF_H:
2053 			case BPF_W:
2054 				if (dstk) {
2055 					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
2056 					      STACK_VAR(dst_hi));
2057 					EMIT(0x0, 4);
2058 				} else {
2059 					EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
2060 				}
2061 				break;
2062 			case BPF_DW:
2063 				EMIT2_off32(0x8B,
2064 					    add_2reg(0x80, IA32_EAX, IA32_EDX),
2065 					    insn->off + 4);
2066 				if (dstk)
2067 					EMIT3(0x89,
2068 					      add_2reg(0x40, IA32_EBP,
2069 						       IA32_EDX),
2070 					      STACK_VAR(dst_hi));
2071 				else
2072 					EMIT2(0x89,
2073 					      add_2reg(0xC0, dst_hi, IA32_EDX));
2074 				break;
2075 			default:
2076 				break;
2077 			}
2078 			break;
2079 		/* call */
2080 		case BPF_JMP | BPF_CALL:
2081 		{
2082 			const u8 *r1 = bpf2ia32[BPF_REG_1];
2083 			const u8 *r2 = bpf2ia32[BPF_REG_2];
2084 			const u8 *r3 = bpf2ia32[BPF_REG_3];
2085 			const u8 *r4 = bpf2ia32[BPF_REG_4];
2086 			const u8 *r5 = bpf2ia32[BPF_REG_5];
2087 
2088 			if (insn->src_reg == BPF_PSEUDO_CALL)
2089 				goto notyet;
2090 
2091 			func = (u8 *) __bpf_call_base + imm32;
2092 			jmp_offset = func - (image + addrs[i]);
2093 
2094 			if (!imm32 || !is_simm32(jmp_offset)) {
2095 				pr_err("unsupported BPF func %d addr %p image %p\n",
2096 				       imm32, func, image);
2097 				return -EINVAL;
2098 			}
2099 
2100 			/* mov eax,dword ptr [ebp+off] */
2101 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2102 			      STACK_VAR(r1[0]));
2103 			/* mov edx,dword ptr [ebp+off] */
2104 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2105 			      STACK_VAR(r1[1]));
2106 
2107 			emit_push_r64(r5, &prog);
2108 			emit_push_r64(r4, &prog);
2109 			emit_push_r64(r3, &prog);
2110 			emit_push_r64(r2, &prog);
2111 
2112 			EMIT1_off32(0xE8, jmp_offset + 9);
2113 
2114 			/* mov dword ptr [ebp+off],eax */
2115 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2116 			      STACK_VAR(r0[0]));
2117 			/* mov dword ptr [ebp+off],edx */
2118 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2119 			      STACK_VAR(r0[1]));
2120 
2121 			/* add esp,32 */
2122 			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2123 			break;
2124 		}
2125 		case BPF_JMP | BPF_TAIL_CALL:
2126 			emit_bpf_tail_call(&prog);
2127 			break;
2128 
2129 		/* cond jump */
2130 		case BPF_JMP | BPF_JEQ | BPF_X:
2131 		case BPF_JMP | BPF_JNE | BPF_X:
2132 		case BPF_JMP | BPF_JGT | BPF_X:
2133 		case BPF_JMP | BPF_JLT | BPF_X:
2134 		case BPF_JMP | BPF_JGE | BPF_X:
2135 		case BPF_JMP | BPF_JLE | BPF_X:
2136 		case BPF_JMP32 | BPF_JEQ | BPF_X:
2137 		case BPF_JMP32 | BPF_JNE | BPF_X:
2138 		case BPF_JMP32 | BPF_JGT | BPF_X:
2139 		case BPF_JMP32 | BPF_JLT | BPF_X:
2140 		case BPF_JMP32 | BPF_JGE | BPF_X:
2141 		case BPF_JMP32 | BPF_JLE | BPF_X:
2142 		case BPF_JMP32 | BPF_JSGT | BPF_X:
2143 		case BPF_JMP32 | BPF_JSLE | BPF_X:
2144 		case BPF_JMP32 | BPF_JSLT | BPF_X:
2145 		case BPF_JMP32 | BPF_JSGE | BPF_X: {
2146 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2147 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2148 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2149 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2150 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2151 
2152 			if (dstk) {
2153 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2154 				      STACK_VAR(dst_lo));
2155 				if (is_jmp64)
2156 					EMIT3(0x8B,
2157 					      add_2reg(0x40, IA32_EBP,
2158 						       IA32_EDX),
2159 					      STACK_VAR(dst_hi));
2160 			}
2161 
2162 			if (sstk) {
2163 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2164 				      STACK_VAR(src_lo));
2165 				if (is_jmp64)
2166 					EMIT3(0x8B,
2167 					      add_2reg(0x40, IA32_EBP,
2168 						       IA32_EBX),
2169 					      STACK_VAR(src_hi));
2170 			}
2171 
2172 			if (is_jmp64) {
2173 				/* cmp dreg_hi,sreg_hi */
2174 				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2175 				EMIT2(IA32_JNE, 2);
2176 			}
2177 			/* cmp dreg_lo,sreg_lo */
2178 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2179 			goto emit_cond_jmp;
2180 		}
2181 		case BPF_JMP | BPF_JSGT | BPF_X:
2182 		case BPF_JMP | BPF_JSLE | BPF_X:
2183 		case BPF_JMP | BPF_JSLT | BPF_X:
2184 		case BPF_JMP | BPF_JSGE | BPF_X: {
2185 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2186 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2187 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2188 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2189 
2190 			if (dstk) {
2191 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2192 				      STACK_VAR(dst_lo));
2193 				EMIT3(0x8B,
2194 				      add_2reg(0x40, IA32_EBP,
2195 					       IA32_EDX),
2196 				      STACK_VAR(dst_hi));
2197 			}
2198 
2199 			if (sstk) {
2200 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2201 				      STACK_VAR(src_lo));
2202 				EMIT3(0x8B,
2203 				      add_2reg(0x40, IA32_EBP,
2204 					       IA32_EBX),
2205 				      STACK_VAR(src_hi));
2206 			}
2207 
2208 			/* cmp dreg_hi,sreg_hi */
2209 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2210 			EMIT2(IA32_JNE, 10);
2211 			/* cmp dreg_lo,sreg_lo */
2212 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2213 			goto emit_cond_jmp_signed;
2214 		}
2215 		case BPF_JMP | BPF_JSET | BPF_X:
2216 		case BPF_JMP32 | BPF_JSET | BPF_X: {
2217 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2218 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2219 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2220 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2221 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2222 
2223 			if (dstk) {
2224 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2225 				      STACK_VAR(dst_lo));
2226 				if (is_jmp64)
2227 					EMIT3(0x8B,
2228 					      add_2reg(0x40, IA32_EBP,
2229 						       IA32_EDX),
2230 					      STACK_VAR(dst_hi));
2231 			}
2232 
2233 			if (sstk) {
2234 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2235 				      STACK_VAR(src_lo));
2236 				if (is_jmp64)
2237 					EMIT3(0x8B,
2238 					      add_2reg(0x40, IA32_EBP,
2239 						       IA32_EBX),
2240 					      STACK_VAR(src_hi));
2241 			}
2242 			/* and dreg_lo,sreg_lo */
2243 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2244 			/* and dreg_hi,sreg_hi */
2245 			EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2246 			/* or dreg_lo,dreg_hi */
2247 			EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2248 			goto emit_cond_jmp;
2249 		}
2250 		case BPF_JMP | BPF_JSET | BPF_K:
2251 		case BPF_JMP32 | BPF_JSET | BPF_K: {
2252 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2253 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2254 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2255 			u8 sreg_lo = IA32_ECX;
2256 			u8 sreg_hi = IA32_EBX;
2257 			u32 hi;
2258 
2259 			if (dstk) {
2260 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2261 				      STACK_VAR(dst_lo));
2262 				if (is_jmp64)
2263 					EMIT3(0x8B,
2264 					      add_2reg(0x40, IA32_EBP,
2265 						       IA32_EDX),
2266 					      STACK_VAR(dst_hi));
2267 			}
2268 
2269 			/* mov ecx,imm32 */
2270 			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
2271 
2272 			/* and dreg_lo,sreg_lo */
2273 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2274 			if (is_jmp64) {
2275 				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2276 				/* mov ebx,imm32 */
2277 				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
2278 				/* and dreg_hi,sreg_hi */
2279 				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2280 				/* or dreg_lo,dreg_hi */
2281 				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2282 			}
2283 			goto emit_cond_jmp;
2284 		}
2285 		case BPF_JMP | BPF_JEQ | BPF_K:
2286 		case BPF_JMP | BPF_JNE | BPF_K:
2287 		case BPF_JMP | BPF_JGT | BPF_K:
2288 		case BPF_JMP | BPF_JLT | BPF_K:
2289 		case BPF_JMP | BPF_JGE | BPF_K:
2290 		case BPF_JMP | BPF_JLE | BPF_K:
2291 		case BPF_JMP32 | BPF_JEQ | BPF_K:
2292 		case BPF_JMP32 | BPF_JNE | BPF_K:
2293 		case BPF_JMP32 | BPF_JGT | BPF_K:
2294 		case BPF_JMP32 | BPF_JLT | BPF_K:
2295 		case BPF_JMP32 | BPF_JGE | BPF_K:
2296 		case BPF_JMP32 | BPF_JLE | BPF_K:
2297 		case BPF_JMP32 | BPF_JSGT | BPF_K:
2298 		case BPF_JMP32 | BPF_JSLE | BPF_K:
2299 		case BPF_JMP32 | BPF_JSLT | BPF_K:
2300 		case BPF_JMP32 | BPF_JSGE | BPF_K: {
2301 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2302 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2303 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2304 			u8 sreg_lo = IA32_ECX;
2305 			u8 sreg_hi = IA32_EBX;
2306 			u32 hi;
2307 
2308 			if (dstk) {
2309 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2310 				      STACK_VAR(dst_lo));
2311 				if (is_jmp64)
2312 					EMIT3(0x8B,
2313 					      add_2reg(0x40, IA32_EBP,
2314 						       IA32_EDX),
2315 					      STACK_VAR(dst_hi));
2316 			}
2317 
2318 			/* mov ecx,imm32 */
2319 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2320 			if (is_jmp64) {
2321 				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2322 				/* mov ebx,imm32 */
2323 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2324 				/* cmp dreg_hi,sreg_hi */
2325 				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2326 				EMIT2(IA32_JNE, 2);
2327 			}
2328 			/* cmp dreg_lo,sreg_lo */
2329 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2330 
2331 emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2332 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2333 				return -EFAULT;
2334 			jmp_offset = addrs[i + insn->off] - addrs[i];
2335 			if (is_imm8(jmp_offset)) {
2336 				EMIT2(jmp_cond, jmp_offset);
2337 			} else if (is_simm32(jmp_offset)) {
2338 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2339 			} else {
2340 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2341 				return -EFAULT;
2342 			}
2343 			break;
2344 		}
2345 		case BPF_JMP | BPF_JSGT | BPF_K:
2346 		case BPF_JMP | BPF_JSLE | BPF_K:
2347 		case BPF_JMP | BPF_JSLT | BPF_K:
2348 		case BPF_JMP | BPF_JSGE | BPF_K: {
2349 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2350 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2351 			u8 sreg_lo = IA32_ECX;
2352 			u8 sreg_hi = IA32_EBX;
2353 			u32 hi;
2354 
2355 			if (dstk) {
2356 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2357 				      STACK_VAR(dst_lo));
2358 				EMIT3(0x8B,
2359 				      add_2reg(0x40, IA32_EBP,
2360 					       IA32_EDX),
2361 				      STACK_VAR(dst_hi));
2362 			}
2363 
2364 			/* mov ecx,imm32 */
2365 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2366 			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2367 			/* mov ebx,imm32 */
2368 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2369 			/* cmp dreg_hi,sreg_hi */
2370 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2371 			EMIT2(IA32_JNE, 10);
2372 			/* cmp dreg_lo,sreg_lo */
2373 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2374 
2375 			/*
2376 			 * For simplicity of branch offset computation,
2377 			 * let's use fixed jump coding here.
2378 			 */
2379 emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
2380 			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
2381 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2382 				return -EFAULT;
2383 			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
2384 			if (is_simm32(jmp_offset)) {
2385 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2386 			} else {
2387 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2388 				return -EFAULT;
2389 			}
2390 			EMIT2(0xEB, 6);
2391 
2392 			/* Check the condition for high 32-bit comparison */
2393 			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2394 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2395 				return -EFAULT;
2396 			jmp_offset = addrs[i + insn->off] - addrs[i];
2397 			if (is_simm32(jmp_offset)) {
2398 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2399 			} else {
2400 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2401 				return -EFAULT;
2402 			}
2403 			break;
2404 		}
2405 		case BPF_JMP | BPF_JA:
2406 			if (insn->off == -1)
2407 				/* -1 jmp instructions will always jump
2408 				 * backwards two bytes. Explicitly handling
2409 				 * this case avoids wasting too many passes
2410 				 * when there are long sequences of replaced
2411 				 * dead code.
2412 				 */
2413 				jmp_offset = -2;
2414 			else
2415 				jmp_offset = addrs[i + insn->off] - addrs[i];
2416 
2417 			if (!jmp_offset)
2418 				/* Optimize out nop jumps */
2419 				break;
2420 emit_jmp:
2421 			if (is_imm8(jmp_offset)) {
2422 				EMIT2(0xEB, jmp_offset);
2423 			} else if (is_simm32(jmp_offset)) {
2424 				EMIT1_off32(0xE9, jmp_offset);
2425 			} else {
2426 				pr_err("jmp gen bug %llx\n", jmp_offset);
2427 				return -EFAULT;
2428 			}
2429 			break;
2430 		/* STX XADD: lock *(u32 *)(dst + off) += src */
2431 		case BPF_STX | BPF_XADD | BPF_W:
2432 		/* STX XADD: lock *(u64 *)(dst + off) += src */
2433 		case BPF_STX | BPF_XADD | BPF_DW:
2434 			goto notyet;
2435 		case BPF_JMP | BPF_EXIT:
2436 			if (seen_exit) {
2437 				jmp_offset = ctx->cleanup_addr - addrs[i];
2438 				goto emit_jmp;
2439 			}
2440 			seen_exit = true;
2441 			/* Update cleanup_addr */
2442 			ctx->cleanup_addr = proglen;
2443 			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2444 			break;
2445 notyet:
2446 			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2447 			return -EFAULT;
2448 		default:
2449 			/*
2450 			 * This error will be seen if new instruction was added
2451 			 * to interpreter, but not to JIT or if there is junk in
2452 			 * bpf_prog
2453 			 */
2454 			pr_err("bpf_jit: unknown opcode %02x\n", code);
2455 			return -EINVAL;
2456 		}
2457 
2458 		ilen = prog - temp;
2459 		if (ilen > BPF_MAX_INSN_SIZE) {
2460 			pr_err("bpf_jit: fatal insn size error\n");
2461 			return -EFAULT;
2462 		}
2463 
2464 		if (image) {
2465 			if (unlikely(proglen + ilen > oldproglen)) {
2466 				pr_err("bpf_jit: fatal error\n");
2467 				return -EFAULT;
2468 			}
2469 			memcpy(image + proglen, temp, ilen);
2470 		}
2471 		proglen += ilen;
2472 		addrs[i] = proglen;
2473 		prog = temp;
2474 	}
2475 	return proglen;
2476 }
2477 
2478 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2479 {
2480 	struct bpf_binary_header *header = NULL;
2481 	struct bpf_prog *tmp, *orig_prog = prog;
2482 	int proglen, oldproglen = 0;
2483 	struct jit_context ctx = {};
2484 	bool tmp_blinded = false;
2485 	u8 *image = NULL;
2486 	int *addrs;
2487 	int pass;
2488 	int i;
2489 
2490 	if (!prog->jit_requested)
2491 		return orig_prog;
2492 
2493 	tmp = bpf_jit_blind_constants(prog);
2494 	/*
2495 	 * If blinding was requested and we failed during blinding,
2496 	 * we must fall back to the interpreter.
2497 	 */
2498 	if (IS_ERR(tmp))
2499 		return orig_prog;
2500 	if (tmp != prog) {
2501 		tmp_blinded = true;
2502 		prog = tmp;
2503 	}
2504 
2505 	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
2506 	if (!addrs) {
2507 		prog = orig_prog;
2508 		goto out;
2509 	}
2510 
2511 	/*
2512 	 * Before first pass, make a rough estimation of addrs[]
2513 	 * each BPF instruction is translated to less than 64 bytes
2514 	 */
2515 	for (proglen = 0, i = 0; i < prog->len; i++) {
2516 		proglen += 64;
2517 		addrs[i] = proglen;
2518 	}
2519 	ctx.cleanup_addr = proglen;
2520 
2521 	/*
2522 	 * JITed image shrinks with every pass and the loop iterates
2523 	 * until the image stops shrinking. Very large BPF programs
2524 	 * may converge on the last pass. In such case do one more
2525 	 * pass to emit the final image.
2526 	 */
2527 	for (pass = 0; pass < 20 || image; pass++) {
2528 		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2529 		if (proglen <= 0) {
2530 out_image:
2531 			image = NULL;
2532 			if (header)
2533 				bpf_jit_binary_free(header);
2534 			prog = orig_prog;
2535 			goto out_addrs;
2536 		}
2537 		if (image) {
2538 			if (proglen != oldproglen) {
2539 				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2540 				       proglen, oldproglen);
2541 				goto out_image;
2542 			}
2543 			break;
2544 		}
2545 		if (proglen == oldproglen) {
2546 			header = bpf_jit_binary_alloc(proglen, &image,
2547 						      1, jit_fill_hole);
2548 			if (!header) {
2549 				prog = orig_prog;
2550 				goto out_addrs;
2551 			}
2552 		}
2553 		oldproglen = proglen;
2554 		cond_resched();
2555 	}
2556 
2557 	if (bpf_jit_enable > 1)
2558 		bpf_jit_dump(prog->len, proglen, pass + 1, image);
2559 
2560 	if (image) {
2561 		bpf_jit_binary_lock_ro(header);
2562 		prog->bpf_func = (void *)image;
2563 		prog->jited = 1;
2564 		prog->jited_len = proglen;
2565 	} else {
2566 		prog = orig_prog;
2567 	}
2568 
2569 out_addrs:
2570 	kfree(addrs);
2571 out:
2572 	if (tmp_blinded)
2573 		bpf_jit_prog_release_other(prog, prog == orig_prog ?
2574 					   tmp : orig_prog);
2575 	return prog;
2576 }
2577