xref: /openbmc/linux/arch/sparc/net/bpf_jit_comp_32.c (revision 5a244f48)
1 #include <linux/moduleloader.h>
2 #include <linux/workqueue.h>
3 #include <linux/netdevice.h>
4 #include <linux/filter.h>
5 #include <linux/cache.h>
6 #include <linux/if_vlan.h>
7 
8 #include <asm/cacheflush.h>
9 #include <asm/ptrace.h>
10 
11 #include "bpf_jit_32.h"
12 
/* JIT control knob read by bpf_jit_compile(): when zero the compiler
 * returns without doing anything; a value greater than 1 additionally
 * dumps the generated image via bpf_jit_dump().
 */
13 int bpf_jit_enable __read_mostly;
14 
/*
 * Tell whether @value can be encoded in a 13-bit signed immediate field.
 *
 * Adding 0x1000 (with unsigned wrap-around) maps the accepted range
 * -0x1000 .. 0xfff onto 0 .. 0x1fff, so a single unsigned comparison
 * covers both the negative and the positive bound.
 */
static inline bool is_simm13(unsigned int value)
{
	const unsigned int biased = value + 0x1000;

	return biased <= 0x1fff;
}
19 
/* Feature bits collected in 'seen' while a filter is translated.
 * bpf_jit_compile() uses them to decide which prologue/epilogue
 * instructions have to be emitted.
 */
20 #define SEEN_DATAREF 1 /* might call external helpers */
21 #define SEEN_XREG    2 /* r_X (the BPF X register) is used */
22 #define SEEN_MEM     4 /* use mem[] for temporary storage */
23 
/* Instruction-word field builders.  Each one masks or shifts its
 * argument into position so that the pieces can simply be OR-ed
 * together into one 32-bit instruction word:
 *   S13()  - low 13 bits (immediate operand)
 *   IMMED  - bit 13, selects the immediate form of an instruction
 *   RD()/COND() - shifted to bit 25, RS1() - bit 14, RS2() - bit 0
 *   OP()   - shifted to bit 30, OP2() - bit 22, OP3() - bit 19
 * F1/F2/F3 combine the opcode fields of the three instruction formats.
 */
24 #define S13(X)		((X) & 0x1fff)
25 #define IMMED		0x00002000
26 #define RD(X)		((X) << 25)
27 #define RS1(X)		((X) << 14)
28 #define RS2(X)		((X))
29 #define OP(X)		((X) << 30)
30 #define OP2(X)		((X) << 22)
31 #define OP3(X)		((X) << 19)
32 #define COND(X)		((X) << 25)
33 #define F1(X)		OP(X)
34 #define F2(X, Y)	(OP(X) | OP2(Y))
35 #define F3(X, Y)	(OP(X) | OP3(Y))
36 
/* Values for the condition field of a branch instruction. */
37 #define CONDN		COND(0x0)
38 #define CONDE		COND(0x1)
39 #define CONDLE		COND(0x2)
40 #define CONDL		COND(0x3)
41 #define CONDLEU		COND(0x4)
42 #define CONDCS		COND(0x5)
43 #define CONDNEG		COND(0x6)
44 #define CONDVC		COND(0x7)
45 #define CONDA		COND(0x8)
46 #define CONDNE		COND(0x9)
47 #define CONDG		COND(0xa)
48 #define CONDGE		COND(0xb)
49 #define CONDGU		COND(0xc)
50 #define CONDCC		COND(0xd)
51 #define CONDPOS		COND(0xe)
52 #define CONDVS		COND(0xf)
53 
/* Unsigned ">=" and "<" are aliases of CONDCC and CONDCS. */
54 #define CONDGEU		CONDCC
55 #define CONDLU		CONDCS
56 
/* Turn a byte displacement into the 22-bit, word-scaled branch
 * displacement field (divide by 4, keep the low 22 bits).
 */
57 #define WDISP22(X)	(((X) >> 2) & 0x3fffff)
58 
/* Branch opcodes: format F2(0, 2) combined with a condition code.
 * The displacement is OR-ed in by emit_branch()/emit_branch_off().
 */
59 #define BA		(F2(0, 2) | CONDA)
60 #define BGU		(F2(0, 2) | CONDGU)
61 #define BLEU		(F2(0, 2) | CONDLEU)
62 #define BGEU		(F2(0, 2) | CONDGEU)
63 #define BLU		(F2(0, 2) | CONDLU)
64 #define BE		(F2(0, 2) | CONDE)
65 #define BNE		(F2(0, 2) | CONDNE)
66 
/* Branch-if-equal used after comparing a pointer; identical to BE here. */
67 #define BE_PTR		BE
68 
/* SETHI encodes bits 31..10 of K, OR_LO the remaining low 10 bits.
 * Emitted as a pair (see emit_set_const) they place a full 32-bit
 * constant in REG.
 */
69 #define SETHI(K, REG)	\
70 	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
71 #define OR_LO(K, REG)	\
72 	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
73 
/* Opcodes of the arithmetic/control instructions in their
 * register-register form; OR in IMMED and S13() for the immediate form.
 */
74 #define ADD		F3(2, 0x00)
75 #define AND		F3(2, 0x01)
76 #define ANDCC		F3(2, 0x11)
77 #define OR		F3(2, 0x02)
78 #define XOR		F3(2, 0x03)
79 #define SUB		F3(2, 0x04)
80 #define SUBCC		F3(2, 0x14)
81 #define MUL		F3(2, 0x0a)	/* umul */
82 #define DIV		F3(2, 0x0e)	/* udiv */
83 #define SLL		F3(2, 0x25)
84 #define SRL		F3(2, 0x26)
85 #define JMPL		F3(2, 0x38)
86 #define CALL		F1(1)
87 #define BR		F2(0, 0x01)
88 #define RD_Y		F3(2, 0x28)
89 #define WR_Y		F3(2, 0x30)
90 
/* Opcodes of the memory access instructions (register-register form). */
91 #define LD32		F3(3, 0x00)
92 #define LD8		F3(3, 0x01)
93 #define LD16		F3(3, 0x02)
94 #define LD64		F3(3, 0x0b)
95 #define ST32		F3(3, 0x04)
96 
/* A pointer load is a 32-bit load in this file.  BASE_STACKFRAME is the
 * number of bytes bpf_jit_compile() reserves on the stack in addition to
 * the BPF_MEMWORDS scratch words when SEEN_MEM is set.
 */
97 #define LDPTR		LD32
98 #define BASE_STACKFRAME	96
99 
/* Immediate-offset (base register + simm13) forms of the accesses above. */
100 #define LD32I		(LD32 | IMMED)
101 #define LD8I		(LD8 | IMMED)
102 #define LD16I		(LD16 | IMMED)
103 #define LD64I		(LD64 | IMMED)
104 #define LDPTRI		(LDPTR | IMMED)
105 #define ST32I		(ST32 | IMMED)
106 
/* All emit_*() helpers store one or more instruction words at *prog and
 * advance prog.  They are meant to be expanded inside bpf_jit_compile()
 * and use its local variables (prog, seen, ...) directly.
 */
/* Emit a nop, encoded as SETHI(0, G0). */
107 #define emit_nop()		\
108 do {				\
109 	*prog++ = SETHI(0, G0);	\
110 } while (0)
111 
/* Negate the accumulator: r_A = G0 - r_A. */
112 #define emit_neg()					\
113 do {	/* sub %g0, r_A, r_A */				\
114 	*prog++ = SUB | RS1(G0) | RS2(r_A) | RD(r_A);	\
115 } while (0)
116 
/* Copy register FROM into register TO. */
117 #define emit_reg_move(FROM, TO)				\
118 do {	/* or %g0, FROM, TO */				\
119 	*prog++ = OR | RS1(G0) | RS2(FROM) | RD(TO);	\
120 } while (0)
121 
/* Set REG to the value of G0 OR G0 (used to zero a register). */
122 #define emit_clear(REG)					\
123 do {	/* or %g0, %g0, REG */				\
124 	*prog++ = OR | RS1(G0) | RS2(G0) | RD(REG);	\
125 } while (0)
126 
/* Load the 32-bit constant K into REG with a two-instruction
 * SETHI / OR_LO pair.
 */
127 #define emit_set_const(K, REG)					\
128 do {	/* sethi %hi(K), REG */					\
129 	*prog++ = SETHI(K, REG);				\
130 	/* or REG, %lo(K), REG */				\
131 	*prog++ = OR_LO(K, REG);				\
132 } while (0)
133 
134 	/* Emit
135 	 *
136 	 *	OP	r_A, r_X, r_A
137 	 *
	 * and record in 'seen' that the X register is used.
	 */
/* Note: OPCODE is used unparenthesised; pass a plain opcode macro. */
138 #define emit_alu_X(OPCODE)					\
139 do {								\
140 	seen |= SEEN_XREG;					\
141 	*prog++ = OPCODE | RS1(r_A) | RS2(r_X) | RD(r_A);	\
142 } while (0)
143 
144 	/* Emit either:
145 	 *
146 	 *	OP	r_A, K, r_A
147 	 *
148 	 * or
149 	 *
150 	 *	sethi	%hi(K), r_TMP
151 	 *	or	r_TMP, %lo(K), r_TMP
152 	 *	OP	r_A, r_TMP, r_A
153 	 *
154 	 * depending upon whether K fits in a signed 13-bit
155 	 * immediate instruction field.  Emit nothing if K
156 	 * is zero, unless OPCODE is AND or MUL: for those the
	 * instruction is always emitted.  K is evaluated more
	 * than once, so it must be free of side effects.
157 	 */
158 #define emit_alu_K(OPCODE, K)					\
159 do {								\
160 	if (K || OPCODE == AND || OPCODE == MUL) {		\
161 		unsigned int _insn = OPCODE;			\
162 		_insn |= RS1(r_A) | RD(r_A);			\
163 		if (is_simm13(K)) {				\
164 			*prog++ = _insn | IMMED | S13(K);	\
165 		} else {					\
166 			emit_set_const(K, r_TMP);		\
167 			*prog++ = _insn | RS2(r_TMP);		\
168 		}						\
169 	}							\
170 } while (0)
171 
/* Load constant K into DEST: a single "or %g0, K, DEST" when K fits
 * in a signed 13-bit immediate, otherwise the two-instruction
 * emit_set_const() sequence.
 */
172 #define emit_loadimm(K, DEST)						\
173 do {									\
174 	if (is_simm13(K)) {						\
175 		/* or %g0, K, DEST */					\
176 		*prog++ = OR | IMMED | RS1(G0) | S13(K) | RD(DEST);	\
177 	} else {							\
178 		emit_set_const(K, DEST);				\
179 	}								\
180 } while (0)
181 
/* Load the pointer-sized member FIELD of STRUCT, located at register
 * BASE, into DEST.  The build fails if FIELD is not pointer-sized.
 */
182 #define emit_loadptr(BASE, STRUCT, FIELD, DEST)				\
183 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
184 	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *));	\
185 	*prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST);		\
186 } while (0)
187 
/* Same as emit_loadptr() for a member that must be exactly u32-sized. */
188 #define emit_load32(BASE, STRUCT, FIELD, DEST)				\
189 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
190 	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32));	\
191 	*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST);		\
192 } while (0)
193 
/* Same as emit_loadptr() for a member that must be exactly u16-sized. */
194 #define emit_load16(BASE, STRUCT, FIELD, DEST)				\
195 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
196 	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16));	\
197 	*prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST);		\
198 } while (0)
199 
/* Byte load of member FIELD without any compile-time size check. */
200 #define __emit_load8(BASE, STRUCT, FIELD, DEST)				\
201 do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
202 	*prog++ = LD8I | RS1(BASE) | S13(_off) | RD(DEST);		\
203 } while (0)
204 
/* Byte load of member FIELD; the build fails unless FIELD is u8-sized. */
205 #define emit_load8(BASE, STRUCT, FIELD, DEST)				\
206 do {	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8));	\
207 	__emit_load8(BASE, STRUCT, FIELD, DEST);			\
208 } while (0)
209 
/* Scratch-memory accesses are addressed as SP + (BIAS - OFF); callers
 * in this file pass OFF = K * 4 for BPF scratch word K.
 */
210 #define BIAS (-4)
211 
/* Load the 32-bit scratch word at byte offset OFF into DEST. */
212 #define emit_ldmem(OFF, DEST)						\
213 do {	*prog++ = LD32I | RS1(SP) | S13(BIAS - (OFF)) | RD(DEST);	\
214 } while (0)
215 
/* Store register SRC into the 32-bit scratch word at byte offset OFF. */
216 #define emit_stmem(OFF, SRC)						\
217 do {	*prog++ = ST32I | RS1(SP) | S13(BIAS - (OFF)) | RD(SRC);	\
218 } while (0)
219 
/* Put the CPU number into REG: with CONFIG_SMP it is read from the cpu
 * member of struct thread_info addressed through G6, otherwise REG is
 * simply cleared.
 */
220 #ifdef CONFIG_SMP
221 #define emit_load_cpu(REG)						\
222 	emit_load32(G6, struct thread_info, cpu, REG)
223 #else
224 #define emit_load_cpu(REG)	emit_clear(REG)
225 #endif
226 
/* Shorthands that load a member of the struct sk_buff addressed by r_SKB.
 * __emit_skb_load8() skips the member-size check done by the others.
 */
227 #define emit_skb_loadptr(FIELD, DEST) \
228 	emit_loadptr(r_SKB, struct sk_buff, FIELD, DEST)
229 #define emit_skb_load32(FIELD, DEST) \
230 	emit_load32(r_SKB, struct sk_buff, FIELD, DEST)
231 #define emit_skb_load16(FIELD, DEST) \
232 	emit_load16(r_SKB, struct sk_buff, FIELD, DEST)
233 #define __emit_skb_load8(FIELD, DEST) \
234 	__emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
235 #define emit_skb_load8(FIELD, DEST) \
236 	emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
237 
/* Emit "jmpl BASE + IMM_OFF, LREG". */
238 #define emit_jmpl(BASE, IMM_OFF, LREG) \
239 	*prog++ = (JMPL | IMMED | RS1(BASE) | S13(IMM_OFF) | RD(LREG))
240 
/* Emit a call to FUNC followed by a nop.  The displacement is computed
 * relative to image + addrs[i] - 8, i.e. the call is expected to be the
 * second-to-last word emitted for BPF instruction i.  During sizing
 * passes image is NULL, so the encoded displacement is only meaningful
 * in the pass that writes the final image.
 */
241 #define emit_call(FUNC)					\
242 do {	void *_here = image + addrs[i] - 8;		\
243 	unsigned int _off = (void *)(FUNC) - _here;	\
244 	*prog++ = CALL | (((_off) >> 2) & 0x3fffffff);	\
245 	emit_nop();					\
246 } while (0)
247 
/* Emit branch BR_OPC to program offset DEST, assuming the branch itself
 * sits at addrs[i] - 8 (callers adjust DEST when it does not).  The
 * delay-slot instruction is NOT emitted here.
 */
248 #define emit_branch(BR_OPC, DEST)			\
249 do {	unsigned int _here = addrs[i] - 8;		\
250 	*prog++ = BR_OPC | WDISP22((DEST) - _here);	\
251 } while (0)
252 
/* Emit branch BR_OPC with an explicit byte displacement OFF relative to
 * the branch instruction.
 */
253 #define emit_branch_off(BR_OPC, OFF)			\
254 do {	*prog++ = BR_OPC | WDISP22(OFF);		\
255 } while (0)
256 
/* Unconditional branch to program offset DEST (see emit_branch). */
257 #define emit_jump(DEST)		emit_branch(BA, DEST)
258 
/* Read the %y register into REG / write REG (plus immediate 0) to %y. */
259 #define emit_read_y(REG)	*prog++ = RD_Y | RD(REG)
260 #define emit_write_y(REG)	*prog++ = WR_Y | IMMED | RS1(REG) | S13(0)
261 
/*
 * Single-instruction emitters for compares, bit tests, add/sub/and and
 * stack adjustment.  Each expands to one "*prog++ = <insn>" expression
 * and therefore relies on the caller's 'prog' cursor.
 *
 * None of these macros ends in a semicolon: the caller supplies it, so
 * every one of them can be used as the only statement of an if/else arm.
 * (emit_cmpi and emit_btsti used to carry a stray trailing ';'.)
 */

/* Compare R1 with R2: SUBCC with destination G0. */
#define emit_cmp(R1, R2) \
	*prog++ = (SUBCC | RS1(R1) | RS2(R2) | RD(G0))

/* Compare R1 with the 13-bit immediate IMM. */
#define emit_cmpi(R1, IMM) \
	*prog++ = (SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))

/* Bit test of R1 against R2: ANDCC with destination G0. */
#define emit_btst(R1, R2) \
	*prog++ = (ANDCC | RS1(R1) | RS2(R2) | RD(G0))

/* Bit test of R1 against the 13-bit immediate IMM. */
#define emit_btsti(R1, IMM) \
	*prog++ = (ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))

/* R3 = R1 - R2 */
#define emit_sub(R1, R2, R3) \
	*prog++ = (SUB | RS1(R1) | RS2(R2) | RD(R3))

/* R3 = R1 - IMM (13-bit immediate) */
#define emit_subi(R1, IMM, R3) \
	*prog++ = (SUB | IMMED | RS1(R1) | S13(IMM) | RD(R3))

/* R3 = R1 + R2 */
#define emit_add(R1, R2, R3) \
	*prog++ = (ADD | RS1(R1) | RS2(R2) | RD(R3))

/* R3 = R1 + IMM (13-bit immediate) */
#define emit_addi(R1, IMM, R3) \
	*prog++ = (ADD | IMMED | RS1(R1) | S13(IMM) | RD(R3))

/* R3 = R1 & R2 */
#define emit_and(R1, R2, R3) \
	*prog++ = (AND | RS1(R1) | RS2(R2) | RD(R3))

/* R3 = R1 & IMM (13-bit immediate) */
#define emit_andi(R1, IMM, R3) \
	*prog++ = (AND | IMMED | RS1(R1) | S13(IMM) | RD(R3))

/* Lower the stack pointer by SZ bytes (SZ must fit in 13 bits). */
#define emit_alloc_stack(SZ) \
	*prog++ = (SUB | IMMED | RS1(SP) | S13(SZ) | RD(SP))

/* Raise the stack pointer by SZ bytes, undoing emit_alloc_stack(SZ). */
#define emit_release_stack(SZ) \
	*prog++ = (ADD | IMMED | RS1(SP) | S13(SZ) | RD(SP))
297 
298 /* A note about branch offset calculations.  The addrs[] array,
299  * indexed by BPF instruction, records the address after all the
300  * sparc instructions emitted for that BPF instruction.
301  *
302  * The most common case is to emit a branch at the end of such
303  * a code sequence.  So this would be two instructions, the
304  * branch and its delay slot.
305  *
306  * Therefore by default the branch emitters calculate the branch
307  * offset field as:
308  *
309  *	destination - (addrs[i] - 8)
310  *
311  * This "addrs[i] - 8" is the address of the branch itself or
312  * what "." would be in assembler notation.  The "8" part is
313  * how we take into consideration the branch and its delay
314  * slot mentioned above.
315  *
316  * Sometimes we need to emit a branch earlier in the code
317  * sequence.  And in these situations we adjust "destination"
318  * to accommodate this difference.  For example, if we needed
319  * to emit a branch (and its delay slot) right before the
320  * final instruction emitted for a BPF opcode, we'd use
321  * "destination + 4" instead of just plain "destination" above.
322  *
323  * This is why you see all of these funny emit_branch() and
324  * emit_jump() calls with adjusted offsets.
325  */
326 
327 void bpf_jit_compile(struct bpf_prog *fp)
328 {
329 	unsigned int cleanup_addr, proglen, oldproglen = 0;
330 	u32 temp[8], *prog, *func, seen = 0, pass;
331 	const struct sock_filter *filter = fp->insns;
332 	int i, flen = fp->len, pc_ret0 = -1;
333 	unsigned int *addrs;
334 	void *image;
335 
336 	if (!bpf_jit_enable)
337 		return;
338 
339 	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
340 	if (addrs == NULL)
341 		return;
342 
343 	/* Before first pass, make a rough estimation of addrs[]
344 	 * each bpf instruction is translated to less than 64 bytes
345 	 */
346 	for (proglen = 0, i = 0; i < flen; i++) {
347 		proglen += 64;
348 		addrs[i] = proglen;
349 	}
350 	cleanup_addr = proglen; /* epilogue address */
351 	image = NULL;
352 	for (pass = 0; pass < 10; pass++) {
353 		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
354 
355 		/* no prologue/epilogue for trivial filters (RET something) */
356 		proglen = 0;
357 		prog = temp;
358 
359 		/* Prologue */
360 		if (seen_or_pass0) {
361 			if (seen_or_pass0 & SEEN_MEM) {
362 				unsigned int sz = BASE_STACKFRAME;
363 				sz += BPF_MEMWORDS * sizeof(u32);
364 				emit_alloc_stack(sz);
365 			}
366 
367 			/* Make sure we dont leek kernel memory. */
368 			if (seen_or_pass0 & SEEN_XREG)
369 				emit_clear(r_X);
370 
371 			/* If this filter needs to access skb data,
372 			 * load %o4 and %o5 with:
373 			 *  %o4 = skb->len - skb->data_len
374 			 *  %o5 = skb->data
375 			 * And also back up %o7 into r_saved_O7 so we can
376 			 * invoke the stubs using 'call'.
377 			 */
378 			if (seen_or_pass0 & SEEN_DATAREF) {
379 				emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
380 				emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
381 				emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
382 				emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
383 			}
384 		}
385 		emit_reg_move(O7, r_saved_O7);
386 
387 		/* Make sure we dont leak kernel information to the user. */
388 		if (bpf_needs_clear_a(&filter[0]))
389 			emit_clear(r_A); /* A = 0 */
390 
391 		for (i = 0; i < flen; i++) {
392 			unsigned int K = filter[i].k;
393 			unsigned int t_offset;
394 			unsigned int f_offset;
395 			u32 t_op, f_op;
396 			u16 code = bpf_anc_helper(&filter[i]);
397 			int ilen;
398 
399 			switch (code) {
400 			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
401 				emit_alu_X(ADD);
402 				break;
403 			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
404 				emit_alu_K(ADD, K);
405 				break;
406 			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
407 				emit_alu_X(SUB);
408 				break;
409 			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
410 				emit_alu_K(SUB, K);
411 				break;
412 			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
413 				emit_alu_X(AND);
414 				break;
415 			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
416 				emit_alu_K(AND, K);
417 				break;
418 			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
419 				emit_alu_X(OR);
420 				break;
421 			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
422 				emit_alu_K(OR, K);
423 				break;
424 			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
425 			case BPF_ALU | BPF_XOR | BPF_X:
426 				emit_alu_X(XOR);
427 				break;
428 			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
429 				emit_alu_K(XOR, K);
430 				break;
431 			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
432 				emit_alu_X(SLL);
433 				break;
434 			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
435 				emit_alu_K(SLL, K);
436 				break;
437 			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
438 				emit_alu_X(SRL);
439 				break;
440 			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
441 				emit_alu_K(SRL, K);
442 				break;
443 			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
444 				emit_alu_X(MUL);
445 				break;
446 			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
447 				emit_alu_K(MUL, K);
448 				break;
449 			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
450 				if (K == 1)
451 					break;
452 				emit_write_y(G0);
453 				/* The Sparc v8 architecture requires
454 				 * three instructions between a %y
455 				 * register write and the first use.
456 				 */
457 				emit_nop();
458 				emit_nop();
459 				emit_nop();
460 				emit_alu_K(DIV, K);
461 				break;
462 			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
463 				emit_cmpi(r_X, 0);
464 				if (pc_ret0 > 0) {
465 					t_offset = addrs[pc_ret0 - 1];
466 					emit_branch(BE, t_offset + 20);
467 					emit_nop(); /* delay slot */
468 				} else {
469 					emit_branch_off(BNE, 16);
470 					emit_nop();
471 					emit_jump(cleanup_addr + 20);
472 					emit_clear(r_A);
473 				}
474 				emit_write_y(G0);
475 				/* The Sparc v8 architecture requires
476 				 * three instructions between a %y
477 				 * register write and the first use.
478 				 */
479 				emit_nop();
480 				emit_nop();
481 				emit_nop();
482 				emit_alu_X(DIV);
483 				break;
484 			case BPF_ALU | BPF_NEG:
485 				emit_neg();
486 				break;
487 			case BPF_RET | BPF_K:
488 				if (!K) {
489 					if (pc_ret0 == -1)
490 						pc_ret0 = i;
491 					emit_clear(r_A);
492 				} else {
493 					emit_loadimm(K, r_A);
494 				}
495 				/* Fallthrough */
496 			case BPF_RET | BPF_A:
497 				if (seen_or_pass0) {
498 					if (i != flen - 1) {
499 						emit_jump(cleanup_addr);
500 						emit_nop();
501 						break;
502 					}
503 					if (seen_or_pass0 & SEEN_MEM) {
504 						unsigned int sz = BASE_STACKFRAME;
505 						sz += BPF_MEMWORDS * sizeof(u32);
506 						emit_release_stack(sz);
507 					}
508 				}
509 				/* jmpl %r_saved_O7 + 8, %g0 */
510 				emit_jmpl(r_saved_O7, 8, G0);
511 				emit_reg_move(r_A, O0); /* delay slot */
512 				break;
513 			case BPF_MISC | BPF_TAX:
514 				seen |= SEEN_XREG;
515 				emit_reg_move(r_A, r_X);
516 				break;
517 			case BPF_MISC | BPF_TXA:
518 				seen |= SEEN_XREG;
519 				emit_reg_move(r_X, r_A);
520 				break;
521 			case BPF_ANC | SKF_AD_CPU:
522 				emit_load_cpu(r_A);
523 				break;
524 			case BPF_ANC | SKF_AD_PROTOCOL:
525 				emit_skb_load16(protocol, r_A);
526 				break;
527 			case BPF_ANC | SKF_AD_PKTTYPE:
528 				__emit_skb_load8(__pkt_type_offset, r_A);
529 				emit_andi(r_A, PKT_TYPE_MAX, r_A);
530 				emit_alu_K(SRL, 5);
531 				break;
532 			case BPF_ANC | SKF_AD_IFINDEX:
533 				emit_skb_loadptr(dev, r_A);
534 				emit_cmpi(r_A, 0);
535 				emit_branch(BE_PTR, cleanup_addr + 4);
536 				emit_nop();
537 				emit_load32(r_A, struct net_device, ifindex, r_A);
538 				break;
539 			case BPF_ANC | SKF_AD_MARK:
540 				emit_skb_load32(mark, r_A);
541 				break;
542 			case BPF_ANC | SKF_AD_QUEUE:
543 				emit_skb_load16(queue_mapping, r_A);
544 				break;
545 			case BPF_ANC | SKF_AD_HATYPE:
546 				emit_skb_loadptr(dev, r_A);
547 				emit_cmpi(r_A, 0);
548 				emit_branch(BE_PTR, cleanup_addr + 4);
549 				emit_nop();
550 				emit_load16(r_A, struct net_device, type, r_A);
551 				break;
552 			case BPF_ANC | SKF_AD_RXHASH:
553 				emit_skb_load32(hash, r_A);
554 				break;
555 			case BPF_ANC | SKF_AD_VLAN_TAG:
556 			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
557 				emit_skb_load16(vlan_tci, r_A);
558 				if (code != (BPF_ANC | SKF_AD_VLAN_TAG)) {
559 					emit_alu_K(SRL, 12);
560 					emit_andi(r_A, 1, r_A);
561 				} else {
562 					emit_loadimm(~VLAN_TAG_PRESENT, r_TMP);
563 					emit_and(r_A, r_TMP, r_A);
564 				}
565 				break;
566 			case BPF_LD | BPF_W | BPF_LEN:
567 				emit_skb_load32(len, r_A);
568 				break;
569 			case BPF_LDX | BPF_W | BPF_LEN:
570 				emit_skb_load32(len, r_X);
571 				break;
572 			case BPF_LD | BPF_IMM:
573 				emit_loadimm(K, r_A);
574 				break;
575 			case BPF_LDX | BPF_IMM:
576 				emit_loadimm(K, r_X);
577 				break;
578 			case BPF_LD | BPF_MEM:
579 				seen |= SEEN_MEM;
580 				emit_ldmem(K * 4, r_A);
581 				break;
582 			case BPF_LDX | BPF_MEM:
583 				seen |= SEEN_MEM | SEEN_XREG;
584 				emit_ldmem(K * 4, r_X);
585 				break;
586 			case BPF_ST:
587 				seen |= SEEN_MEM;
588 				emit_stmem(K * 4, r_A);
589 				break;
590 			case BPF_STX:
591 				seen |= SEEN_MEM | SEEN_XREG;
592 				emit_stmem(K * 4, r_X);
593 				break;
594 
595 #define CHOOSE_LOAD_FUNC(K, func) \
596 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
597 
598 			case BPF_LD | BPF_W | BPF_ABS:
599 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
600 common_load:			seen |= SEEN_DATAREF;
601 				emit_loadimm(K, r_OFF);
602 				emit_call(func);
603 				break;
604 			case BPF_LD | BPF_H | BPF_ABS:
605 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
606 				goto common_load;
607 			case BPF_LD | BPF_B | BPF_ABS:
608 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
609 				goto common_load;
610 			case BPF_LDX | BPF_B | BPF_MSH:
611 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
612 				goto common_load;
613 			case BPF_LD | BPF_W | BPF_IND:
614 				func = bpf_jit_load_word;
615 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
616 				if (K) {
617 					if (is_simm13(K)) {
618 						emit_addi(r_X, K, r_OFF);
619 					} else {
620 						emit_loadimm(K, r_TMP);
621 						emit_add(r_X, r_TMP, r_OFF);
622 					}
623 				} else {
624 					emit_reg_move(r_X, r_OFF);
625 				}
626 				emit_call(func);
627 				break;
628 			case BPF_LD | BPF_H | BPF_IND:
629 				func = bpf_jit_load_half;
630 				goto common_load_ind;
631 			case BPF_LD | BPF_B | BPF_IND:
632 				func = bpf_jit_load_byte;
633 				goto common_load_ind;
634 			case BPF_JMP | BPF_JA:
635 				emit_jump(addrs[i + K]);
636 				emit_nop();
637 				break;
638 
639 #define COND_SEL(CODE, TOP, FOP)	\
640 	case CODE:			\
641 		t_op = TOP;		\
642 		f_op = FOP;		\
643 		goto cond_branch
644 
645 			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
646 			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
647 			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
648 			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
649 			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
650 			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
651 			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
652 			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
653 
654 cond_branch:			f_offset = addrs[i + filter[i].jf];
655 				t_offset = addrs[i + filter[i].jt];
656 
657 				/* same targets, can avoid doing the test :) */
658 				if (filter[i].jt == filter[i].jf) {
659 					emit_jump(t_offset);
660 					emit_nop();
661 					break;
662 				}
663 
664 				switch (code) {
665 				case BPF_JMP | BPF_JGT | BPF_X:
666 				case BPF_JMP | BPF_JGE | BPF_X:
667 				case BPF_JMP | BPF_JEQ | BPF_X:
668 					seen |= SEEN_XREG;
669 					emit_cmp(r_A, r_X);
670 					break;
671 				case BPF_JMP | BPF_JSET | BPF_X:
672 					seen |= SEEN_XREG;
673 					emit_btst(r_A, r_X);
674 					break;
675 				case BPF_JMP | BPF_JEQ | BPF_K:
676 				case BPF_JMP | BPF_JGT | BPF_K:
677 				case BPF_JMP | BPF_JGE | BPF_K:
678 					if (is_simm13(K)) {
679 						emit_cmpi(r_A, K);
680 					} else {
681 						emit_loadimm(K, r_TMP);
682 						emit_cmp(r_A, r_TMP);
683 					}
684 					break;
685 				case BPF_JMP | BPF_JSET | BPF_K:
686 					if (is_simm13(K)) {
687 						emit_btsti(r_A, K);
688 					} else {
689 						emit_loadimm(K, r_TMP);
690 						emit_btst(r_A, r_TMP);
691 					}
692 					break;
693 				}
694 				if (filter[i].jt != 0) {
695 					if (filter[i].jf)
696 						t_offset += 8;
697 					emit_branch(t_op, t_offset);
698 					emit_nop(); /* delay slot */
699 					if (filter[i].jf) {
700 						emit_jump(f_offset);
701 						emit_nop();
702 					}
703 					break;
704 				}
705 				emit_branch(f_op, f_offset);
706 				emit_nop(); /* delay slot */
707 				break;
708 
709 			default:
710 				/* hmm, too complex filter, give up with jit compiler */
711 				goto out;
712 			}
713 			ilen = (void *) prog - (void *) temp;
714 			if (image) {
715 				if (unlikely(proglen + ilen > oldproglen)) {
716 					pr_err("bpb_jit_compile fatal error\n");
717 					kfree(addrs);
718 					module_memfree(image);
719 					return;
720 				}
721 				memcpy(image + proglen, temp, ilen);
722 			}
723 			proglen += ilen;
724 			addrs[i] = proglen;
725 			prog = temp;
726 		}
727 		/* last bpf instruction is always a RET :
728 		 * use it to give the cleanup instruction(s) addr
729 		 */
730 		cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
731 		if (seen_or_pass0 & SEEN_MEM)
732 			cleanup_addr -= 4; /* add %sp, X, %sp; */
733 
734 		if (image) {
735 			if (proglen != oldproglen)
736 				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
737 				       proglen, oldproglen);
738 			break;
739 		}
740 		if (proglen == oldproglen) {
741 			image = module_alloc(proglen);
742 			if (!image)
743 				goto out;
744 		}
745 		oldproglen = proglen;
746 	}
747 
748 	if (bpf_jit_enable > 1)
749 		bpf_jit_dump(flen, proglen, pass + 1, image);
750 
751 	if (image) {
752 		fp->bpf_func = (void *)image;
753 		fp->jited = 1;
754 	}
755 out:
756 	kfree(addrs);
757 	return;
758 }
759 
760 void bpf_jit_free(struct bpf_prog *fp)
761 {
762 	if (fp->jited)
763 		module_memfree(fp->bpf_func);
764 
765 	bpf_prog_unlock_free(fp);
766 }
767