xref: /openbmc/linux/arch/s390/net/bpf_jit_comp.c (revision bf070bb0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * BPF Jit compiler for s390.
4  *
5  * Minimum build requirements:
6  *
7  *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
8  *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
9  *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
10  *  - PACK_STACK
11  *  - 64BIT
12  *
13  * Copyright IBM Corp. 2012,2015
14  *
15  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
16  *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
17  */
18 
19 #define KMSG_COMPONENT "bpf_jit"
20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
21 
22 #include <linux/netdevice.h>
23 #include <linux/filter.h>
24 #include <linux/init.h>
25 #include <linux/bpf.h>
26 #include <asm/cacheflush.h>
27 #include <asm/dis.h>
28 #include <asm/set_memory.h>
29 #include "bpf_jit.h"
30 
31 int bpf_jit_enable __read_mostly;
32 
/*
 * Per-program JIT context.
 *
 * The _EMIT* macros below only store instructions when prg_buf is
 * non-NULL; with prg_buf == NULL they merely advance prg/lit, which
 * allows a sizing-only pass before the output buffer is allocated.
 */
struct bpf_jit {
	u32 seen;		/* Flags to remember seen eBPF instructions */
	u32 seen_reg[16];	/* Array to remember which registers are used */
	u32 *addrs;		/* Array with relative instruction addresses */
	u8 *prg_buf;		/* Start of program */
	int size;		/* Size of program and literal pool */
	int size_prg;		/* Size of program */
	int prg;		/* Current position in program */
	int lit_start;		/* Start of literal pool */
	int lit;		/* Current position in literal pool */
	int base_ip;		/* Base address for literal pool */
	int ret0_ip;		/* Address of return 0 */
	int exit_ip;		/* Address of exit */
	int tail_call_start;	/* Tail call start offset */
	int labels[1];		/* Labels for local jumps */
};
49 
#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */

/* Bit flags for bpf_jit.seen: which features the eBPF program uses */
#define SEEN_SKB	1	/* skb access */
#define SEEN_MEM	2	/* use mem[] for temporary storage */
#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
#define SEEN_LITERAL	8	/* code uses literals */
#define SEEN_FUNC	16	/* calls C functions */
#define SEEN_TAIL_CALL	32	/* code uses tail calls */
#define SEEN_SKB_CHANGE	64	/* code changes skb data */
#define SEEN_REG_AX	128	/* code uses constant blinding */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)	/* needs stack frame */
61 
/*
 * s390 registers
 *
 * The eBPF register numbering space is extended with a few extra
 * identifiers (REG_W0 and up) so that reg2hex[] below can map both
 * BPF registers and JIT-internal helper registers to s390 register
 * numbers through a single table.
 */
#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
#define REG_SKB_DATA	(MAX_BPF_JIT_REG + 2)	/* SKB data register */
#define REG_L		(MAX_BPF_JIT_REG + 3)	/* Literal pool register */
#define REG_15		(MAX_BPF_JIT_REG + 4)	/* Register 15 */
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_14		BPF_REG_0		/* Register 14 */
74 
/*
 * Mapping of BPF registers to s390 registers
 *
 * Note that BPF_REG_AX and REG_SKB_DATA both map to %r12: a program
 * that uses constant blinding (SEEN_REG_AX) cannot also cache the
 * skb data pointer in a register (see emit_load_skb_data_hlen()).
 */
static const int reg2hex[] = {
	/* Return code */
	[BPF_REG_0]	= 14,
	/* Function parameters */
	[BPF_REG_1]	= 2,
	[BPF_REG_2]	= 3,
	[BPF_REG_3]	= 4,
	[BPF_REG_4]	= 5,
	[BPF_REG_5]	= 6,
	/* Call saved registers */
	[BPF_REG_6]	= 7,
	[BPF_REG_7]	= 8,
	[BPF_REG_8]	= 9,
	[BPF_REG_9]	= 10,
	/* BPF stack pointer */
	[BPF_REG_FP]	= 13,
	/* Register for blinding (shared with REG_SKB_DATA) */
	[BPF_REG_AX]	= 12,
	/* SKB data pointer */
	[REG_SKB_DATA]	= 12,
	/* Work registers for s390x backend */
	[REG_W0]	= 0,
	[REG_W1]	= 1,
	[REG_L]		= 11,
	[REG_15]	= 15,
};
104 
105 static inline u32 reg(u32 dst_reg, u32 src_reg)
106 {
107 	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
108 }
109 
110 static inline u32 reg_high(u32 reg)
111 {
112 	return reg2hex[reg] << 4;
113 }
114 
115 static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
116 {
117 	u32 r1 = reg2hex[b1];
118 
119 	if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15)
120 		jit->seen_reg[r1] = 1;
121 }
122 
/* Record usage of the s390 register backing BPF register b1 */
#define REG_SET_SEEN(b1)					\
({								\
	reg_set_seen(jit, b1);					\
})

/* Non-zero if the s390 register backing b1 has been used */
#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
129 
/*
 * EMIT macros for code generation
 *
 * The _EMIT* variants store a raw instruction; the EMIT* variants
 * additionally merge register operands into the opcode and record
 * register usage via REG_SET_SEEN.  All of them advance jit->prg even
 * when prg_buf is NULL, so a first pass can compute code size only.
 */

/* Emit a 2 byte instruction */
#define _EMIT2(op)						\
({								\
	if (jit->prg_buf)					\
		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
	jit->prg += 2;						\
})

/* Emit a 2 byte instruction with two register operands */
#define EMIT2(op, b1, b2)					\
({								\
	_EMIT2(op | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 4 byte instruction */
#define _EMIT4(op)						\
({								\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
	jit->prg += 4;						\
})

/* Emit a 4 byte instruction with two register operands */
#define EMIT4(op, b1, b2)					\
({								\
	_EMIT4(op | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 4 byte RRF-format instruction with three register operands */
#define EMIT4_RRF(op, b1, b2, b3)				\
({								\
	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* Emit a 4 byte instruction with a 12 bit displacement */
#define _EMIT4_DISP(op, disp)					\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT4(op | __disp);					\
})

/* Emit a 4 byte instruction with two registers and a displacement */
#define EMIT4_DISP(op, b1, b2, disp)				\
({								\
	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
		    reg_high(b2) << 8, disp);			\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 4 byte instruction with one register and a 16 bit immediate */
#define EMIT4_IMM(op, b1, imm)					\
({								\
	unsigned int __imm = (imm) & 0xffff;			\
	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
})

/* Emit a 4 byte instruction with a pc-relative offset (in halfwords) */
#define EMIT4_PCREL(op, pcrel)					\
({								\
	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
	_EMIT4(op | __pcrel);					\
})
196 
/* Emit a 6 byte instruction (4 byte op1 followed by 2 byte op2) */
#define _EMIT6(op1, op2)					\
({								\
	if (jit->prg_buf) {					\
		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
	}							\
	jit->prg += 6;						\
})

/* Emit a 6 byte instruction with a 12 bit displacement */
#define _EMIT6_DISP(op1, op2, disp)				\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT6(op1 | __disp, op2);				\
})

/* Emit a 6 byte instruction with a 20 bit displacement (DL/DH split) */
#define _EMIT6_DISP_LH(op1, op2, disp)				\
({								\
	u32 _disp = (u32) disp;					\
	unsigned int __disp_h = _disp & 0xff000;		\
	unsigned int __disp_l = _disp & 0x00fff;		\
	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
})

/* Emit a 6 byte instruction with three registers and a 20 bit displacement */
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
({								\
	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
		       reg_high(b3) << 8, op2, disp);		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

/* Emit a 6 byte compare-and-branch to a local label (jit->labels) */
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask)	\
({								\
	int rel = (jit->labels[label] - jit->prg) >> 1;		\
	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff),	\
	       op2 | mask << 12);				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 6 byte compare-immediate-and-branch to a local label */
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask)	\
({								\
	int rel = (jit->labels[label] - jit->prg) >> 1;		\
	_EMIT6(op1 | (reg_high(b1) | mask) << 16 |		\
		(rel & 0xffff), op2 | (imm & 0xff) << 8);	\
	REG_SET_SEEN(b1);					\
	BUILD_BUG_ON(((unsigned long) imm) > 0xff);		\
})

/* Emit a 6 byte pc-relative branch; the target is taken from addrs[] */
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
({								\
	/* Branch instruction needs 6 bytes */			\
	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

/* Emit a 6 byte instruction with a 32 bit immediate */
#define _EMIT6_IMM(op, imm)					\
({								\
	unsigned int __imm = (imm);				\
	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
})

/* Emit a 6 byte instruction with one register and a 32 bit immediate */
#define EMIT6_IMM(op, b1, imm)					\
({								\
	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
	REG_SET_SEEN(b1);					\
})
267 
/*
 * Store a u32 constant in the literal pool; evaluates to its offset
 * relative to the literal pool base address (jit->base_ip).
 */
#define EMIT_CONST_U32(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit - jit->base_ip;				\
	jit->seen |= SEEN_LITERAL;				\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
	jit->lit += 4;						\
	ret;							\
})

/*
 * Store a u64 constant in the literal pool; evaluates to its offset
 * relative to the literal pool base address (jit->base_ip).
 */
#define EMIT_CONST_U64(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit - jit->base_ip;				\
	jit->seen |= SEEN_LITERAL;				\
	if (jit->prg_buf)					\
		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
	jit->lit += 8;						\
	ret;							\
})

/* Zero-extend a register to 64 bit after a 32 bit ALU operation */
#define EMIT_ZERO(b1)						\
({								\
	/* llgfr %dst,%dst (zero extend to 64 bit) */		\
	EMIT4(0xb9160000, b1, b1);				\
	REG_SET_SEEN(b1);					\
})
296 
/*
 * Fill whole space with illegal instructions
 *
 * Clearing the area to zero leaves it filled with instructions that
 * the CPU treats as invalid, so a stray jump into padding traps
 * instead of executing stale bytes.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	unsigned char *p = area;

	while (size--)
		*p++ = 0;
}
304 
/*
 * Save registers from "rs" (register start) to "re" (register end) on stack
 *
 * Emits a single stg for one register or an stmg for a range.  Unlike
 * restore_regs() no STK_OFF adjustment is applied to the displacement:
 * the prologue saves registers before %r15 is lowered (see
 * bpf_jit_prologue()).
 */
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (rs == re)
		/* stg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
	else
		/* stmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
}
319 
/*
 * Restore registers from "rs" (register start) to "re" (register end) on stack
 *
 * Counterpart of save_regs().  If a stack frame was allocated
 * (SEEN_STACK), %r15 has been lowered by STK_OFF + stack_depth, so
 * the displacement must be increased by the same amount to reach the
 * save area in the caller's frame.
 */
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (jit->seen & SEEN_STACK)
		off += STK_OFF + stack_depth;

	if (rs == re)
		/* lg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
	else
		/* lmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
}
337 
338 /*
339  * Return first seen register (from start)
340  */
341 static int get_start(struct bpf_jit *jit, int start)
342 {
343 	int i;
344 
345 	for (i = start; i <= 15; i++) {
346 		if (jit->seen_reg[i])
347 			return i;
348 	}
349 	return 0;
350 }
351 
352 /*
353  * Return last seen register (from start) (gap >= 2)
354  */
355 static int get_end(struct bpf_jit *jit, int start)
356 {
357 	int i;
358 
359 	for (i = start; i < 15; i++) {
360 		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
361 			return i - 1;
362 	}
363 	return jit->seen_reg[15] ? 15 : 14;
364 }
365 
#define REGS_SAVE	1
#define REGS_RESTORE	0
/*
 * Save and restore clobbered registers (6-15) on stack.
 * We save/restore registers in chunks with gap >= 2 registers.
 *
 * Walks the seen_reg[] map: each iteration finds the next used
 * register (get_start) and extends the chunk until a gap of at least
 * two unused registers appears (get_end), then emits one store or
 * load (multiple) covering the whole chunk.
 */
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{

	int re = 6, rs;

	do {
		rs = get_start(jit, re);
		if (!rs)
			break;
		re = get_end(jit, rs + 1);
		if (op == REGS_SAVE)
			save_regs(jit, rs, re);
		else
			restore_regs(jit, rs, re, stack_depth);
		re++;
	} while (re <= 15);
}
389 
/*
 * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
 * we store the SKB header length on the stack and the SKB data
 * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
 *
 * The header length is computed as skb->len - skb->data_len (the
 * linear part) and kept at STK_OFF_HLEN(%r15).  REG_SKB_DATA shares
 * %r12 with BPF_REG_AX (see reg2hex[]), so the data pointer is only
 * cached in a register when constant blinding is not in use.
 */
static void emit_load_skb_data_hlen(struct bpf_jit *jit)
{
	/* Header length: llgf %w1,<len>(%b1) */
	EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
		      offsetof(struct sk_buff, len));
	/* s %w1,<data_len>(%b1) : %w1 = len - data_len (linear length) */
	EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
		   offsetof(struct sk_buff, data_len));
	/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
	if (!(jit->seen & SEEN_REG_AX))
		/* lg %skb_data,data_off(%b1) */
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
			      BPF_REG_1, offsetof(struct sk_buff, data));
}
410 
/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * See stack frame layout description in "bpf_jit.h"!
 */
static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
{
	if (jit->seen & SEEN_TAIL_CALL) {
		/*
		 * Clear the tail call counter slot:
		 * xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15)
		 */
		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
	} else {
		/* j tail_call_start: NOP if no tail calls are used */
		EMIT4_PCREL(0xa7f40000, 6);
		_EMIT2(0);
	}
	/* Tail calls have to skip above initialization */
	jit->tail_call_start = jit->prg;
	/* Save registers */
	save_restore_regs(jit, REGS_SAVE, stack_depth);
	/* Setup literal pool */
	if (jit->seen & SEEN_LITERAL) {
		/*
		 * basr %r11,0 (%r11 is REG_L per reg2hex[]): the pool
		 * base is the address of the following instruction
		 */
		EMIT2(0x0d00, REG_L, REG_0);
		jit->base_ip = jit->prg;
	}
	/* Setup stack and backchain */
	if (jit->seen & SEEN_STACK) {
		if (jit->seen & SEEN_FUNC)
			/* lgr %w1,%r15 (backchain) */
			EMIT4(0xb9040000, REG_W1, REG_15);
		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
		/* aghi %r15,-(STK_OFF + stack_depth) */
		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
		if (jit->seen & SEEN_FUNC)
			/* stg %w1,152(%r15) (backchain) */
			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
				      REG_15, 152);
	}
	if (jit->seen & SEEN_SKB)
		emit_load_skb_data_hlen(jit);
	if (jit->seen & SEEN_SKB_CHANGE)
		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
			      STK_OFF_SKBP);
}
458 
/*
 * Function epilogue
 *
 * Emits the common exit path: the "return 0" stub (only if some
 * branch targets ret0_ip), loading the BPF return value into %r2,
 * restoring call-saved registers and branching back to the caller.
 */
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
	/* Return 0 */
	if (jit->seen & SEEN_RET0) {
		jit->ret0_ip = jit->prg;
		/* lghi %b0,0 */
		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
	}
	jit->exit_ip = jit->prg;
	/* Load exit code: lgr %r2,%b0 */
	EMIT4(0xb9040000, REG_2, BPF_REG_0);
	/* Restore registers */
	save_restore_regs(jit, REGS_RESTORE, stack_depth);
	/* br %r14 */
	_EMIT2(0x07fe);
}
478 
479 /*
480  * Compile one eBPF instruction into s390x code
481  *
482  * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
483  * stack space for the large switch statement.
484  */
485 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
486 {
487 	struct bpf_insn *insn = &fp->insnsi[i];
488 	int jmp_off, last, insn_count = 1;
489 	unsigned int func_addr, mask;
490 	u32 dst_reg = insn->dst_reg;
491 	u32 src_reg = insn->src_reg;
492 	u32 *addrs = jit->addrs;
493 	s32 imm = insn->imm;
494 	s16 off = insn->off;
495 
496 	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
497 		jit->seen |= SEEN_REG_AX;
498 	switch (insn->code) {
499 	/*
500 	 * BPF_MOV
501 	 */
502 	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
503 		/* llgfr %dst,%src */
504 		EMIT4(0xb9160000, dst_reg, src_reg);
505 		break;
506 	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
507 		/* lgr %dst,%src */
508 		EMIT4(0xb9040000, dst_reg, src_reg);
509 		break;
510 	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
511 		/* llilf %dst,imm */
512 		EMIT6_IMM(0xc00f0000, dst_reg, imm);
513 		break;
514 	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
515 		/* lgfi %dst,imm */
516 		EMIT6_IMM(0xc0010000, dst_reg, imm);
517 		break;
518 	/*
519 	 * BPF_LD 64
520 	 */
521 	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
522 	{
523 		/* 16 byte instruction that uses two 'struct bpf_insn' */
524 		u64 imm64;
525 
526 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
527 		/* lg %dst,<d(imm)>(%l) */
528 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
529 			      EMIT_CONST_U64(imm64));
530 		insn_count = 2;
531 		break;
532 	}
533 	/*
534 	 * BPF_ADD
535 	 */
536 	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
537 		/* ar %dst,%src */
538 		EMIT2(0x1a00, dst_reg, src_reg);
539 		EMIT_ZERO(dst_reg);
540 		break;
541 	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
542 		/* agr %dst,%src */
543 		EMIT4(0xb9080000, dst_reg, src_reg);
544 		break;
545 	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
546 		if (!imm)
547 			break;
548 		/* alfi %dst,imm */
549 		EMIT6_IMM(0xc20b0000, dst_reg, imm);
550 		EMIT_ZERO(dst_reg);
551 		break;
552 	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
553 		if (!imm)
554 			break;
555 		/* agfi %dst,imm */
556 		EMIT6_IMM(0xc2080000, dst_reg, imm);
557 		break;
558 	/*
559 	 * BPF_SUB
560 	 */
561 	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
562 		/* sr %dst,%src */
563 		EMIT2(0x1b00, dst_reg, src_reg);
564 		EMIT_ZERO(dst_reg);
565 		break;
566 	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
567 		/* sgr %dst,%src */
568 		EMIT4(0xb9090000, dst_reg, src_reg);
569 		break;
570 	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
571 		if (!imm)
572 			break;
573 		/* alfi %dst,-imm */
574 		EMIT6_IMM(0xc20b0000, dst_reg, -imm);
575 		EMIT_ZERO(dst_reg);
576 		break;
577 	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
578 		if (!imm)
579 			break;
580 		/* agfi %dst,-imm */
581 		EMIT6_IMM(0xc2080000, dst_reg, -imm);
582 		break;
583 	/*
584 	 * BPF_MUL
585 	 */
586 	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
587 		/* msr %dst,%src */
588 		EMIT4(0xb2520000, dst_reg, src_reg);
589 		EMIT_ZERO(dst_reg);
590 		break;
591 	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
592 		/* msgr %dst,%src */
593 		EMIT4(0xb90c0000, dst_reg, src_reg);
594 		break;
595 	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
596 		if (imm == 1)
597 			break;
598 		/* msfi %r5,imm */
599 		EMIT6_IMM(0xc2010000, dst_reg, imm);
600 		EMIT_ZERO(dst_reg);
601 		break;
602 	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
603 		if (imm == 1)
604 			break;
605 		/* msgfi %dst,imm */
606 		EMIT6_IMM(0xc2000000, dst_reg, imm);
607 		break;
608 	/*
609 	 * BPF_DIV / BPF_MOD
610 	 */
611 	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
612 	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
613 	{
614 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
615 
616 		jit->seen |= SEEN_RET0;
617 		/* ltr %src,%src (if src == 0 goto fail) */
618 		EMIT2(0x1200, src_reg, src_reg);
619 		/* jz <ret0> */
620 		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
621 		/* lhi %w0,0 */
622 		EMIT4_IMM(0xa7080000, REG_W0, 0);
623 		/* lr %w1,%dst */
624 		EMIT2(0x1800, REG_W1, dst_reg);
625 		/* dlr %w0,%src */
626 		EMIT4(0xb9970000, REG_W0, src_reg);
627 		/* llgfr %dst,%rc */
628 		EMIT4(0xb9160000, dst_reg, rc_reg);
629 		break;
630 	}
631 	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
632 	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
633 	{
634 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
635 
636 		jit->seen |= SEEN_RET0;
637 		/* ltgr %src,%src (if src == 0 goto fail) */
638 		EMIT4(0xb9020000, src_reg, src_reg);
639 		/* jz <ret0> */
640 		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
641 		/* lghi %w0,0 */
642 		EMIT4_IMM(0xa7090000, REG_W0, 0);
643 		/* lgr %w1,%dst */
644 		EMIT4(0xb9040000, REG_W1, dst_reg);
645 		/* dlgr %w0,%dst */
646 		EMIT4(0xb9870000, REG_W0, src_reg);
647 		/* lgr %dst,%rc */
648 		EMIT4(0xb9040000, dst_reg, rc_reg);
649 		break;
650 	}
651 	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
652 	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
653 	{
654 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
655 
656 		if (imm == 1) {
657 			if (BPF_OP(insn->code) == BPF_MOD)
658 				/* lhgi %dst,0 */
659 				EMIT4_IMM(0xa7090000, dst_reg, 0);
660 			break;
661 		}
662 		/* lhi %w0,0 */
663 		EMIT4_IMM(0xa7080000, REG_W0, 0);
664 		/* lr %w1,%dst */
665 		EMIT2(0x1800, REG_W1, dst_reg);
666 		/* dl %w0,<d(imm)>(%l) */
667 		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
668 			      EMIT_CONST_U32(imm));
669 		/* llgfr %dst,%rc */
670 		EMIT4(0xb9160000, dst_reg, rc_reg);
671 		break;
672 	}
673 	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
674 	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
675 	{
676 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
677 
678 		if (imm == 1) {
679 			if (BPF_OP(insn->code) == BPF_MOD)
680 				/* lhgi %dst,0 */
681 				EMIT4_IMM(0xa7090000, dst_reg, 0);
682 			break;
683 		}
684 		/* lghi %w0,0 */
685 		EMIT4_IMM(0xa7090000, REG_W0, 0);
686 		/* lgr %w1,%dst */
687 		EMIT4(0xb9040000, REG_W1, dst_reg);
688 		/* dlg %w0,<d(imm)>(%l) */
689 		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
690 			      EMIT_CONST_U64(imm));
691 		/* lgr %dst,%rc */
692 		EMIT4(0xb9040000, dst_reg, rc_reg);
693 		break;
694 	}
695 	/*
696 	 * BPF_AND
697 	 */
698 	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
699 		/* nr %dst,%src */
700 		EMIT2(0x1400, dst_reg, src_reg);
701 		EMIT_ZERO(dst_reg);
702 		break;
703 	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
704 		/* ngr %dst,%src */
705 		EMIT4(0xb9800000, dst_reg, src_reg);
706 		break;
707 	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
708 		/* nilf %dst,imm */
709 		EMIT6_IMM(0xc00b0000, dst_reg, imm);
710 		EMIT_ZERO(dst_reg);
711 		break;
712 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
713 		/* ng %dst,<d(imm)>(%l) */
714 		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
715 			      EMIT_CONST_U64(imm));
716 		break;
717 	/*
718 	 * BPF_OR
719 	 */
720 	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
721 		/* or %dst,%src */
722 		EMIT2(0x1600, dst_reg, src_reg);
723 		EMIT_ZERO(dst_reg);
724 		break;
725 	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
726 		/* ogr %dst,%src */
727 		EMIT4(0xb9810000, dst_reg, src_reg);
728 		break;
729 	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
730 		/* oilf %dst,imm */
731 		EMIT6_IMM(0xc00d0000, dst_reg, imm);
732 		EMIT_ZERO(dst_reg);
733 		break;
734 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
735 		/* og %dst,<d(imm)>(%l) */
736 		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
737 			      EMIT_CONST_U64(imm));
738 		break;
739 	/*
740 	 * BPF_XOR
741 	 */
742 	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
743 		/* xr %dst,%src */
744 		EMIT2(0x1700, dst_reg, src_reg);
745 		EMIT_ZERO(dst_reg);
746 		break;
747 	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
748 		/* xgr %dst,%src */
749 		EMIT4(0xb9820000, dst_reg, src_reg);
750 		break;
751 	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
752 		if (!imm)
753 			break;
754 		/* xilf %dst,imm */
755 		EMIT6_IMM(0xc0070000, dst_reg, imm);
756 		EMIT_ZERO(dst_reg);
757 		break;
758 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
759 		/* xg %dst,<d(imm)>(%l) */
760 		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
761 			      EMIT_CONST_U64(imm));
762 		break;
763 	/*
764 	 * BPF_LSH
765 	 */
766 	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
767 		/* sll %dst,0(%src) */
768 		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
769 		EMIT_ZERO(dst_reg);
770 		break;
771 	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
772 		/* sllg %dst,%dst,0(%src) */
773 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
774 		break;
775 	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
776 		if (imm == 0)
777 			break;
778 		/* sll %dst,imm(%r0) */
779 		EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
780 		EMIT_ZERO(dst_reg);
781 		break;
782 	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
783 		if (imm == 0)
784 			break;
785 		/* sllg %dst,%dst,imm(%r0) */
786 		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
787 		break;
788 	/*
789 	 * BPF_RSH
790 	 */
791 	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
792 		/* srl %dst,0(%src) */
793 		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
794 		EMIT_ZERO(dst_reg);
795 		break;
796 	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
797 		/* srlg %dst,%dst,0(%src) */
798 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
799 		break;
800 	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
801 		if (imm == 0)
802 			break;
803 		/* srl %dst,imm(%r0) */
804 		EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
805 		EMIT_ZERO(dst_reg);
806 		break;
807 	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
808 		if (imm == 0)
809 			break;
810 		/* srlg %dst,%dst,imm(%r0) */
811 		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
812 		break;
813 	/*
814 	 * BPF_ARSH
815 	 */
816 	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
817 		/* srag %dst,%dst,0(%src) */
818 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
819 		break;
820 	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
821 		if (imm == 0)
822 			break;
823 		/* srag %dst,%dst,imm(%r0) */
824 		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
825 		break;
826 	/*
827 	 * BPF_NEG
828 	 */
829 	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
830 		/* lcr %dst,%dst */
831 		EMIT2(0x1300, dst_reg, dst_reg);
832 		EMIT_ZERO(dst_reg);
833 		break;
834 	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
835 		/* lcgr %dst,%dst */
836 		EMIT4(0xb9130000, dst_reg, dst_reg);
837 		break;
838 	/*
839 	 * BPF_FROM_BE/LE
840 	 */
841 	case BPF_ALU | BPF_END | BPF_FROM_BE:
842 		/* s390 is big endian, therefore only clear high order bytes */
843 		switch (imm) {
844 		case 16: /* dst = (u16) cpu_to_be16(dst) */
845 			/* llghr %dst,%dst */
846 			EMIT4(0xb9850000, dst_reg, dst_reg);
847 			break;
848 		case 32: /* dst = (u32) cpu_to_be32(dst) */
849 			/* llgfr %dst,%dst */
850 			EMIT4(0xb9160000, dst_reg, dst_reg);
851 			break;
852 		case 64: /* dst = (u64) cpu_to_be64(dst) */
853 			break;
854 		}
855 		break;
856 	case BPF_ALU | BPF_END | BPF_FROM_LE:
857 		switch (imm) {
858 		case 16: /* dst = (u16) cpu_to_le16(dst) */
859 			/* lrvr %dst,%dst */
860 			EMIT4(0xb91f0000, dst_reg, dst_reg);
861 			/* srl %dst,16(%r0) */
862 			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
863 			/* llghr %dst,%dst */
864 			EMIT4(0xb9850000, dst_reg, dst_reg);
865 			break;
866 		case 32: /* dst = (u32) cpu_to_le32(dst) */
867 			/* lrvr %dst,%dst */
868 			EMIT4(0xb91f0000, dst_reg, dst_reg);
869 			/* llgfr %dst,%dst */
870 			EMIT4(0xb9160000, dst_reg, dst_reg);
871 			break;
872 		case 64: /* dst = (u64) cpu_to_le64(dst) */
873 			/* lrvgr %dst,%dst */
874 			EMIT4(0xb90f0000, dst_reg, dst_reg);
875 			break;
876 		}
877 		break;
878 	/*
879 	 * BPF_ST(X)
880 	 */
881 	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
882 		/* stcy %src,off(%dst) */
883 		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
884 		jit->seen |= SEEN_MEM;
885 		break;
886 	case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
887 		/* sthy %src,off(%dst) */
888 		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
889 		jit->seen |= SEEN_MEM;
890 		break;
891 	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
892 		/* sty %src,off(%dst) */
893 		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
894 		jit->seen |= SEEN_MEM;
895 		break;
896 	case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
897 		/* stg %src,off(%dst) */
898 		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
899 		jit->seen |= SEEN_MEM;
900 		break;
901 	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
902 		/* lhi %w0,imm */
903 		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
904 		/* stcy %w0,off(dst) */
905 		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
906 		jit->seen |= SEEN_MEM;
907 		break;
908 	case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
909 		/* lhi %w0,imm */
910 		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
911 		/* sthy %w0,off(dst) */
912 		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
913 		jit->seen |= SEEN_MEM;
914 		break;
915 	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
916 		/* llilf %w0,imm  */
917 		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
918 		/* sty %w0,off(%dst) */
919 		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
920 		jit->seen |= SEEN_MEM;
921 		break;
922 	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
923 		/* lgfi %w0,imm */
924 		EMIT6_IMM(0xc0010000, REG_W0, imm);
925 		/* stg %w0,off(%dst) */
926 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
927 		jit->seen |= SEEN_MEM;
928 		break;
929 	/*
930 	 * BPF_STX XADD (atomic_add)
931 	 */
932 	case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
933 		/* laal %w0,%src,off(%dst) */
934 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
935 			      dst_reg, off);
936 		jit->seen |= SEEN_MEM;
937 		break;
938 	case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
939 		/* laalg %w0,%src,off(%dst) */
940 		EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
941 			      dst_reg, off);
942 		jit->seen |= SEEN_MEM;
943 		break;
944 	/*
945 	 * BPF_LDX
946 	 */
947 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
948 		/* llgc %dst,0(off,%src) */
949 		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
950 		jit->seen |= SEEN_MEM;
951 		break;
952 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
953 		/* llgh %dst,0(off,%src) */
954 		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
955 		jit->seen |= SEEN_MEM;
956 		break;
957 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
958 		/* llgf %dst,off(%src) */
959 		jit->seen |= SEEN_MEM;
960 		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
961 		break;
962 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
963 		/* lg %dst,0(off,%src) */
964 		jit->seen |= SEEN_MEM;
965 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
966 		break;
967 	/*
968 	 * BPF_JMP / CALL
969 	 */
970 	case BPF_JMP | BPF_CALL:
971 	{
972 		/*
973 		 * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
974 		 */
975 		const u64 func = (u64)__bpf_call_base + imm;
976 
977 		REG_SET_SEEN(BPF_REG_5);
978 		jit->seen |= SEEN_FUNC;
979 		/* lg %w1,<d(imm)>(%l) */
980 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
981 			      EMIT_CONST_U64(func));
982 		/* basr %r14,%w1 */
983 		EMIT2(0x0d00, REG_14, REG_W1);
984 		/* lgr %b0,%r2: load return value into %b0 */
985 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
986 		if (bpf_helper_changes_pkt_data((void *)func)) {
987 			jit->seen |= SEEN_SKB_CHANGE;
988 			/* lg %b1,ST_OFF_SKBP(%r15) */
989 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
990 				      REG_15, STK_OFF_SKBP);
991 			emit_load_skb_data_hlen(jit);
992 		}
993 		break;
994 	}
995 	case BPF_JMP | BPF_TAIL_CALL:
996 		/*
997 		 * Implicit input:
998 		 *  B1: pointer to ctx
999 		 *  B2: pointer to bpf_array
1000 		 *  B3: index in bpf_array
1001 		 */
1002 		jit->seen |= SEEN_TAIL_CALL;
1003 
1004 		/*
1005 		 * if (index >= array->map.max_entries)
1006 		 *         goto out;
1007 		 */
1008 
1009 		/* llgf %w1,map.max_entries(%b2) */
1010 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
1011 			      offsetof(struct bpf_array, map.max_entries));
1012 		/* clgrj %b3,%w1,0xa,label0: if %b3 >= %w1 goto out */
1013 		EMIT6_PCREL_LABEL(0xec000000, 0x0065, BPF_REG_3,
1014 				  REG_W1, 0, 0xa);
1015 
1016 		/*
1017 		 * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
1018 		 *         goto out;
1019 		 */
1020 
1021 		if (jit->seen & SEEN_STACK)
1022 			off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
1023 		else
1024 			off = STK_OFF_TCCNT;
1025 		/* lhi %w0,1 */
1026 		EMIT4_IMM(0xa7080000, REG_W0, 1);
1027 		/* laal %w1,%w0,off(%r15) */
1028 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
1029 		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
1030 		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
1031 				      MAX_TAIL_CALL_CNT, 0, 0x2);
1032 
1033 		/*
1034 		 * prog = array->ptrs[index];
1035 		 * if (prog == NULL)
1036 		 *         goto out;
1037 		 */
1038 
1039 		/* sllg %r1,%b3,3: %r1 = index * 8 */
1040 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, BPF_REG_3, REG_0, 3);
1041 		/* lg %r1,prog(%b2,%r1) */
1042 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
1043 			      REG_1, offsetof(struct bpf_array, ptrs));
1044 		/* clgij %r1,0,0x8,label0 */
1045 		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
1046 
1047 		/*
1048 		 * Restore registers before calling function
1049 		 */
1050 		save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
1051 
1052 		/*
1053 		 * goto *(prog->bpf_func + tail_call_start);
1054 		 */
1055 
1056 		/* lg %r1,bpf_func(%r1) */
1057 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
1058 			      offsetof(struct bpf_prog, bpf_func));
1059 		/* bc 0xf,tail_call_start(%r1) */
1060 		_EMIT4(0x47f01000 + jit->tail_call_start);
1061 		/* out: */
1062 		jit->labels[0] = jit->prg;
1063 		break;
1064 	case BPF_JMP | BPF_EXIT: /* return b0 */
1065 		last = (i == fp->len - 1) ? 1 : 0;
1066 		if (last && !(jit->seen & SEEN_RET0))
1067 			break;
1068 		/* j <exit> */
1069 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
1070 		break;
1071 	/*
1072 	 * Branch relative (number of skipped instructions) to offset on
1073 	 * condition.
1074 	 *
1075 	 * Condition code to mask mapping:
1076 	 *
1077 	 * CC | Description	   | Mask
1078 	 * ------------------------------
1079 	 * 0  | Operands equal	   |	8
1080 	 * 1  | First operand low  |	4
1081 	 * 2  | First operand high |	2
1082 	 * 3  | Unused		   |	1
1083 	 *
1084 	 * For s390x relative branches: ip = ip + off_bytes
1085 	 * For BPF relative branches:	insn = insn + off_insns + 1
1086 	 *
1087 	 * For example for s390x with offset 0 we jump to the branch
1088 	 * instruction itself (loop) and for BPF with offset 0 we
1089 	 * branch to the instruction behind the branch.
1090 	 */
1091 	case BPF_JMP | BPF_JA: /* if (true) */
1092 		mask = 0xf000; /* j */
1093 		goto branch_oc;
1094 	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
1095 		mask = 0x2000; /* jh */
1096 		goto branch_ks;
1097 	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
1098 		mask = 0x4000; /* jl */
1099 		goto branch_ks;
1100 	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
1101 		mask = 0xa000; /* jhe */
1102 		goto branch_ks;
1103 	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
1104 		mask = 0xc000; /* jle */
1105 		goto branch_ks;
1106 	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
1107 		mask = 0x2000; /* jh */
1108 		goto branch_ku;
1109 	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
1110 		mask = 0x4000; /* jl */
1111 		goto branch_ku;
1112 	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
1113 		mask = 0xa000; /* jhe */
1114 		goto branch_ku;
1115 	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
1116 		mask = 0xc000; /* jle */
1117 		goto branch_ku;
1118 	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
1119 		mask = 0x7000; /* jne */
1120 		goto branch_ku;
1121 	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
1122 		mask = 0x8000; /* je */
1123 		goto branch_ku;
1124 	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
1125 		mask = 0x7000; /* jnz */
1126 		/* lgfi %w1,imm (load sign extend imm) */
1127 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1128 		/* ngr %w1,%dst */
1129 		EMIT4(0xb9800000, REG_W1, dst_reg);
1130 		goto branch_oc;
1131 
1132 	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
1133 		mask = 0x2000; /* jh */
1134 		goto branch_xs;
1135 	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
1136 		mask = 0x4000; /* jl */
1137 		goto branch_xs;
1138 	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
1139 		mask = 0xa000; /* jhe */
1140 		goto branch_xs;
1141 	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
1142 		mask = 0xc000; /* jle */
1143 		goto branch_xs;
1144 	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
1145 		mask = 0x2000; /* jh */
1146 		goto branch_xu;
1147 	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
1148 		mask = 0x4000; /* jl */
1149 		goto branch_xu;
1150 	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
1151 		mask = 0xa000; /* jhe */
1152 		goto branch_xu;
1153 	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
1154 		mask = 0xc000; /* jle */
1155 		goto branch_xu;
1156 	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
1157 		mask = 0x7000; /* jne */
1158 		goto branch_xu;
1159 	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
1160 		mask = 0x8000; /* je */
1161 		goto branch_xu;
1162 	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
1163 		mask = 0x7000; /* jnz */
1164 		/* ngrk %w1,%dst,%src */
1165 		EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
1166 		goto branch_oc;
1167 branch_ks:
1168 		/* lgfi %w1,imm (load sign extend imm) */
1169 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1170 		/* cgrj %dst,%w1,mask,off */
1171 		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
1172 		break;
1173 branch_ku:
1174 		/* lgfi %w1,imm (load sign extend imm) */
1175 		EMIT6_IMM(0xc0010000, REG_W1, imm);
1176 		/* clgrj %dst,%w1,mask,off */
1177 		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
1178 		break;
1179 branch_xs:
1180 		/* cgrj %dst,%src,mask,off */
1181 		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
1182 		break;
1183 branch_xu:
1184 		/* clgrj %dst,%src,mask,off */
1185 		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
1186 		break;
1187 branch_oc:
1188 		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
1189 		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
1190 		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
1191 		break;
1192 	/*
1193 	 * BPF_LD
1194 	 */
1195 	case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
1196 	case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
1197 		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1198 			func_addr = __pa(sk_load_byte_pos);
1199 		else
1200 			func_addr = __pa(sk_load_byte);
1201 		goto call_fn;
1202 	case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
1203 	case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
1204 		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1205 			func_addr = __pa(sk_load_half_pos);
1206 		else
1207 			func_addr = __pa(sk_load_half);
1208 		goto call_fn;
1209 	case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
1210 	case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
1211 		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
1212 			func_addr = __pa(sk_load_word_pos);
1213 		else
1214 			func_addr = __pa(sk_load_word);
1215 		goto call_fn;
1216 call_fn:
1217 		jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
1218 		REG_SET_SEEN(REG_14); /* Return address of possible func call */
1219 
1220 		/*
1221 		 * Implicit input:
1222 		 *  BPF_REG_6	 (R7) : skb pointer
1223 		 *  REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
1224 		 *
1225 		 * Calculated input:
1226 		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb
1227 		 *  BPF_REG_5	 (R6) : return address
1228 		 *
1229 		 * Output:
1230 		 *  BPF_REG_0	 (R14): data read from skb
1231 		 *
1232 		 * Scratch registers (BPF_REG_1-5)
1233 		 */
1234 
1235 		/* Call function: llilf %w1,func_addr  */
1236 		EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
1237 
1238 		/* Offset: lgfi %b2,imm */
1239 		EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
1240 		if (BPF_MODE(insn->code) == BPF_IND)
1241 			/* agfr %b2,%src (%src is s32 here) */
1242 			EMIT4(0xb9180000, BPF_REG_2, src_reg);
1243 
1244 		/* Reload REG_SKB_DATA if BPF_REG_AX is used */
1245 		if (jit->seen & SEEN_REG_AX)
1246 			/* lg %skb_data,data_off(%b6) */
1247 			EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
1248 				      BPF_REG_6, offsetof(struct sk_buff, data));
1249 		/* basr %b5,%w1 (%b5 is call saved) */
1250 		EMIT2(0x0d00, BPF_REG_5, REG_W1);
1251 
1252 		/*
1253 		 * Note: For fast access we jump directly after the
1254 		 * jnz instruction from bpf_jit.S
1255 		 */
1256 		/* jnz <ret0> */
1257 		EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
1258 		break;
1259 	default: /* too complex, give up */
1260 		pr_err("Unknown opcode %02x\n", insn->code);
1261 		return -1;
1262 	}
1263 	return insn_count;
1264 }
1265 
1266 /*
1267  * Compile eBPF program into s390x code
1268  */
1269 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
1270 {
1271 	int i, insn_count;
1272 
1273 	jit->lit = jit->lit_start;
1274 	jit->prg = 0;
1275 
1276 	bpf_jit_prologue(jit, fp->aux->stack_depth);
1277 	for (i = 0; i < fp->len; i += insn_count) {
1278 		insn_count = bpf_jit_insn(jit, fp, i);
1279 		if (insn_count < 0)
1280 			return -1;
1281 		/* Next instruction address */
1282 		jit->addrs[i + insn_count] = jit->prg;
1283 	}
1284 	bpf_jit_epilogue(jit, fp->aux->stack_depth);
1285 
1286 	jit->lit_start = jit->prg;
1287 	jit->size = jit->lit;
1288 	jit->size_prg = jit->prg;
1289 	return 0;
1290 }
1291 
1292 /*
1293  * Compile eBPF program "fp"
1294  */
1295 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1296 {
1297 	struct bpf_prog *tmp, *orig_fp = fp;
1298 	struct bpf_binary_header *header;
1299 	bool tmp_blinded = false;
1300 	struct bpf_jit jit;
1301 	int pass;
1302 
1303 	if (!bpf_jit_enable)
1304 		return orig_fp;
1305 
1306 	tmp = bpf_jit_blind_constants(fp);
1307 	/*
1308 	 * If blinding was requested and we failed during blinding,
1309 	 * we must fall back to the interpreter.
1310 	 */
1311 	if (IS_ERR(tmp))
1312 		return orig_fp;
1313 	if (tmp != fp) {
1314 		tmp_blinded = true;
1315 		fp = tmp;
1316 	}
1317 
1318 	memset(&jit, 0, sizeof(jit));
1319 	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
1320 	if (jit.addrs == NULL) {
1321 		fp = orig_fp;
1322 		goto out;
1323 	}
1324 	/*
1325 	 * Three initial passes:
1326 	 *   - 1/2: Determine clobbered registers
1327 	 *   - 3:   Calculate program size and addrs arrray
1328 	 */
1329 	for (pass = 1; pass <= 3; pass++) {
1330 		if (bpf_jit_prog(&jit, fp)) {
1331 			fp = orig_fp;
1332 			goto free_addrs;
1333 		}
1334 	}
1335 	/*
1336 	 * Final pass: Allocate and generate program
1337 	 */
1338 	if (jit.size >= BPF_SIZE_MAX) {
1339 		fp = orig_fp;
1340 		goto free_addrs;
1341 	}
1342 	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
1343 	if (!header) {
1344 		fp = orig_fp;
1345 		goto free_addrs;
1346 	}
1347 	if (bpf_jit_prog(&jit, fp)) {
1348 		fp = orig_fp;
1349 		goto free_addrs;
1350 	}
1351 	if (bpf_jit_enable > 1) {
1352 		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
1353 		print_fn_code(jit.prg_buf, jit.size_prg);
1354 	}
1355 	bpf_jit_binary_lock_ro(header);
1356 	fp->bpf_func = (void *) jit.prg_buf;
1357 	fp->jited = 1;
1358 	fp->jited_len = jit.size;
1359 free_addrs:
1360 	kfree(jit.addrs);
1361 out:
1362 	if (tmp_blinded)
1363 		bpf_jit_prog_release_other(fp, fp == orig_fp ?
1364 					   tmp : orig_fp);
1365 	return fp;
1366 }
1367