xref: /openbmc/linux/arch/s390/net/bpf_jit_comp.c (revision d60c7ab6fad75d753f866f2fc87725196645e0f3)
// SPDX-License-Identifier: GPL-2.0
/*
 * BPF Jit compiler for s390.
 *
 * Minimum build requirements:
 *
 *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
 *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
 *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
 *  - 64BIT
 *
 * Copyright IBM Corp. 2012,2015
 *
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "bpf_jit"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/extable.h>
#include <asm/dis.h>
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include <asm/text-patching.h>
#include "bpf_jit.h"

struct bpf_jit {
	u32 seen;		/* Flags to remember seen eBPF instructions */
	u32 seen_reg[16];	/* Array to remember which registers are used */
	u32 *addrs;		/* Array with relative instruction addresses */
	u8 *prg_buf;		/* Start of program */
	int size;		/* Size of program and literal pool */
	int size_prg;		/* Size of program */
	int prg;		/* Current position in program */
	int lit32_start;	/* Start of 32-bit literal pool */
	int lit32;		/* Current position in 32-bit literal pool */
	int lit64_start;	/* Start of 64-bit literal pool */
	int lit64;		/* Current position in 64-bit literal pool */
	int base_ip;		/* Base address for literal pool */
	int exit_ip;		/* Address of exit */
	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
	int tail_call_start;	/* Tail call start offset */
	int excnt;		/* Number of exception table entries */
	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
	int prologue_plt;	/* Start of prologue hotpatch PLT */
};

#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
#define SEEN_LITERAL	BIT(1)		/* code uses literals */
#define SEEN_FUNC	BIT(2)		/* calls C functions */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)

/*
 * s390 registers
 */
#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_3		BPF_REG_2		/* Register 3 */
#define REG_4		BPF_REG_3		/* Register 4 */
#define REG_7		BPF_REG_6		/* Register 7 */
#define REG_8		BPF_REG_7		/* Register 8 */
#define REG_14		BPF_REG_0		/* Register 14 */

/*
 * Mapping of BPF registers to s390 registers
 */
static const int reg2hex[] = {
	/* Return code */
	[BPF_REG_0]	= 14,
	/* Function parameters */
	[BPF_REG_1]	= 2,
	[BPF_REG_2]	= 3,
	[BPF_REG_3]	= 4,
	[BPF_REG_4]	= 5,
	[BPF_REG_5]	= 6,
	/* Call saved registers */
	[BPF_REG_6]	= 7,
	[BPF_REG_7]	= 8,
	[BPF_REG_8]	= 9,
	[BPF_REG_9]	= 10,
	/* BPF stack pointer */
	[BPF_REG_FP]	= 13,
	/* Register for blinding */
	[BPF_REG_AX]	= 12,
	/* Work registers for s390x backend */
	[REG_W0]	= 0,
	[REG_W1]	= 1,
	[REG_L]		= 11,
	[REG_15]	= 15,
};

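/*
 * Pack the s390 numbers of two mapped registers into the R1/R2 nibbles
 * of an instruction
 */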
static inline u32 reg(u32 dst_reg, u32 src_reg)
{
	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
}

static inline u32 reg_high(u32 reg)
{
	return reg2hex[reg] << 4;
}

static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
	u32 r1 = reg2hex[b1];

	if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
		jit->seen_reg[r1] = 1;
}

#define REG_SET_SEEN(b1)					\
({								\
	reg_set_seen(jit, b1);					\
})

#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]

/*
 * EMIT macros for code generation
 */

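/*
 * The _EMITn() macros store an n-byte instruction at the current output
 * position and advance jit->prg; on sizing passes prg_buf is NULL, so
 * only the position is advanced.
 */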
#define _EMIT2(op)						\
({								\
	if (jit->prg_buf)					\
		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 2;						\
})

#define EMIT2(op, b1, b2)					\
({								\
	_EMIT2((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define _EMIT4(op)						\
({								\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 4;						\
})

#define EMIT4(op, b1, b2)					\
({								\
	_EMIT4((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT4_RRF(op, b1, b2, b3)				\
({								\
	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

#define _EMIT4_DISP(op, disp)					\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT4((op) | __disp);					\
})

#define EMIT4_DISP(op, b1, b2, disp)				\
({								\
	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
		    reg_high(b2) << 8, (disp));			\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT4_IMM(op, b1, imm)					\
({								\
	unsigned int __imm = (imm) & 0xffff;			\
	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
	REG_SET_SEEN(b1);					\
})

#define EMIT4_PCREL(op, pcrel)					\
({								\
	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
	_EMIT4((op) | __pcrel);					\
})

#define EMIT4_PCREL_RIC(op, mask, target)			\
({								\
	int __rel = ((target) - jit->prg) / 2;			\
	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
})

#define _EMIT6(op1, op2)					\
({								\
	if (jit->prg_buf) {					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
	}							\
	jit->prg += 6;						\
})

#define _EMIT6_DISP(op1, op2, disp)				\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT6((op1) | __disp, op2);				\
})

#define _EMIT6_DISP_LH(op1, op2, disp)				\
({								\
	u32 _disp = (u32) (disp);				\
	unsigned int __disp_h = _disp & 0xff000;		\
	unsigned int __disp_l = _disp & 0x00fff;		\
	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
})

#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
({								\
	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
		       reg_high(b3) << 8, op2, disp);		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
	       (op2) | (mask) << 12);				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
	REG_SET_SEEN(b1);					\
	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
})

#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
({								\
	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT6_PCREL_RILB(op, b, target)				\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
	REG_SET_SEEN(b);					\
})

#define EMIT6_PCREL_RIL(op, target)				\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
})

#define EMIT6_PCREL_RILC(op, mask, target)			\
({								\
	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
})

#define _EMIT6_IMM(op, imm)					\
({								\
	unsigned int __imm = (imm);				\
	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
})

#define EMIT6_IMM(op, b1, imm)					\
({								\
	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
	REG_SET_SEEN(b1);					\
})

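/*
 * Constants go into the 32-bit and 64-bit literal pools that follow the
 * program; EMIT_CONST_U{32,64}() return the displacement of the new
 * entry relative to the pool base held in %l
 */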
#define _EMIT_CONST_U32(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit32;					\
	if (jit->prg_buf)					\
		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
	jit->lit32 += 4;					\
	ret;							\
})

#define EMIT_CONST_U32(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U32(val) - jit->base_ip;			\
})

#define _EMIT_CONST_U64(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit64;					\
	if (jit->prg_buf)					\
		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
	jit->lit64 += 8;					\
	ret;							\
})

#define EMIT_CONST_U64(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U64(val) - jit->base_ip;			\
})

#define EMIT_ZERO(b1)						\
({								\
	if (!fp->aux->verifier_zext) {				\
		/* llgfr %dst,%dst (zero extend to 64 bit) */	\
		EMIT4(0xb9160000, b1, b1);			\
		REG_SET_SEEN(b1);				\
	}							\
})

/*
 * Return whether this is the first pass. The first pass is special, since we
 * don't know any sizes yet, and thus must be conservative.
 */
static bool is_first_pass(struct bpf_jit *jit)
{
	return jit->size == 0;
}

/*
 * Return whether this is the code generation pass. The code generation pass is
 * special, since we should change as little as possible.
 */
static bool is_codegen_pass(struct bpf_jit *jit)
{
	return jit->prg_buf;
}

/*
 * Return whether "rel" can be encoded as a short PC-relative offset
 */
static bool is_valid_rel(int rel)
{
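	/* Short relative branches hold a signed 16-bit number of halfwords. */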
	return rel >= -65536 && rel <= 65534;
}

/*
 * Return whether "off" can be reached using a short PC-relative offset
 */
static bool can_use_rel(struct bpf_jit *jit, int off)
{
	return is_valid_rel(off - jit->prg);
}

/*
 * Return whether given displacement can be encoded using
 * Long-Displacement Facility
 */
static bool is_valid_ldisp(int disp)
{
	return disp >= -524288 && disp <= 524287;
}

/*
 * Return whether the next 32-bit literal pool entry can be referenced using
 * Long-Displacement Facility
 */
static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
{
	return is_valid_ldisp(jit->lit32 - jit->base_ip);
}

/*
 * Return whether the next 64-bit literal pool entry can be referenced using
 * Long-Displacement Facility
 */
static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
{
	return is_valid_ldisp(jit->lit64 - jit->base_ip);
}

/*
 * Fill whole space with illegal instructions
 */
static void jit_fill_hole(void *area, unsigned int size)
{
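	/* A halfword of zeros is an illegal instruction on s390. */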
	memset(area, 0, size);
}

/*
 * Save registers from "rs" (register start) to "re" (register end) on stack
 */
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (rs == re)
		/* stg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
	else
		/* stmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
}

/*
 * Restore registers from "rs" (register start) to "re" (register end) on stack
 */
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (jit->seen & SEEN_STACK)
		off += STK_OFF + stack_depth;

	if (rs == re)
		/* lg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
	else
		/* lmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
}

/*
 * Return first seen register (from start)
 */
static int get_start(struct bpf_jit *jit, int start)
{
	int i;

	for (i = start; i <= 15; i++) {
		if (jit->seen_reg[i])
			return i;
	}
	return 0;
}

/*
 * Return last seen register (from start) (gap >= 2)
 */
static int get_end(struct bpf_jit *jit, int start)
{
	int i;

	for (i = start; i < 15; i++) {
		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
			return i - 1;
	}
	return jit->seen_reg[15] ? 15 : 14;
}

#define REGS_SAVE	1
#define REGS_RESTORE	0
/*
 * Save and restore clobbered registers (6-15) on stack.
 * We save/restore registers in chunks with gap >= 2 registers.
 */
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
	const int last = 15, save_restore_size = 6;
	int re = 6, rs;

	if (is_first_pass(jit)) {
		/*
		 * We don't know yet which registers are used. Reserve space
		 * conservatively.
		 */
		jit->prg += (last - re + 1) * save_restore_size;
		return;
	}

	do {
		rs = get_start(jit, re);
		if (!rs)
			break;
		re = get_end(jit, rs + 1);
		if (op == REGS_SAVE)
			save_regs(jit, rs, re);
		else
			restore_regs(jit, rs, re, stack_depth);
		re++;
	} while (re <= last);
}

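/*
 * Emit "size" bytes of padding: branch over the area where possible and
 * fill the remainder with 2-byte nops (bcr 0,%r0)
 */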
static void bpf_skip(struct bpf_jit *jit, int size)
{
	if (size >= 6 && !is_valid_rel(size)) {
		/* brcl 0xf,size */
		EMIT6_PCREL_RIL(0xc0f4000000, size);
		size -= 6;
	} else if (size >= 4 && is_valid_rel(size)) {
		/* brc 0xf,size */
		EMIT4_PCREL(0xa7f40000, size);
		size -= 4;
	}
	while (size >= 2) {
		/* bcr 0,%0 */
		_EMIT2(0x0700);
		size -= 2;
	}
}

/*
 * PLT for hotpatchable calls. The calling convention is the same as for the
 * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
 */
struct bpf_plt {
	char code[16];
	void *ret;
	void *target;
} __packed;
extern const struct bpf_plt bpf_plt;
asm(
	".pushsection .rodata\n"
	"	.balign 8\n"
	"bpf_plt:\n"
	"	lgrl %r0,bpf_plt_ret\n"
	"	lgrl %r1,bpf_plt_target\n"
	"	br %r1\n"
	"	.balign 8\n"
	"bpf_plt_ret: .quad 0\n"
	"bpf_plt_target: .quad 0\n"
	"	.popsection\n"
);
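/*
 * The lgrl instructions above read %r0 and %r1 from the two quadwords
 * that follow the 16 code bytes, i.e. from the "ret" and "target" fields
 * of struct bpf_plt; bpf_jit_plt() copies this template and fills them in.
 */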

static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
	memcpy(plt, &bpf_plt, sizeof(*plt));
	plt->ret = ret;
	plt->target = target;
}

/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * See stack frame layout description in "bpf_jit.h"!
 */
static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
			     u32 stack_depth)
{
	/* No-op for hotpatching */
	/* brcl 0,prologue_plt */
	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
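	/* The 6-byte nop above can later be patched into a real branch to the PLT. */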
	jit->prologue_plt_ret = jit->prg;

	if (fp->aux->func_idx == 0) {
		/* Initialize the tail call counter in the main program. */
		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
	} else {
		/*
		 * Skip the tail call counter initialization in subprograms.
		 * Insert nops in order to have tail_call_start at a
		 * predictable offset.
		 */
		bpf_skip(jit, 6);
	}
	/* Tail calls have to skip above initialization */
	jit->tail_call_start = jit->prg;
	/* Save registers */
	save_restore_regs(jit, REGS_SAVE, stack_depth);
	/* Setup literal pool */
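	/*
	 * If everything up to the end of the literal pool is in range of a
	 * long displacement, address the pool relative to the current
	 * instruction (basr); otherwise address the start of the 32-bit
	 * pool directly (larl).
	 */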
	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
		if (!is_first_pass(jit) &&
		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
			/* basr %l,0 */
			EMIT2(0x0d00, REG_L, REG_0);
			jit->base_ip = jit->prg;
		} else {
			/* larl %l,lit32_start */
			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
			jit->base_ip = jit->lit32_start;
		}
	}
	/* Setup stack and backchain */
	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
			/* lgr %w1,%r15 (backchain) */
			EMIT4(0xb9040000, REG_W1, REG_15);
		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
		/* aghi %r15,-STK_OFF */
		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
			/* stg %w1,152(%r15) (backchain) */
			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
				      REG_15, 152);
	}
}

/*
 * Emit an expoline for a jump that follows
 */
static void emit_expoline(struct bpf_jit *jit)
{
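	/*
	 * The exrl executes the "br" that the caller emits right after
	 * this thunk; the "j ." keeps speculative execution penned in.
	 */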
	/* exrl %r0,.+10 */
	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
	/* j . */
	EMIT4_PCREL(0xa7f40000, 0);
}

/*
 * Emit __s390_indirect_jump_r1 thunk if necessary
 */
static void emit_r1_thunk(struct bpf_jit *jit)
{
	if (nospec_uses_trampoline()) {
		jit->r1_thunk_ip = jit->prg;
		emit_expoline(jit);
		/* br %r1 */
		_EMIT2(0x07f1);
	}
}

/*
 * Call r1 either directly or via __s390_indirect_jump_r1 thunk
 */
static void call_r1(struct bpf_jit *jit)
{
	if (nospec_uses_trampoline())
		/* brasl %r14,__s390_indirect_jump_r1 */
		EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
	else
		/* basr %r14,%r1 */
		EMIT2(0x0d00, REG_14, REG_1);
}

/*
 * Function epilogue
 */
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
	jit->exit_ip = jit->prg;
	/* Load exit code: lgr %r2,%b0 */
	EMIT4(0xb9040000, REG_2, BPF_REG_0);
	/* Restore registers */
	save_restore_regs(jit, REGS_RESTORE, stack_depth);
	if (nospec_uses_trampoline()) {
		jit->r14_thunk_ip = jit->prg;
		/* Generate __s390_indirect_jump_r14 thunk */
		emit_expoline(jit);
	}
	/* br %r14 */
	_EMIT2(0x07fe);

	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
		emit_r1_thunk(jit);

	jit->prg = ALIGN(jit->prg, 8);
	jit->prologue_plt = jit->prg;
	if (jit->prg_buf)
		bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
			    jit->prg_buf + jit->prologue_plt_ret, NULL);
	jit->prg += sizeof(struct bpf_plt);
}

static int get_probe_mem_regno(const u8 *insn)
{
	/*
	 * insn must point to llgc, llgh, llgf or lg, which have destination
	 * register at the same position.
	 */
	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
		return -1;
	if (insn[5] != 0x90 && /* llgc */
	    insn[5] != 0x91 && /* llgh */
	    insn[5] != 0x16 && /* llgf */
	    insn[5] != 0x04) /* lg */
		return -1;
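	/*
	 * In the RXY format the R1 (destination) field is the high nibble
	 * of the second instruction byte.
	 */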
	return insn[1] >> 4;
}

bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
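	/* Resume at the nop behind the probe; the probed load reads as zero. */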
	regs->psw.addr = extable_fixup(x);
	regs->gprs[x->data] = 0;
	return true;
}

static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
			     int probe_prg, int nop_prg)
{
	struct exception_table_entry *ex;
	int reg, prg;
	s64 delta;
	u8 *insn;
	int i;

	if (!fp->aux->extable)
		/* Do nothing during early JIT passes. */
		return 0;
	insn = jit->prg_buf + probe_prg;
	reg = get_probe_mem_regno(insn);
	if (WARN_ON_ONCE(reg < 0))
		/* JIT bug - unexpected probe instruction. */
		return -1;
	if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
		/* JIT bug - gap between probe and nop instructions. */
		return -1;
	for (i = 0; i < 2; i++) {
		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
			/* Verifier bug - not enough entries. */
			return -1;
		ex = &fp->aux->extable[jit->excnt];
		/* Add extable entries for probe and nop instructions. */
		prg = i == 0 ? probe_prg : nop_prg;
		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - code and extable must be close. */
			return -1;
		ex->insn = delta;
		/*
		 * Always land on the nop. Note that extable infrastructure
		 * ignores fixup field, it is handled by ex_handler_bpf().
		 */
		delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - landing pad and extable must be close. */
			return -1;
		ex->fixup = delta;
		ex->type = EX_TYPE_BPF;
		ex->data = reg;
		jit->excnt++;
	}
	return 0;
}

/*
 * Sign-extend the register if necessary
 */
static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
{
	if (!(flags & BTF_FMODEL_SIGNED_ARG))
		return 0;

	switch (size) {
	case 1:
		/* lgbr %r,%r */
		EMIT4(0xb9060000, r, r);
		return 0;
	case 2:
		/* lghr %r,%r */
		EMIT4(0xb9070000, r, r);
		return 0;
	case 4:
		/* lgfr %r,%r */
		EMIT4(0xb9140000, r, r);
		return 0;
	case 8:
		return 0;
	default:
		return -1;
	}
}

/*
 * Compile one eBPF instruction into s390x code
 *
 * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
 * stack space for the large switch statement.
 */
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
				 int i, bool extra_pass, u32 stack_depth)
{
	struct bpf_insn *insn = &fp->insnsi[i];
	u32 dst_reg = insn->dst_reg;
	u32 src_reg = insn->src_reg;
	int last, insn_count = 1;
	u32 *addrs = jit->addrs;
	s32 imm = insn->imm;
	s16 off = insn->off;
	int probe_prg = -1;
	unsigned int mask;
	int nop_prg;
	int err;

	if (BPF_CLASS(insn->code) == BPF_LDX &&
	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
		probe_prg = jit->prg;

	switch (insn->code) {
	/*
	 * BPF_MOV
	 */
	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
		/* llgfr %dst,%src */
		EMIT4(0xb9160000, dst_reg, src_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
		/* lgr %dst,%src */
		EMIT4(0xb9040000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
		/* llilf %dst,imm */
		EMIT6_IMM(0xc00f0000, dst_reg, imm);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
		/* lgfi %dst,imm */
		EMIT6_IMM(0xc0010000, dst_reg, imm);
		break;
	/*
	 * BPF_LD 64
	 */
	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
	{
		/* 16 byte instruction that uses two 'struct bpf_insn' */
		u64 imm64;

		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
		/* lgrl %dst,imm */
		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
		insn_count = 2;
		break;
	}
	/*
	 * BPF_ADD
	 */
	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
		/* ar %dst,%src */
		EMIT2(0x1a00, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
		/* agr %dst,%src */
		EMIT4(0xb9080000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
		if (imm != 0) {
			/* alfi %dst,imm */
			EMIT6_IMM(0xc20b0000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
		if (!imm)
			break;
		/* agfi %dst,imm */
		EMIT6_IMM(0xc2080000, dst_reg, imm);
		break;
	/*
	 * BPF_SUB
	 */
	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
		/* sr %dst,%src */
		EMIT2(0x1b00, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
		/* sgr %dst,%src */
		EMIT4(0xb9090000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
		if (imm != 0) {
			/* alfi %dst,-imm */
			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
		if (!imm)
			break;
		if (imm == -0x80000000) {
			/* algfi %dst,0x80000000 */
			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
		} else {
			/* agfi %dst,-imm */
			EMIT6_IMM(0xc2080000, dst_reg, -imm);
		}
		break;
	/*
	 * BPF_MUL
	 */
	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
		/* msr %dst,%src */
		EMIT4(0xb2520000, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
		/* msgr %dst,%src */
		EMIT4(0xb90c0000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
		if (imm != 1) {
			/* msfi %dst,imm */
			EMIT6_IMM(0xc2010000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
		if (imm == 1)
			break;
		/* msgfi %dst,imm */
		EMIT6_IMM(0xc2000000, dst_reg, imm);
		break;
	/*
	 * BPF_DIV / BPF_MOD
	 */
	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

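		/*
		 * dlr divides the unsigned doubleword in the even/odd pair
		 * %w0:%w1 by %src: the quotient lands in %w1, the remainder
		 * in %w0, which is why rc_reg selects between the two.
		 */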
		/* lhi %w0,0 */
		EMIT4_IMM(0xa7080000, REG_W0, 0);
		/* lr %w1,%dst */
		EMIT2(0x1800, REG_W1, dst_reg);
		/* dlr %w0,%src */
		EMIT4(0xb9970000, REG_W0, src_reg);
		/* llgfr %dst,%rc */
		EMIT4(0xb9160000, dst_reg, rc_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	}
	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		/* lghi %w0,0 */
		EMIT4_IMM(0xa7090000, REG_W0, 0);
		/* lgr %w1,%dst */
		EMIT4(0xb9040000, REG_W1, dst_reg);
		/* dlgr %w0,%src */
		EMIT4(0xb9870000, REG_W0, src_reg);
		/* lgr %dst,%rc */
		EMIT4(0xb9040000, dst_reg, rc_reg);
		break;
	}
	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		if (imm == 1) {
			if (BPF_OP(insn->code) == BPF_MOD)
				/* lghi %dst,0 */
				EMIT4_IMM(0xa7090000, dst_reg, 0);
			else
				EMIT_ZERO(dst_reg);
			break;
		}
		/* lhi %w0,0 */
		EMIT4_IMM(0xa7080000, REG_W0, 0);
		/* lr %w1,%dst */
		EMIT2(0x1800, REG_W1, dst_reg);
		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
			/* dl %w0,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
				      EMIT_CONST_U32(imm));
		} else {
			/* lgfrl %dst,imm */
			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
					 _EMIT_CONST_U32(imm));
			jit->seen |= SEEN_LITERAL;
			/* dlr %w0,%dst */
			EMIT4(0xb9970000, REG_W0, dst_reg);
		}
		/* llgfr %dst,%rc */
		EMIT4(0xb9160000, dst_reg, rc_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	}
	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		if (imm == 1) {
			if (BPF_OP(insn->code) == BPF_MOD)
				/* lghi %dst,0 */
				EMIT4_IMM(0xa7090000, dst_reg, 0);
			break;
		}
		/* lghi %w0,0 */
		EMIT4_IMM(0xa7090000, REG_W0, 0);
		/* lgr %w1,%dst */
		EMIT4(0xb9040000, REG_W1, dst_reg);
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* dlg %w0,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %dst,imm */
			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* dlgr %w0,%dst */
			EMIT4(0xb9870000, REG_W0, dst_reg);
		}
		/* lgr %dst,%rc */
		EMIT4(0xb9040000, dst_reg, rc_reg);
		break;
	}
	/*
	 * BPF_AND
	 */
	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
		/* nr %dst,%src */
		EMIT2(0x1400, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
		/* ngr %dst,%src */
		EMIT4(0xb9800000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
		/* nilf %dst,imm */
		EMIT6_IMM(0xc00b0000, dst_reg, imm);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* ng %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0080,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* ngr %dst,%w0 */
			EMIT4(0xb9800000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_OR
	 */
	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		/* or %dst,%src */
		EMIT2(0x1600, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
		/* ogr %dst,%src */
		EMIT4(0xb9810000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		/* oilf %dst,imm */
		EMIT6_IMM(0xc00d0000, dst_reg, imm);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* og %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0081,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* ogr %dst,%w0 */
			EMIT4(0xb9810000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_XOR
	 */
	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
		/* xr %dst,%src */
		EMIT2(0x1700, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
		/* xgr %dst,%src */
		EMIT4(0xb9820000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
		if (imm != 0) {
			/* xilf %dst,imm */
			EMIT6_IMM(0xc0070000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* xg %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0082,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* xgr %dst,%w0 */
			EMIT4(0xb9820000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_LSH
	 */
	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
		/* sll %dst,0(%src) */
		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
		/* sllg %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
		if (imm != 0) {
			/* sll %dst,imm(%r0) */
			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
		if (imm == 0)
			break;
		/* sllg %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_RSH
	 */
	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
		/* srl %dst,0(%src) */
		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
		/* srlg %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
		if (imm != 0) {
			/* srl %dst,imm(%r0) */
			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
		if (imm == 0)
			break;
		/* srlg %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_ARSH
	 */
	case BPF_ALU | BPF_ARSH | BPF_X: /* ((s32) dst) >>= src */
		/* sra %dst,0(%src) */
		EMIT4_DISP(0x8a000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
		/* srag %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst) >>= imm */
		if (imm != 0) {
			/* sra %dst,imm(%r0) */
			EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
		if (imm == 0)
			break;
		/* srag %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_NEG
	 */
	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
		/* lcr %dst,%dst */
		EMIT2(0x1300, dst_reg, dst_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
		/* lcgr %dst,%dst */
		EMIT4(0xb9030000, dst_reg, dst_reg);
		break;
	/*
	 * BPF_FROM_BE/LE
	 */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
		/* s390 is big endian, therefore only clear high order bytes */
		switch (imm) {
		case 16: /* dst = (u16) cpu_to_be16(dst) */
			/* llghr %dst,%dst */
			EMIT4(0xb9850000, dst_reg, dst_reg);
			if (insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case 32: /* dst = (u32) cpu_to_be32(dst) */
			if (!fp->aux->verifier_zext)
				/* llgfr %dst,%dst */
				EMIT4(0xb9160000, dst_reg, dst_reg);
			break;
		case 64: /* dst = (u64) cpu_to_be64(dst) */
			break;
		}
		break;
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16: /* dst = (u16) cpu_to_le16(dst) */
			/* lrvr %dst,%dst */
			EMIT4(0xb91f0000, dst_reg, dst_reg);
			/* srl %dst,16(%r0) */
			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
			/* llghr %dst,%dst */
			EMIT4(0xb9850000, dst_reg, dst_reg);
			if (insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case 32: /* dst = (u32) cpu_to_le32(dst) */
			/* lrvr %dst,%dst */
			EMIT4(0xb91f0000, dst_reg, dst_reg);
			if (!fp->aux->verifier_zext)
				/* llgfr %dst,%dst */
				EMIT4(0xb9160000, dst_reg, dst_reg);
			break;
		case 64: /* dst = (u64) cpu_to_le64(dst) */
			/* lrvgr %dst,%dst */
			EMIT4(0xb90f0000, dst_reg, dst_reg);
			break;
		}
		break;
	/*
	 * BPF_NOSPEC (speculation barrier)
	 */
	case BPF_ST | BPF_NOSPEC:
		break;
	/*
	 * BPF_ST(X)
	 */
	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
		/* stcy %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		/* sthy %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		/* sty %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		/* stg %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
		/* lhi %w0,imm */
		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
		/* stcy %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
		/* lhi %w0,imm */
		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
		/* sthy %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
		/* llilf %w0,imm  */
		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
		/* sty %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
		/* lgfi %w0,imm */
		EMIT6_IMM(0xc0010000, REG_W0, imm);
		/* stg %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	/*
	 * BPF_ATOMIC
	 */
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	{
		bool is32 = BPF_SIZE(insn->code) == BPF_W;

		switch (insn->imm) {
/* {op32|op64} {%w0|%src},%src,off(%dst) */
#define EMIT_ATOMIC(op32, op64) do {					\
	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
		      src_reg, dst_reg, off);				\
	if (insn->imm & BPF_FETCH) {					\
		/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */	\
		_EMIT2(0x07e0);						\
		if (is32)						\
			EMIT_ZERO(src_reg);				\
	}								\
} while (0)
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
			/* {laal|laalg} */
			EMIT_ATOMIC(0x00fa, 0x00ea);
			break;
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
			/* {lan|lang} */
			EMIT_ATOMIC(0x00f4, 0x00e4);
			break;
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
			/* {lao|laog} */
			EMIT_ATOMIC(0x00f6, 0x00e6);
			break;
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
			/* {lax|laxg} */
			EMIT_ATOMIC(0x00f7, 0x00e7);
			break;
#undef EMIT_ATOMIC
		case BPF_XCHG:
			/* {ly|lg} %w0,off(%dst) */
			EMIT6_DISP_LH(0xe3000000,
				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
				      dst_reg, off);
			/* 0: {csy|csg} %w0,%src,off(%dst) */
			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
				      REG_W0, src_reg, dst_reg, off);
			/* brc 4,0b */
			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
			/* {llgfr|lgr} %src,%w0 */
			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
			if (is32 && insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case BPF_CMPXCHG:
			/* 0: {csy|csg} %b0,%src,off(%dst) */
			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
				      BPF_REG_0, src_reg, dst_reg, off);
			break;
		default:
			pr_err("Unknown atomic operation %02x\n", insn->imm);
			return -1;
		}

		jit->seen |= SEEN_MEM;
		break;
	}
	/*
	 * BPF_LDX
	 */
	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
		/* llgc %dst,0(off,%src) */
		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
		/* llgh %dst,0(off,%src) */
		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
		/* llgf %dst,off(%src) */
		jit->seen |= SEEN_MEM;
		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
		/* lg %dst,0(off,%src) */
		jit->seen |= SEEN_MEM;
		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
		break;
	/*
	 * BPF_JMP / CALL
	 */
	case BPF_JMP | BPF_CALL:
	{
		const struct btf_func_model *m;
		bool func_addr_fixed;
		int j, ret;
		u64 func;

		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
					    &func, &func_addr_fixed);
		if (ret < 0)
			return -1;

		REG_SET_SEEN(BPF_REG_5);
		jit->seen |= SEEN_FUNC;
		/*
		 * Copy the tail call counter to where the callee expects it.
		 *
		 * Note 1: The callee can increment the tail call counter, but
		 * we do not load it back, since the x86 JIT does not do this
		 * either.
		 *
		 * Note 2: We assume that the verifier does not let us call the
		 * main program, which clears the tail call counter on entry.
		 */
		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));

		/* Sign-extend the kfunc arguments. */
		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
			m = bpf_jit_find_kfunc_model(fp, insn);
			if (!m)
				return -1;

			for (j = 0; j < m->nr_args; j++) {
				if (sign_extend(jit, BPF_REG_1 + j,
						m->arg_size[j],
						m->arg_flags[j]))
					return -1;
			}
		}

		/* lgrl %w1,func */
		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
		/* %r1() */
		call_r1(jit);
		/* lgr %b0,%r2: load return value into %b0 */
		EMIT4(0xb9040000, BPF_REG_0, REG_2);
		break;
	}
	case BPF_JMP | BPF_TAIL_CALL: {
		int patch_1_clrj, patch_2_clij, patch_3_brc;

		/*
		 * Implicit input:
		 *  B1: pointer to ctx
		 *  B2: pointer to bpf_array
		 *  B3: index in bpf_array
		 *
		 * if (index >= array->map.max_entries)
		 *         goto out;
		 */

		/* llgf %w1,map.max_entries(%b2) */
		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
			      offsetof(struct bpf_array, map.max_entries));
		/* if ((u32)%b3 >= (u32)%w1) goto out; */
		/* clrj %b3,%w1,0xa,out */
		patch_1_clrj = jit->prg;
		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
				 jit->prg);

		/*
		 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
		 *         goto out;
		 */

		if (jit->seen & SEEN_STACK)
			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
		else
			off = STK_OFF_TCCNT;
		/* lhi %w0,1 */
		EMIT4_IMM(0xa7080000, REG_W0, 1);
		/* laal %w1,%w0,off(%r15) */
		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
		/* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
		patch_2_clij = jit->prg;
		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
				 2, jit->prg);

		/*
		 * prog = array->ptrs[index];
		 * if (prog == NULL)
		 *         goto out;
		 */

		/* llgfr %r1,%b3: %r1 = (u32) index */
		EMIT4(0xb9160000, REG_1, BPF_REG_3);
		/* sllg %r1,%r1,3: %r1 *= 8 */
		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
		/* ltg %r1,prog(%b2,%r1) */
		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
			      REG_1, offsetof(struct bpf_array, ptrs));
		/* brc 0x8,out */
		patch_3_brc = jit->prg;
		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);

		/*
		 * Restore registers before calling function
		 */
		save_restore_regs(jit, REGS_RESTORE, stack_depth);

		/*
		 * goto *(prog->bpf_func + tail_call_start);
		 */

		/* lg %r1,bpf_func(%r1) */
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
			      offsetof(struct bpf_prog, bpf_func));
		if (nospec_uses_trampoline()) {
			jit->seen |= SEEN_FUNC;
			/* aghi %r1,tail_call_start */
			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
			/* brcl 0xf,__s390_indirect_jump_r1 */
			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
		} else {
			/* bc 0xf,tail_call_start(%r1) */
			_EMIT4(0x47f01000 + jit->tail_call_start);
		}
		/* out: */
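		/*
		 * Now that the address of "out" is known, fill in the 16-bit
		 * halfword offsets of the three forward branches above; the
		 * offset field starts 2 bytes into each instruction.
		 */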
1536  		if (jit->prg_buf) {
1537  			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
1538  				(jit->prg - patch_1_clrj) >> 1;
1539  			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
1540  				(jit->prg - patch_2_clij) >> 1;
1541  			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
1542  				(jit->prg - patch_3_brc) >> 1;
1543  		}
1544  		break;
1545  	}
1546  	case BPF_JMP | BPF_EXIT: /* return b0 */
1547  		last = (i == fp->len - 1) ? 1 : 0;
1548  		if (last)
1549  			break;
1550  		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
1551  			/* brc 0xf, <exit> */
1552  			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
1553  		else
1554  			/* brcl 0xf, <exit> */
1555  			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
1556  		break;
1557  	/*
1558  	 * Branch relative (number of skipped instructions) to offset on
1559  	 * condition.
1560  	 *
1561  	 * Condition code to mask mapping:
1562  	 *
1563  	 * CC | Description	   | Mask
1564  	 * ------------------------------
1565  	 * 0  | Operands equal	   |	8
1566  	 * 1  | First operand low  |	4
1567  	 * 2  | First operand high |	2
1568  	 * 3  | Unused		   |	1
1569  	 *
1570  	 * For s390x relative branches: ip = ip + off_bytes
1571  	 * For BPF relative branches:	insn = insn + off_insns + 1
1572  	 *
1573  	 * For example for s390x with offset 0 we jump to the branch
1574  	 * instruction itself (loop) and for BPF with offset 0 we
1575  	 * branch to the instruction behind the branch.
1576  	 */
1577  	case BPF_JMP | BPF_JA: /* if (true) */
1578  		mask = 0xf000; /* j */
1579  		goto branch_oc;
1580  	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
1581  	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
1582  		mask = 0x2000; /* jh */
1583  		goto branch_ks;
1584  	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
1585  	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
1586  		mask = 0x4000; /* jl */
1587  		goto branch_ks;
1588  	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
1589  	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
1590  		mask = 0xa000; /* jhe */
1591  		goto branch_ks;
1592  	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
1593  	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
1594  		mask = 0xc000; /* jle */
1595  		goto branch_ks;
1596  	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
1597  	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
1598  		mask = 0x2000; /* jh */
1599  		goto branch_ku;
1600  	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
1601  	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
1602  		mask = 0x4000; /* jl */
1603  		goto branch_ku;
1604  	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
1605  	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
1606  		mask = 0xa000; /* jhe */
1607  		goto branch_ku;
1608  	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
1609  	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
1610  		mask = 0xc000; /* jle */
1611  		goto branch_ku;
1612  	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
1613  	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
1614  		mask = 0x7000; /* jne */
1615  		goto branch_ku;
1616  	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
1617  	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
1618  		mask = 0x8000; /* je */
1619  		goto branch_ku;
1620  	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
1621  	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
1622  		mask = 0x7000; /* jnz */
1623  		if (BPF_CLASS(insn->code) == BPF_JMP32) {
1624  			/* llilf %w1,imm (load zero extend imm) */
1625  			EMIT6_IMM(0xc00f0000, REG_W1, imm);
1626  			/* nr %w1,%dst */
1627  			EMIT2(0x1400, REG_W1, dst_reg);
1628  		} else {
1629  			/* lgfi %w1,imm (load sign extend imm) */
1630  			EMIT6_IMM(0xc0010000, REG_W1, imm);
1631  			/* ngr %w1,%dst */
1632  			EMIT4(0xb9800000, REG_W1, dst_reg);
1633  		}
1634  		goto branch_oc;
1635  
1636  	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
1637  	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
1638  		mask = 0x2000; /* jh */
1639  		goto branch_xs;
1640  	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
1641  	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
1642  		mask = 0x4000; /* jl */
1643  		goto branch_xs;
1644  	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
1645  	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
1646  		mask = 0xa000; /* jhe */
1647  		goto branch_xs;
1648  	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
1649  	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
1650  		mask = 0xc000; /* jle */
1651  		goto branch_xs;
1652  	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
1653  	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
1654  		mask = 0x2000; /* jh */
1655  		goto branch_xu;
1656  	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
1657  	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
1658  		mask = 0x4000; /* jl */
1659  		goto branch_xu;
1660  	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
1661  	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
1662  		mask = 0xa000; /* jhe */
1663  		goto branch_xu;
1664  	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
1665  	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
1666  		mask = 0xc000; /* jle */
1667  		goto branch_xu;
1668  	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
1669  	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
1670  		mask = 0x7000; /* jne */
1671  		goto branch_xu;
1672  	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
1673  	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
1674  		mask = 0x8000; /* je */
1675  		goto branch_xu;
1676  	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
1677  	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
1678  	{
1679  		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1680  
1681  		mask = 0x7000; /* jnz */
1682  		/* nrk or ngrk %w1,%dst,%src */
1683  		EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
1684  			  REG_W1, dst_reg, src_reg);
1685  		goto branch_oc;
1686  branch_ks:
1687  		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1688  		/* cfi or cgfi %dst,imm */
1689  		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
1690  			  dst_reg, imm);
1691  		if (!is_first_pass(jit) &&
1692  		    can_use_rel(jit, addrs[i + off + 1])) {
1693  			/* brc mask,off */
1694  			EMIT4_PCREL_RIC(0xa7040000,
1695  					mask >> 12, addrs[i + off + 1]);
1696  		} else {
1697  			/* brcl mask,off */
1698  			EMIT6_PCREL_RILC(0xc0040000,
1699  					 mask >> 12, addrs[i + off + 1]);
1700  		}
1701  		break;
1702  branch_ku:
1703  		/* lgfi %w1,imm (load sign extend imm) */
1704  		src_reg = REG_1;
1705  		EMIT6_IMM(0xc0010000, src_reg, imm);
1706  		goto branch_xu;
1707  branch_xs:
1708  		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1709  		if (!is_first_pass(jit) &&
1710  		    can_use_rel(jit, addrs[i + off + 1])) {
1711  			/* crj or cgrj %dst,%src,mask,off */
1712  			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
1713  				    dst_reg, src_reg, i, off, mask);
1714  		} else {
1715  			/* cr or cgr %dst,%src */
1716  			if (is_jmp32)
1717  				EMIT2(0x1900, dst_reg, src_reg);
1718  			else
1719  				EMIT4(0xb9200000, dst_reg, src_reg);
1720  			/* brcl mask,off */
1721  			EMIT6_PCREL_RILC(0xc0040000,
1722  					 mask >> 12, addrs[i + off + 1]);
1723  		}
1724  		break;
1725  branch_xu:
1726  		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1727  		if (!is_first_pass(jit) &&
1728  		    can_use_rel(jit, addrs[i + off + 1])) {
1729  			/* clrj or clgrj %dst,%src,mask,off */
1730  			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
1731  				    dst_reg, src_reg, i, off, mask);
1732  		} else {
1733  			/* clr or clgr %dst,%src */
1734  			if (is_jmp32)
1735  				EMIT2(0x1500, dst_reg, src_reg);
1736  			else
1737  				EMIT4(0xb9210000, dst_reg, src_reg);
1738  			/* brcl mask,off */
1739  			EMIT6_PCREL_RILC(0xc0040000,
1740  					 mask >> 12, addrs[i + off + 1]);
1741  		}
1742  		break;
1743  branch_oc:
1744  		if (!is_first_pass(jit) &&
1745  		    can_use_rel(jit, addrs[i + off + 1])) {
1746  			/* brc mask,off */
1747  			EMIT4_PCREL_RIC(0xa7040000,
1748  					mask >> 12, addrs[i + off + 1]);
1749  		} else {
1750  			/* brcl mask,off */
1751  			EMIT6_PCREL_RILC(0xc0040000,
1752  					 mask >> 12, addrs[i + off + 1]);
1753  		}
1754  		break;
1755  	}
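	/*
	 * A rough key to the shared labels above: "k" compares against an
	 * immediate (BPF_K), "x" against a register (BPF_X), "s"/"u" pick
	 * a signed/unsigned compare, and "oc" branches on an already-set
	 * condition code. "ku" loads the immediate into a scratch register
	 * and reuses the unsigned register path, which is valid for the
	 * equality tests that use it. "mask" carries the 4-bit branch
	 * condition mask in its top nibble, hence "mask >> 12" when
	 * emitting brc/brcl: 0x8000 -> 8 (je), 0x7000 -> 7 (jne),
	 * 0x2000 -> 2 (jh), 0x4000 -> 4 (jl), and so on.
	 */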
1756  	default: /* too complex, give up */
1757  		pr_err("Unknown opcode %02x\n", insn->code);
1758  		return -1;
1759  	}
1760  
1761  	if (probe_prg != -1) {
1762  		/*
1763  		 * Handlers of certain exceptions leave psw.addr pointing to
1764  		 * the instruction directly after the failing one. Therefore,
1765  		 * create two exception table entries and also add a nop in
1766  		 * case two probing instructions come directly after each
1767  		 * other.
1768  		 */
1769  		nop_prg = jit->prg;
1770  		/* bcr 0,%0 */
1771  		_EMIT2(0x0700);
1772  		err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
1773  		if (err < 0)
1774  			return err;
1775  	}
1776  
1777  	return insn_count;
1778  }
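/*
 * Note on the jump cases above: a BPF jump offset "off" is relative to
 * the instruction following the jump, so the target is looked up as
 * addrs[i + off + 1]. The short relative forms (brc, crj, clrj) are only
 * chosen once the first pass has produced stable addresses and
 * can_use_rel() confirms the displacement fits; otherwise the 6-byte
 * brcl form with a 32-bit displacement is used.
 */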
1779  
1780  /*
1781   * Return whether the proposed new address of the i-th instruction satisfies all invariants
1782   */
1783  static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
1784  {
1785  	/* On the first pass anything goes */
1786  	if (is_first_pass(jit))
1787  		return true;
1788  
1789  	/* The codegen pass must not change anything */
1790  	if (is_codegen_pass(jit))
1791  		return jit->addrs[i] == jit->prg;
1792  
1793  	/* Passes in between must not increase code size */
1794  	return jit->addrs[i] >= jit->prg;
1795  }
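/*
 * A sketch of why this makes the JIT converge: the first pass may place
 * instructions anywhere, every later sizing pass may only keep or shrink
 * the code, and the codegen pass must reproduce the recorded addresses
 * exactly. Since the recorded addresses are non-negative and never grow,
 * the passes cannot oscillate.
 */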
1796  
1797  /*
1798   * Update the address of i-th instruction
1799   */
1800  static int bpf_set_addr(struct bpf_jit *jit, int i)
1801  {
1802  	int delta;
1803  
1804  	if (is_codegen_pass(jit)) {
1805  		delta = jit->prg - jit->addrs[i];
1806  		if (delta < 0)
1807  			bpf_skip(jit, -delta);
1808  	}
1809  	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
1810  		return -1;
1811  	jit->addrs[i] = jit->prg;
1812  	return 0;
1813  }
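/*
 * If an instruction comes out shorter during the codegen pass than the
 * sizing passes predicted, bpf_skip() above pads the difference so that
 * the previously recorded address of the next instruction, and every
 * branch that already targets it, stays valid.
 */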
1814  
1815  /*
1816   * Compile eBPF program into s390x code
1817   */
1818  static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
1819  			bool extra_pass, u32 stack_depth)
1820  {
1821  	int i, insn_count, lit32_size, lit64_size;
1822  
1823  	jit->lit32 = jit->lit32_start;
1824  	jit->lit64 = jit->lit64_start;
1825  	jit->prg = 0;
1826  	jit->excnt = 0;
1827  
1828  	bpf_jit_prologue(jit, fp, stack_depth);
1829  	if (bpf_set_addr(jit, 0) < 0)
1830  		return -1;
1831  	for (i = 0; i < fp->len; i += insn_count) {
1832  		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
1833  		if (insn_count < 0)
1834  			return -1;
1835  		/* Next instruction address */
1836  		if (bpf_set_addr(jit, i + insn_count) < 0)
1837  			return -1;
1838  	}
1839  	bpf_jit_epilogue(jit, stack_depth);
1840  
1841  	lit32_size = jit->lit32 - jit->lit32_start;
1842  	lit64_size = jit->lit64 - jit->lit64_start;
1843  	jit->lit32_start = jit->prg;
1844  	if (lit32_size)
1845  		jit->lit32_start = ALIGN(jit->lit32_start, 4);
1846  	jit->lit64_start = jit->lit32_start + lit32_size;
1847  	if (lit64_size)
1848  		jit->lit64_start = ALIGN(jit->lit64_start, 8);
1849  	jit->size = jit->lit64_start + lit64_size;
1850  	jit->size_prg = jit->prg;
1851  
1852  	if (WARN_ON_ONCE(fp->aux->extable &&
1853  			 jit->excnt != fp->aux->num_exentries))
1854  		/* Verifier bug - too many entries. */
1855  		return -1;
1856  
1857  	return 0;
1858  }
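/*
 * The resulting image layout, as computed above:
 *
 *   0 ............ size_prg      generated instructions
 *   lit32_start ................ 32-bit literal pool (4-byte aligned)
 *   lit64_start ................ 64-bit literal pool (8-byte aligned)
 *   size ....................... end of program and literal pools
 */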
1859  
1860  bool bpf_jit_needs_zext(void)
1861  {
1862  	return true;
1863  }
1864  
1865  struct s390_jit_data {
1866  	struct bpf_binary_header *header;
1867  	struct bpf_jit ctx;
1868  	int pass;
1869  };
1870  
1871  static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
1872  					       struct bpf_prog *fp)
1873  {
1874  	struct bpf_binary_header *header;
1875  	u32 extable_size;
1876  	u32 code_size;
1877  
1878  	/* We need two entries per insn. */
1879  	fp->aux->num_exentries *= 2;
1880  
1881  	code_size = roundup(jit->size,
1882  			    __alignof__(struct exception_table_entry));
1883  	extable_size = fp->aux->num_exentries *
1884  		sizeof(struct exception_table_entry);
1885  	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
1886  				      8, jit_fill_hole);
1887  	if (!header)
1888  		return NULL;
1889  	fp->aux->extable = (struct exception_table_entry *)
1890  		(jit->prg_buf + code_size);
1891  	return header;
1892  }
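/*
 * The allocated binary therefore looks like:
 *
 *   [ JITed code and literal pools, padded to the extable alignment ]
 *   [ exception table: two entries per probing instruction          ]
 *
 * num_exentries is doubled to match the pairs of entries created for
 * each probe (see the nop handling in bpf_jit_insn() above).
 */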
1893  
1894  /*
1895   * Compile eBPF program "fp"
1896   */
1897  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1898  {
1899  	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
1900  	struct bpf_prog *tmp, *orig_fp = fp;
1901  	struct bpf_binary_header *header;
1902  	struct s390_jit_data *jit_data;
1903  	bool tmp_blinded = false;
1904  	bool extra_pass = false;
1905  	struct bpf_jit jit;
1906  	int pass;
1907  
1908  	if (!fp->jit_requested)
1909  		return orig_fp;
1910  
1911  	tmp = bpf_jit_blind_constants(fp);
1912  	/*
1913  	 * If blinding was requested and we failed during blinding,
1914  	 * we must fall back to the interpreter.
1915  	 */
1916  	if (IS_ERR(tmp))
1917  		return orig_fp;
1918  	if (tmp != fp) {
1919  		tmp_blinded = true;
1920  		fp = tmp;
1921  	}
1922  
1923  	jit_data = fp->aux->jit_data;
1924  	if (!jit_data) {
1925  		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1926  		if (!jit_data) {
1927  			fp = orig_fp;
1928  			goto out;
1929  		}
1930  		fp->aux->jit_data = jit_data;
1931  	}
1932  	if (jit_data->ctx.addrs) {
1933  		jit = jit_data->ctx;
1934  		header = jit_data->header;
1935  		extra_pass = true;
1936  		pass = jit_data->pass + 1;
1937  		goto skip_init_ctx;
1938  	}
1939  
1940  	memset(&jit, 0, sizeof(jit));
1941  	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
1942  	if (jit.addrs == NULL) {
1943  		fp = orig_fp;
1944  		goto free_addrs;
1945  	}
1946  	/*
1947  	 * Three initial passes:
1948  	 *   - 1/2: Determine clobbered registers
1949  	 *   - 3:   Calculate program size and addrs array
1950  	 */
1951  	for (pass = 1; pass <= 3; pass++) {
1952  		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
1953  			fp = orig_fp;
1954  			goto free_addrs;
1955  		}
1956  	}
1957  	/*
1958  	 * Final pass: Allocate and generate program
1959  	 */
1960  	header = bpf_jit_alloc(&jit, fp);
1961  	if (!header) {
1962  		fp = orig_fp;
1963  		goto free_addrs;
1964  	}
1965  skip_init_ctx:
1966  	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
1967  		bpf_jit_binary_free(header);
1968  		fp = orig_fp;
1969  		goto free_addrs;
1970  	}
1971  	if (bpf_jit_enable > 1) {
1972  		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
1973  		print_fn_code(jit.prg_buf, jit.size_prg);
1974  	}
1975  	if (!fp->is_func || extra_pass) {
1976  		bpf_jit_binary_lock_ro(header);
1977  	} else {
1978  		jit_data->header = header;
1979  		jit_data->ctx = jit;
1980  		jit_data->pass = pass;
1981  	}
1982  	fp->bpf_func = (void *) jit.prg_buf;
1983  	fp->jited = 1;
1984  	fp->jited_len = jit.size;
1985  
1986  	if (!fp->is_func || extra_pass) {
1987  		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
1988  free_addrs:
1989  		kvfree(jit.addrs);
1990  		kfree(jit_data);
1991  		fp->aux->jit_data = NULL;
1992  	}
1993  out:
1994  	if (tmp_blinded)
1995  		bpf_jit_prog_release_other(fp, fp == orig_fp ?
1996  					   tmp : orig_fp);
1997  	return fp;
1998  }
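/*
 * To summarize the flow above: passes 1-3 run bpf_jit_prog() without a
 * buffer to settle register usage and the addrs array, the image is then
 * allocated, and one more pass emits the final code. For programs with
 * subprograms (fp->is_func), the context is stashed in jit_data so that
 * a later extra pass can finalize call targets.
 */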
1999  
2000  bool bpf_jit_supports_kfunc_call(void)
2001  {
2002  	return true;
2003  }
2004  
2005  bool bpf_jit_supports_far_kfunc_call(void)
2006  {
2007  	return true;
2008  }
2009  
2010  int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
2011  		       void *old_addr, void *new_addr)
2012  {
2013  	struct bpf_plt expected_plt, current_plt, new_plt, *plt;
2014  	struct {
2015  		u16 opc;
2016  		s32 disp;
2017  	} __packed insn;
2018  	char *ret;
2019  	int err;
2020  
2021  	/* Verify the branch to be patched. */
2022  	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
2023  	if (err < 0)
2024  		return err;
2025  	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
2026  		return -EINVAL;
2027  
2028  	if (t == BPF_MOD_JUMP &&
2029  	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
2030  		/*
2031  		 * The branch already points to the destination,
2032  		 * there is no PLT.
2033  		 */
2034  	} else {
2035  		/* Verify the PLT. */
2036  		plt = ip + (insn.disp << 1);
2037  		err = copy_from_kernel_nofault(&current_plt, plt,
2038  					       sizeof(current_plt));
2039  		if (err < 0)
2040  			return err;
2041  		ret = (char *)ip + 6;
2042  		bpf_jit_plt(&expected_plt, ret, old_addr);
2043  		if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
2044  			return -EINVAL;
2045  		/* Adjust the call address. */
2046  		bpf_jit_plt(&new_plt, ret, new_addr);
2047  		s390_kernel_write(&plt->target, &new_plt.target,
2048  				  sizeof(void *));
2049  	}
2050  
2051  	/* Adjust the mask of the branch. */
2052  	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
2053  	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
2054  
2055  	/* Make the new code visible to the other CPUs. */
2056  	text_poke_sync_lock();
2057  
2058  	return 0;
2059  }
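/*
 * The patch site handled here is a 6-byte brcl whose condition mask
 * doubles as an enable bit; roughly:
 *
 *   c0 04 <disp32>    brcl 0,target     (never taken, i.e. a nop)
 *   c0 f4 <disp32>    brcl 15,target    (always taken)
 *
 * Toggling the branch is thus a single byte write at ip + 1, while
 * retargeting it goes through the PLT's target slot.
 */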
2060  
2061  struct bpf_tramp_jit {
2062  	struct bpf_jit common;
2063  	int orig_stack_args_off;/* Offset of arguments placed on stack by the
2064  				 * func_addr's original caller
2065  				 */
2066  	int stack_size;		/* Trampoline stack size */
2067  	int backchain_off;	/* Offset of backchain */
2068  	int stack_args_off;	/* Offset of stack arguments for calling
2069  				 * func_addr, has to be at the top
2070  				 */
2071  	int reg_args_off;	/* Offset of register arguments for calling
2072  				 * func_addr
2073  				 */
2074  	int ip_off;		/* For bpf_get_func_ip(), has to be at
2075  				 * (ctx - 16)
2076  				 */
2077  	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
2078  				 * (ctx - 8)
2079  				 */
2080  	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
2081  				 * of BPF arguments followed by return value
2082  				 */
2083  	int retval_off;		/* Offset of return value (see above) */
2084  	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
2085  				 * for __bpf_prog_enter() return value and
2086  				 * func_addr respectively
2087  				 */
2088  	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
2089  	int tccnt_off;		/* Offset of saved tailcall counter */
2090  	int r14_off;		/* Offset of saved %r14, has to be at the
2091  				 * bottom */
2092  	int do_fexit;		/* do_fexit: label */
2093  };
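/*
 * Putting the offsets together, the trampoline frame is laid out roughly
 * as follows, from the new %r15 upwards:
 *
 *   STACK_FRAME_OVERHEAD (backchain stored in its last 8 bytes)
 *   stack_args | reg_args | ip | arg_cnt | bpf_args | retval |
 *   r7_r8 | run_ctx | tccnt | r14
 *
 * The order follows the constraints in the field comments: stack_args at
 * the top, ip at (ctx - 16), arg_cnt at (ctx - 8), r14 at the bottom.
 */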
2094  
2095  static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
2096  {
2097  	/* llihf %dst_reg,val_hi */
2098  	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
2099  	/* oilf %dst_reg,val_lo */
2100  	EMIT6_IMM(0xc00d0000, dst_reg, val);
2101  }
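/*
 * For example, load_imm64(jit, dst, 0x1122334455667788) emits:
 *
 *   llihf %dst,0x11223344    ; clear %dst, load the high word
 *   oilf  %dst,0x55667788    ; OR in the low word
 */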
2102  
2103  static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
2104  			   const struct btf_func_model *m,
2105  			   struct bpf_tramp_link *tlink, bool save_ret)
2106  {
2107  	struct bpf_jit *jit = &tjit->common;
2108  	int cookie_off = tjit->run_ctx_off +
2109  			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
2110  	struct bpf_prog *p = tlink->link.prog;
2111  	int patch;
2112  
2113  	/*
2114  	 * run_ctx.cookie = tlink->cookie;
2115  	 */
2116  
2117  	/* %r0 = tlink->cookie */
2118  	load_imm64(jit, REG_W0, tlink->cookie);
2119  	/* stg %r0,cookie_off(%r15) */
2120  	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
2121  
2122  	/*
2123  	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
2124  	 *         goto skip;
2125  	 */
2126  
2127  	/* %r1 = __bpf_prog_enter */
2128  	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
2129  	/* %r2 = p */
2130  	load_imm64(jit, REG_2, (u64)p);
2131  	/* la %r3,run_ctx_off(%r15) */
2132  	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
2133  	/* %r1() */
2134  	call_r1(jit);
2135  	/* ltgr %r7,%r2 */
2136  	EMIT4(0xb9020000, REG_7, REG_2);
2137  	/* brcl 8,skip */
2138  	patch = jit->prg;
2139  	EMIT6_PCREL_RILC(0xc0040000, 8, 0);
2140  
2141  	/*
2142  	 * retval = bpf_func(args, p->insnsi);
2143  	 */
2144  
2145  	/* %r1 = p->bpf_func */
2146  	load_imm64(jit, REG_1, (u64)p->bpf_func);
2147  	/* la %r2,bpf_args_off(%r15) */
2148  	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
2149  	/* %r3 = p->insnsi */
2150  	if (!p->jited)
2151  		load_imm64(jit, REG_3, (u64)p->insnsi);
2152  	/* %r1() */
2153  	call_r1(jit);
2154  	/* stg %r2,retval_off(%r15) */
2155  	if (save_ret) {
2156  		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
2157  			return -1;
2158  		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
2159  			      tjit->retval_off);
2160  	}
2161  
2162  	/* skip: */
2163  	if (jit->prg_buf)
2164  		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
2165  
2166  	/*
2167  	 * __bpf_prog_exit(p, start, &run_ctx);
2168  	 */
2169  
2170  	/* %r1 = __bpf_prog_exit */
2171  	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
2172  	/* %r2 = p */
2173  	load_imm64(jit, REG_2, (u64)p);
2174  	/* lgr %r3,%r7 */
2175  	EMIT4(0xb9040000, REG_3, REG_7);
2176  	/* la %r4,run_ctx_off(%r15) */
2177  	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
2178  	/* %r1() */
2179  	call_r1(jit);
2180  
2181  	return 0;
2182  }
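/*
 * Note the forward-branch fixup above: "brcl 8,skip" is emitted with a
 * zero displacement and its position remembered in "patch"; the real
 * displacement is backpatched once the address of the skip: label is
 * known. This only happens when jit->prg_buf is set, i.e. in the pass
 * that actually writes code.
 */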
2183  
2184  static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
2185  {
2186  	int stack_offset = tjit->stack_size;
2187  
2188  	tjit->stack_size += size;
2189  	return stack_offset;
2190  }
2191  
2192  /* ABI uses %r2 - %r6 for parameter passing. */
2193  #define MAX_NR_REG_ARGS 5
2194  
2195  /* The "L" field of the "mvc" instruction is 8 bits. */
2196  #define MAX_MVC_SIZE 256
2197  #define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
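/*
 * Since "mvc" encodes (length - 1) in its 8-bit "L" field, one mvc moves
 * at most 256 bytes, i.e. up to 32 eight-byte stack arguments.
 */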
2198  
2199  /* -mfentry generates a 6-byte nop on s390x. */
2200  #define S390X_PATCH_SIZE 6
2201  
2202  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
2203  					 struct bpf_tramp_jit *tjit,
2204  					 const struct btf_func_model *m,
2205  					 u32 flags,
2206  					 struct bpf_tramp_links *tlinks,
2207  					 void *func_addr)
2208  {
2209  	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
2210  	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
2211  	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
2212  	int nr_bpf_args, nr_reg_args, nr_stack_args;
2213  	struct bpf_jit *jit = &tjit->common;
2214  	int arg, bpf_arg_off;
2215  	int i, j;
2216  
2217  	/* Support as many stack arguments as the "mvc" instruction can handle. */
2218  	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
2219  	nr_stack_args = m->nr_args - nr_reg_args;
2220  	if (nr_stack_args > MAX_NR_STACK_ARGS)
2221  		return -ENOTSUPP;
2222  
2223  	/* Return to %r14, since func_addr and %r0 are not available. */
2224  	if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
2225  		flags |= BPF_TRAMP_F_SKIP_FRAME;
2226  
2227  	/*
2228  	 * Compute how many arguments we need to pass to BPF programs.
2229  	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
2230  	 * smaller are packed into 1 or 2 registers; larger arguments are
2231  	 * passed via pointers.
2232  	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
2233  	 * a register; larger arguments are passed via pointers.
2234  	 * We need to deal with this difference.
2235  	 */
2236  	nr_bpf_args = 0;
2237  	for (i = 0; i < m->nr_args; i++) {
2238  		if (m->arg_size[i] <= 8)
2239  			nr_bpf_args += 1;
2240  		else if (m->arg_size[i] <= 16)
2241  			nr_bpf_args += 2;
2242  		else
2243  			return -ENOTSUPP;
2244  	}
2245  
2246  	/*
2247  	 * Calculate the stack layout.
2248  	 */
2249  
2250  	/*
2251  	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
2252  	 * ABI requires, put our backchain at the end of the allocated memory.
2253  	 */
2254  	tjit->stack_size = STACK_FRAME_OVERHEAD;
2255  	tjit->backchain_off = tjit->stack_size - sizeof(u64);
2256  	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
2257  	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
2258  	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
2259  	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
2260  	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
2261  	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
2262  	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
2263  	tjit->run_ctx_off = alloc_stack(tjit,
2264  					sizeof(struct bpf_tramp_run_ctx));
2265  	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
2266  	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
2267  	/*
2268  	 * In accordance with the s390x ABI, the caller has allocated
2269  	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
2270  	 * backchain, and the rest we can use.
2271  	 */
2272  	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
2273  	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
2274  
2275  	/* lgr %r1,%r15 */
2276  	EMIT4(0xb9040000, REG_1, REG_15);
2277  	/* aghi %r15,-stack_size */
2278  	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
2279  	/* stg %r1,backchain_off(%r15) */
2280  	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
2281  		      tjit->backchain_off);
2282  	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
2283  	_EMIT6(0xd203f000 | tjit->tccnt_off,
2284  	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
2285  	/* stmg %r2,%rN,reg_args_off(%r15) */
2286  	if (nr_reg_args)
2287  		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
2288  			      REG_2 + (nr_reg_args - 1), REG_15,
2289  			      tjit->reg_args_off);
2290  	for (i = 0, j = 0; i < m->nr_args; i++) {
2291  		if (i < MAX_NR_REG_ARGS)
2292  			arg = REG_2 + i;
2293  		else
2294  			arg = tjit->orig_stack_args_off +
2295  			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
2296  		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
2297  		if (m->arg_size[i] <= 8) {
2298  			if (i < MAX_NR_REG_ARGS)
2299  				/* stg %arg,bpf_arg_off(%r15) */
2300  				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
2301  					      REG_0, REG_15, bpf_arg_off);
2302  			else
2303  				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
2304  				_EMIT6(0xd207f000 | bpf_arg_off,
2305  				       0xf000 | arg);
2306  			j += 1;
2307  		} else {
2308  			if (i < MAX_NR_REG_ARGS) {
2309  				/* mvc bpf_arg_off(16,%r15),0(%arg) */
2310  				_EMIT6(0xd20ff000 | bpf_arg_off,
2311  				       reg2hex[arg] << 12);
2312  			} else {
2313  				/* lg %r1,arg(%r15) */
2314  				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
2315  					      REG_15, arg);
2316  				/* mvc bpf_arg_off(16,%r15),0(%r1) */
2317  				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
2318  			}
2319  			j += 2;
2320  		}
2321  	}
2322  	/* stmg %r7,%r8,r7_r8_off(%r15) */
2323  	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
2324  		      tjit->r7_r8_off);
2325  	/* stg %r14,r14_off(%r15) */
2326  	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
2327  
2328  	if (flags & BPF_TRAMP_F_ORIG_STACK) {
2329  		/*
2330  		 * The ftrace trampoline puts the return address (which is the
2331  		 * address of the original function + S390X_PATCH_SIZE) into
2332  		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
2333  		 * ftrace_init_nop() for details.
2334  		 */
2335  
2336  		/* lgr %r8,%r0 */
2337  		EMIT4(0xb9040000, REG_8, REG_0);
2338  	} else {
2339  		/* %r8 = func_addr + S390X_PATCH_SIZE */
2340  		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
2341  	}
2342  
2343  	/*
2344  	 * ip = func_addr;
2345  	 * arg_cnt = m->nr_args;
2346  	 */
2347  
2348  	if (flags & BPF_TRAMP_F_IP_ARG) {
2349  		/* %r0 = func_addr */
2350  		load_imm64(jit, REG_0, (u64)func_addr);
2351  		/* stg %r0,ip_off(%r15) */
2352  		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
2353  			      tjit->ip_off);
2354  	}
2355  	/* lghi %r0,nr_bpf_args */
2356  	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
2357  	/* stg %r0,arg_cnt_off(%r15) */
2358  	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
2359  		      tjit->arg_cnt_off);
2360  
2361  	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2362  		/*
2363  		 * __bpf_tramp_enter(im);
2364  		 */
2365  
2366  		/* %r1 = __bpf_tramp_enter */
2367  		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
2368  		/* %r2 = im */
2369  		load_imm64(jit, REG_2, (u64)im);
2370  		/* %r1() */
2371  		call_r1(jit);
2372  	}
2373  
2374  	for (i = 0; i < fentry->nr_links; i++)
2375  		if (invoke_bpf_prog(tjit, m, fentry->links[i],
2376  				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
2377  			return -EINVAL;
2378  
2379  	if (fmod_ret->nr_links) {
2380  		/*
2381  		 * retval = 0;
2382  		 */
2383  
2384  		/* xc retval_off(8,%r15),retval_off(%r15) */
2385  		_EMIT6(0xd707f000 | tjit->retval_off,
2386  		       0xf000 | tjit->retval_off);
2387  
2388  		for (i = 0; i < fmod_ret->nr_links; i++) {
2389  			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
2390  				return -EINVAL;
2391  
2392  			/*
2393  			 * if (retval)
2394  			 *         goto do_fexit;
2395  			 */
2396  
2397  			/* ltg %r0,retval_off(%r15) */
2398  			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
2399  				      tjit->retval_off);
2400  			/* brcl 7,do_fexit */
2401  			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
2402  		}
2403  	}
2404  
2405  	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2406  		/*
2407  		 * retval = func_addr(args);
2408  		 */
2409  
2410  		/* lmg %r2,%rN,reg_args_off(%r15) */
2411  		if (nr_reg_args)
2412  			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
2413  				      REG_2 + (nr_reg_args - 1), REG_15,
2414  				      tjit->reg_args_off);
2415  		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
2416  		if (nr_stack_args)
2417  			_EMIT6(0xd200f000 |
2418  				       (nr_stack_args * sizeof(u64) - 1) << 16 |
2419  				       tjit->stack_args_off,
2420  			       0xf000 | tjit->orig_stack_args_off);
2421  		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
2422  		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
2423  		/* lgr %r1,%r8 */
2424  		EMIT4(0xb9040000, REG_1, REG_8);
2425  		/* %r1() */
2426  		call_r1(jit);
2427  		/* stg %r2,retval_off(%r15) */
2428  		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
2429  			      tjit->retval_off);
2430  
2431  		im->ip_after_call = jit->prg_buf + jit->prg;
2432  
2433  		/*
2434  		 * The following nop will be patched by bpf_tramp_image_put().
2435  		 */
2436  
2437  		/* brcl 0,im->ip_epilogue */
2438  		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
2439  	}
2440  
2441  	/* do_fexit: */
2442  	tjit->do_fexit = jit->prg;
2443  	for (i = 0; i < fexit->nr_links; i++)
2444  		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
2445  			return -EINVAL;
2446  
2447  	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2448  		im->ip_epilogue = jit->prg_buf + jit->prg;
2449  
2450  		/*
2451  		 * __bpf_tramp_exit(im);
2452  		 */
2453  
2454  		/* %r1 = __bpf_tramp_exit */
2455  		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
2456  		/* %r2 = im */
2457  		load_imm64(jit, REG_2, (u64)im);
2458  		/* %r1() */
2459  		call_r1(jit);
2460  	}
2461  
2462  	/* lmg %r2,%rN,reg_args_off(%r15) */
2463  	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
2464  		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
2465  			      REG_2 + (nr_reg_args - 1), REG_15,
2466  			      tjit->reg_args_off);
2467  	/* lgr %r1,%r8 */
2468  	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
2469  		EMIT4(0xb9040000, REG_1, REG_8);
2470  	/* lmg %r7,%r8,r7_r8_off(%r15) */
2471  	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
2472  		      tjit->r7_r8_off);
2473  	/* lg %r14,r14_off(%r15) */
2474  	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
2475  	/* lg %r2,retval_off(%r15) */
2476  	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
2477  		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
2478  			      tjit->retval_off);
2479  	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
2480  	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
2481  	       0xf000 | tjit->tccnt_off);
2482  	/* aghi %r15,stack_size */
2483  	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
2484  	/* Emit an expoline for the following indirect jump. */
2485  	if (nospec_uses_trampoline())
2486  		emit_expoline(jit);
2487  	if (flags & BPF_TRAMP_F_SKIP_FRAME)
2488  		/* br %r14 */
2489  		_EMIT2(0x07fe);
2490  	else
2491  		/* br %r1 */
2492  		_EMIT2(0x07f1);
2493  
2494  	emit_r1_thunk(jit);
2495  
2496  	return 0;
2497  }
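/*
 * In outline, the generated trampoline executes:
 *
 *   allocate frame; spill args, %r7/%r8, %r14 and the tailcall counter
 *   if (BPF_TRAMP_F_CALL_ORIG) __bpf_tramp_enter(im);
 *   run fentry programs
 *   run fmod_ret programs, jumping to do_fexit on a nonzero retval
 *   if (BPF_TRAMP_F_CALL_ORIG) retval = func_addr(args);
 * do_fexit:
 *   run fexit programs
 *   if (BPF_TRAMP_F_CALL_ORIG) __bpf_tramp_exit(im);
 *   restore state; br %r14 (skip frame) or br %r1 (back into func_addr)
 */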
2498  
2499  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
2500  				void *image_end, const struct btf_func_model *m,
2501  				u32 flags, struct bpf_tramp_links *tlinks,
2502  				void *func_addr)
2503  {
2504  	struct bpf_tramp_jit tjit;
2505  	int ret;
2506  	int i;
2507  
2508  	for (i = 0; i < 2; i++) {
2509  		if (i == 0) {
2510  			/* Compute offsets, check whether the code fits. */
2511  			memset(&tjit, 0, sizeof(tjit));
2512  		} else {
2513  			/* Generate the code. */
2514  			tjit.common.prg = 0;
2515  			tjit.common.prg_buf = image;
2516  		}
2517  		ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
2518  						    tlinks, func_addr);
2519  		if (ret < 0)
2520  			return ret;
2521  		if (tjit.common.prg > (char *)image_end - (char *)image)
2522  			/*
2523  			 * Use the same error code as for exceeding
2524  			 * BPF_MAX_TRAMP_LINKS.
2525  			 */
2526  			return -E2BIG;
2527  	}
2528  
2529  	return tjit.common.prg;
2530  }
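/*
 * Like the main JIT, the trampoline builder runs twice: first a sizing
 * pass with prg_buf == NULL, where nothing is written and only
 * common.prg advances, then an emission pass into "image". The size
 * check against image_end runs on both iterations.
 */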
2531  
2532  bool bpf_jit_supports_subprog_tailcalls(void)
2533  {
2534  	return true;
2535  }
2536