xref: /openbmc/linux/arch/parisc/net/bpf_jit_comp32.c (revision ceb0e726)
1*ceb0e726SHelge Deller // SPDX-License-Identifier: GPL-2.0
2*ceb0e726SHelge Deller /*
3*ceb0e726SHelge Deller  * BPF JIT compiler for PA-RISC (32-bit)
4*ceb0e726SHelge Deller  *
5*ceb0e726SHelge Deller  * Copyright (c) 2023 Helge Deller <deller@gmx.de>
6*ceb0e726SHelge Deller  *
7*ceb0e726SHelge Deller  * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and
8*ceb0e726SHelge Deller  * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
9*ceb0e726SHelge Deller  */
10*ceb0e726SHelge Deller 
11*ceb0e726SHelge Deller #include <linux/bpf.h>
12*ceb0e726SHelge Deller #include <linux/filter.h>
13*ceb0e726SHelge Deller #include <linux/libgcc.h>
14*ceb0e726SHelge Deller #include "bpf_jit.h"
15*ceb0e726SHelge Deller 
16*ceb0e726SHelge Deller /*
17*ceb0e726SHelge Deller  * Stack layout during BPF program execution (note: stack grows up):
18*ceb0e726SHelge Deller  *
19*ceb0e726SHelge Deller  *                     high
20*ceb0e726SHelge Deller  *   HPPA32 sp =>  +----------+ <= HPPA32 fp
21*ceb0e726SHelge Deller  *                 | saved sp |
22*ceb0e726SHelge Deller  *                 | saved rp |
23*ceb0e726SHelge Deller  *                 |   ...    | HPPA32 callee-saved registers
24*ceb0e726SHelge Deller  *                 | curr args|
25*ceb0e726SHelge Deller  *                 | local var|
26*ceb0e726SHelge Deller  *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS)
27*ceb0e726SHelge Deller  *                 |  lo(R9)  |
28*ceb0e726SHelge Deller  *                 |  hi(R9)  |
29*ceb0e726SHelge Deller  *                 |  lo(FP)  | JIT scratch space for BPF registers
30*ceb0e726SHelge Deller  *                 |  hi(FP)  |
31*ceb0e726SHelge Deller  *                 |   ...    |
32*ceb0e726SHelge Deller  *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS
33*ceb0e726SHelge Deller  *                 |          |        - 4 * BPF_JIT_SCRATCH_REGS)
34*ceb0e726SHelge Deller  *                 |          |
35*ceb0e726SHelge Deller  *                 |   ...    | BPF program stack
36*ceb0e726SHelge Deller  *                 |          |
37*ceb0e726SHelge Deller  *                 |   ...    | Function call stack
38*ceb0e726SHelge Deller  *                 |          |
39*ceb0e726SHelge Deller  *                 +----------+
40*ceb0e726SHelge Deller  *                     low
41*ceb0e726SHelge Deller  */
42*ceb0e726SHelge Deller 
/*
 * Slot indices inside the JIT scratch space, counted from its top.
 * Every BPF register that lives on the stack owns a HI slot followed by
 * a LO slot. The final enumerator is the total number of slots.
 */
enum {
	BPF_R8_HI,	BPF_R8_LO,
	BPF_R9_HI,	BPF_R9_LO,
	BPF_FP_HI,	BPF_FP_LO,
	BPF_AX_HI,	BPF_AX_LO,
	BPF_R0_TEMP_HI,	BPF_R0_TEMP_LO,

	BPF_JIT_SCRATCH_REGS,
};
57*ceb0e726SHelge Deller 
/*
 * Number of register-sized slots at the top of the frame that precede the
 * JIT scratch space: one slot for each of r3-r18 plus 8 further slots
 * (the epilogue reads the saved sp, the exit pointer and rp from there).
 */
#define NR_SAVED_REGISTERS	(18 - 3 + 1 + 8)

/*
 * Offset from fp for BPF registers stored on stack, expressed in
 * register-sized slots (users multiply by REG_SIZE). The result is always
 * negative, which is what marks a regmap entry as stack-resident.
 * The argument is parenthesized so that expression arguments expand safely.
 */
#define STACK_OFFSET(k)	(-(NR_SAVED_REGISTERS + (k) + 1))
#define STACK_ALIGN	FRAME_SIZE

/* Helpers for the exit function pointer kept at sp - 8. */
#define EXIT_PTR_LOAD(reg)	hppa_ldw(-0x08, HPPA_REG_SP, reg)
#define EXIT_PTR_STORE(reg)	hppa_stw(reg, -0x08, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, reg, nop)

/* JIT-internal pseudo registers, placed after the last BPF JIT register. */
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define TMP_REG_R0	(MAX_BPF_JIT_REG + 2)
72*ceb0e726SHelge Deller 
73*ceb0e726SHelge Deller static const s8 regmap[][2] = {
74*ceb0e726SHelge Deller 	/* Return value from in-kernel function, and exit value from eBPF. */
75*ceb0e726SHelge Deller 	[BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1},		/* HI/LOW */
76*ceb0e726SHelge Deller 
77*ceb0e726SHelge Deller 	/* Arguments from eBPF program to in-kernel function. */
78*ceb0e726SHelge Deller 	[BPF_REG_1] = {HPPA_R(3), HPPA_R(4)},
79*ceb0e726SHelge Deller 	[BPF_REG_2] = {HPPA_R(5), HPPA_R(6)},
80*ceb0e726SHelge Deller 	[BPF_REG_3] = {HPPA_R(7), HPPA_R(8)},
81*ceb0e726SHelge Deller 	[BPF_REG_4] = {HPPA_R(9), HPPA_R(10)},
82*ceb0e726SHelge Deller 	[BPF_REG_5] = {HPPA_R(11), HPPA_R(12)},
83*ceb0e726SHelge Deller 
84*ceb0e726SHelge Deller 	[BPF_REG_6] = {HPPA_R(13), HPPA_R(14)},
85*ceb0e726SHelge Deller 	[BPF_REG_7] = {HPPA_R(15), HPPA_R(16)},
86*ceb0e726SHelge Deller 	/*
87*ceb0e726SHelge Deller 	 * Callee-saved registers that in-kernel function will preserve.
88*ceb0e726SHelge Deller 	 * Stored on the stack.
89*ceb0e726SHelge Deller 	 */
90*ceb0e726SHelge Deller 	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
91*ceb0e726SHelge Deller 	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
92*ceb0e726SHelge Deller 
93*ceb0e726SHelge Deller 	/* Read-only frame pointer to access BPF stack. Not needed. */
94*ceb0e726SHelge Deller 	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
95*ceb0e726SHelge Deller 
96*ceb0e726SHelge Deller 	/* Temporary register for blinding constants. Stored on the stack. */
97*ceb0e726SHelge Deller 	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
98*ceb0e726SHelge Deller 	/*
99*ceb0e726SHelge Deller 	 * Temporary registers used by the JIT to operate on registers stored
100*ceb0e726SHelge Deller 	 * on the stack. Save t0 and t1 to be used as temporaries in generated
101*ceb0e726SHelge Deller 	 * code.
102*ceb0e726SHelge Deller 	 */
103*ceb0e726SHelge Deller 	[TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2},
104*ceb0e726SHelge Deller 	[TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4},
105*ceb0e726SHelge Deller 
106*ceb0e726SHelge Deller 	/* temporary space for BPF_R0 during libgcc and millicode calls */
107*ceb0e726SHelge Deller 	[TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)},
108*ceb0e726SHelge Deller };
109*ceb0e726SHelge Deller 
/* Return the HI entry (index 0) of the register pair @r. */
static s8 hi(const s8 *r)
{
	return *r;
}
114*ceb0e726SHelge Deller 
/* Return the LO entry (index 1) of the register pair @r. */
static s8 lo(const s8 *r)
{
	return *(r + 1);
}
119*ceb0e726SHelge Deller 
emit_hppa_copy(const s8 rs,const s8 rd,struct hppa_jit_context * ctx)120*ceb0e726SHelge Deller static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
121*ceb0e726SHelge Deller {
122*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, rd);
123*ceb0e726SHelge Deller 	if (OPTIMIZE_HPPA && (rs == rd))
124*ceb0e726SHelge Deller 		return;
125*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, rs);
126*ceb0e726SHelge Deller 	emit(hppa_copy(rs, rd), ctx);
127*ceb0e726SHelge Deller }
128*ceb0e726SHelge Deller 
emit_hppa_xor(const s8 r1,const s8 r2,const s8 r3,struct hppa_jit_context * ctx)129*ceb0e726SHelge Deller static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx)
130*ceb0e726SHelge Deller {
131*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, r1);
132*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, r2);
133*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, r3);
134*ceb0e726SHelge Deller 	if (OPTIMIZE_HPPA && (r1 == r2)) {
135*ceb0e726SHelge Deller 		emit(hppa_copy(HPPA_REG_ZERO, r3), ctx);
136*ceb0e726SHelge Deller 	} else {
137*ceb0e726SHelge Deller 		emit(hppa_xor(r1, r2, r3), ctx);
138*ceb0e726SHelge Deller 	}
139*ceb0e726SHelge Deller }
140*ceb0e726SHelge Deller 
/*
 * Load the 32-bit immediate @imm into HPPA register @rd.
 *
 * With OPTIMIZE_HPPA enabled, an immediate accepted by
 * relative_bits_ok(imm, 14) is loaded with a single ldi. Otherwise an ldil
 * is emitted, followed by an ldo that adds the im11() part; with
 * OPTIMIZE_HPPA the ldo is dropped when that part is zero.
 */
static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx)
{
	const u32 low_part = im11(imm);

	REG_SET_SEEN(ctx, rd);

	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
		emit(hppa_ldi(imm, rd), ctx);
		return;
	}

	emit(hppa_ldil(imm, rd), ctx);
	if (!OPTIMIZE_HPPA || low_part != 0)
		emit(hppa_ldo(low_part, rd, rd), ctx);
}
155*ceb0e726SHelge Deller 
/*
 * Load @imm, sign-extended to 64 bits, into the register pair @rd:
 * the low register receives @imm, the high register 0 or -1.
 */
static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx)
{
	const s8 low = lo(rd);
	const s8 high = hi(rd);

	/* Low word: the immediate itself. */
	REG_SET_SEEN(ctx, low);
	emit_imm(low, imm, ctx);

	/* High word: replicate the sign of the immediate. */
	REG_SET_SEEN(ctx, high);
	if (imm < 0)
		emit(hppa_ldi(-1, high), ctx);
	else
		emit_hppa_copy(HPPA_REG_ZERO, high, ctx);
}
169*ceb0e726SHelge Deller 
/*
 * Load a 64-bit constant, given as two 32-bit halves, into the register
 * pair @rd. The high half is emitted first, then the low half.
 */
static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
		       struct hppa_jit_context *ctx)
{
	const s8 high = hi(rd);
	const s8 low = lo(rd);

	emit_imm(high, imm_hi, ctx);
	emit_imm(low, imm_lo, ctx);
}
176*ceb0e726SHelge Deller 
/*
 * Emit the epilogue of a JITed program.
 *
 * @is_tail_call: emit only the jump into the tail-called program instead
 *                of the regular exit sequence.
 *
 * The regular exit sequence loads the exit function pointer and jumps to
 * it, then emits the code that moves the low word of BPF R0 to RET0,
 * reloads the callee-saved registers r3-r18, reloads rp and returns while
 * restoring sp in the delay slot.
 */
static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
{
	const s8 *bpf_r0 = regmap[BPF_REG_0];
	int regno;

	if (is_tail_call) {
		/*
		 * goto *(t0 + 4);
		 * The first prologue instruction of the target initializes
		 * the tail call counter and has to be skipped. t0 is expected
		 * to hold the address of the target program, see
		 * emit_bpf_tail_call.
		 */
		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
		/* delay slot: hand the tail call counter over */
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);

		return;
	}

	/*
	 * Load the epilogue function pointer and jump to it. The exit point
	 * is either directly below, or the exit function of the outermost
	 * TCC program.
	 */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/* 32-bit and big-endian: the return value is the lower 32 bits. */
	emit_hppa_copy(lo(bpf_r0), HPPA_REG_RET0, ctx);

	/* Reload callee-saved registers (only the used ones when optimizing). */
	for (regno = 3; regno <= 18; regno++) {
		if (!OPTIMIZE_HPPA || REG_WAS_SEEN(ctx, HPPA_R(regno)))
			emit(hppa_ldw(-REG_SIZE * (8 + (regno - 3)), HPPA_REG_SP,
				      HPPA_R(regno)), ctx);
	}

	/* Reload the original return pointer (stored by the outermost TCC function). */
	emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx);
	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
	/* delay slot: restore the saved stack pointer */
	emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx);
}
218*ceb0e726SHelge Deller 
is_stacked(s8 reg)219*ceb0e726SHelge Deller static bool is_stacked(s8 reg)
220*ceb0e726SHelge Deller {
221*ceb0e726SHelge Deller 	return reg < 0;
222*ceb0e726SHelge Deller }
223*ceb0e726SHelge Deller 
/*
 * Make the 64-bit BPF register @reg available in HPPA registers.
 *
 * If @reg lives on the stack, both words are loaded into @tmp, using
 * sp-relative offsets reduced by @offset_sp, and @tmp is returned.
 * Otherwise @reg itself is returned. The registers of the returned pair
 * are marked as seen.
 */
static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp,
		u16 offset_sp, struct hppa_jit_context *ctx)
{
	if (!is_stacked(hi(reg))) {
		REG_SET_SEEN(ctx, hi(reg));
		REG_SET_SEEN(ctx, lo(reg));
		return reg;
	}

	emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx);
	emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx);

	REG_SET_SEEN(ctx, hi(tmp));
	REG_SET_SEEN(ctx, lo(tmp));
	return tmp;
}
236*ceb0e726SHelge Deller 
bpf_get_reg64(const s8 * reg,const s8 * tmp,struct hppa_jit_context * ctx)237*ceb0e726SHelge Deller static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
238*ceb0e726SHelge Deller 			       struct hppa_jit_context *ctx)
239*ceb0e726SHelge Deller {
240*ceb0e726SHelge Deller 	return bpf_get_reg64_offset(reg, tmp, 0, ctx);
241*ceb0e726SHelge Deller }
242*ceb0e726SHelge Deller 
/*
 * Pick the register pair a 64-bit destination should be written to.
 *
 * Without OPTIMIZE_HPPA this is a plain bpf_get_reg64(). Otherwise a
 * stacked @reg is redirected to @tmp without loading its old value, except
 * that the high word alone is loaded when @must_load is set. The registers
 * of the returned pair are marked as seen.
 */
static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp,
		bool must_load, struct hppa_jit_context *ctx)
{
	const s8 *target;
	bool on_stack;

	if (!OPTIMIZE_HPPA)
		return bpf_get_reg64(reg, tmp, ctx);

	on_stack = is_stacked(hi(reg));
	if (on_stack && must_load)
		emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx);

	target = on_stack ? tmp : reg;
	REG_SET_SEEN(ctx, hi(target));
	REG_SET_SEEN(ctx, lo(target));
	return target;
}
258*ceb0e726SHelge Deller 
259*ceb0e726SHelge Deller 
bpf_put_reg64(const s8 * reg,const s8 * src,struct hppa_jit_context * ctx)260*ceb0e726SHelge Deller static void bpf_put_reg64(const s8 *reg, const s8 *src,
261*ceb0e726SHelge Deller 			  struct hppa_jit_context *ctx)
262*ceb0e726SHelge Deller {
263*ceb0e726SHelge Deller 	if (is_stacked(hi(reg))) {
264*ceb0e726SHelge Deller 		emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
265*ceb0e726SHelge Deller 		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
266*ceb0e726SHelge Deller 	}
267*ceb0e726SHelge Deller }
268*ceb0e726SHelge Deller 
bpf_save_R0(struct hppa_jit_context * ctx)269*ceb0e726SHelge Deller static void bpf_save_R0(struct hppa_jit_context *ctx)
270*ceb0e726SHelge Deller {
271*ceb0e726SHelge Deller 	bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
272*ceb0e726SHelge Deller }
273*ceb0e726SHelge Deller 
bpf_restore_R0(struct hppa_jit_context * ctx)274*ceb0e726SHelge Deller static void bpf_restore_R0(struct hppa_jit_context *ctx)
275*ceb0e726SHelge Deller {
276*ceb0e726SHelge Deller 	bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
277*ceb0e726SHelge Deller }
278*ceb0e726SHelge Deller 
279*ceb0e726SHelge Deller 
bpf_get_reg32(const s8 * reg,const s8 * tmp,struct hppa_jit_context * ctx)280*ceb0e726SHelge Deller static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
281*ceb0e726SHelge Deller 			       struct hppa_jit_context *ctx)
282*ceb0e726SHelge Deller {
283*ceb0e726SHelge Deller 	if (is_stacked(lo(reg))) {
284*ceb0e726SHelge Deller 		emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx);
285*ceb0e726SHelge Deller 		reg = tmp;
286*ceb0e726SHelge Deller 	}
287*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, lo(reg));
288*ceb0e726SHelge Deller 	return reg;
289*ceb0e726SHelge Deller }
290*ceb0e726SHelge Deller 
bpf_get_reg32_ref(const s8 * reg,const s8 * tmp,struct hppa_jit_context * ctx)291*ceb0e726SHelge Deller static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp,
292*ceb0e726SHelge Deller 		struct hppa_jit_context *ctx)
293*ceb0e726SHelge Deller {
294*ceb0e726SHelge Deller 	if (!OPTIMIZE_HPPA)
295*ceb0e726SHelge Deller 		return bpf_get_reg32(reg, tmp, ctx);
296*ceb0e726SHelge Deller 
297*ceb0e726SHelge Deller 	if (is_stacked(hi(reg))) {
298*ceb0e726SHelge Deller 		reg = tmp;
299*ceb0e726SHelge Deller 	}
300*ceb0e726SHelge Deller 	REG_SET_SEEN(ctx, lo(reg));
301*ceb0e726SHelge Deller 	return reg;
302*ceb0e726SHelge Deller }
303*ceb0e726SHelge Deller 
bpf_put_reg32(const s8 * reg,const s8 * src,struct hppa_jit_context * ctx)304*ceb0e726SHelge Deller static void bpf_put_reg32(const s8 *reg, const s8 *src,
305*ceb0e726SHelge Deller 			  struct hppa_jit_context *ctx)
306*ceb0e726SHelge Deller {
307*ceb0e726SHelge Deller 	if (is_stacked(lo(reg))) {
308*ceb0e726SHelge Deller 		REG_SET_SEEN(ctx, lo(src));
309*ceb0e726SHelge Deller 		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
310*ceb0e726SHelge Deller 		if (1 && !ctx->prog->aux->verifier_zext) {
311*ceb0e726SHelge Deller 			REG_SET_SEEN(ctx, hi(reg));
312*ceb0e726SHelge Deller 			emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
313*ceb0e726SHelge Deller 		}
314*ceb0e726SHelge Deller 	} else if (1 && !ctx->prog->aux->verifier_zext) {
315*ceb0e726SHelge Deller 		REG_SET_SEEN(ctx, hi(reg));
316*ceb0e726SHelge Deller 		emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
317*ceb0e726SHelge Deller 	}
318*ceb0e726SHelge Deller }
319*ceb0e726SHelge Deller 
320*ceb0e726SHelge Deller /* extern hppa millicode functions */
321*ceb0e726SHelge Deller extern void $$mulI(void);
322*ceb0e726SHelge Deller extern void $$divU(void);
323*ceb0e726SHelge Deller extern void $$remU(void);
324*ceb0e726SHelge Deller 
/*
 * Emit a call to a 32-bit millicode routine: arg0 = func(arg0, arg1).
 *
 * @func:   millicode function (e.g. $$mulI, $$divU, $$remU)
 * @arg0:   HPPA register with the first operand; also receives the result
 * @arg1:   HPPA register with the second operand
 * @opcode: BPF opcode; for BPF_DIV and BPF_MOD a default result is
 *          preloaded and the call is skipped when the divisor is zero
 *
 * BPF R0 is saved to the stack around the call unless @arg0 is RET1.
 */
static void emit_call_millicode(void *func, const s8 arg0,
		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	const bool preserve_r0 = (arg0 != HPPA_REG_RET1);
	const u8 bpf_op = BPF_OP(opcode);
	u32 func_addr;

	emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
	emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);

	/* The callee overwrites HPPA_REG_RET0/1, so park BPF R0 on the stack. */
	if (preserve_r0)
		bpf_save_R0(ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);

	if (bpf_op == BPF_DIV || bpf_op == BPF_MOD) {
		/* Default result for a zero divisor: 0 for DIV, dividend for MOD. */
		if (bpf_op == BPF_MOD)
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		/* Skip the following be_l instruction if the divisor is zero. */
		emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx);
	}

	/* Millicode functions use r31 as return pointer instead of rp. */
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
	emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */

	/* Millicode functions return their result in RET1, not RET0. */
	emit_hppa_copy(HPPA_REG_RET1, arg0, ctx);

	/* Bring BPF R0 back from its temporary stack slots. */
	if (preserve_r0)
		bpf_restore_R0(ctx);
}
358*ceb0e726SHelge Deller 
/*
 * Emit a call to a 64-bit libgcc helper: arg0 = func(arg0, arg1).
 *
 * @func:   libgcc function taking and returning 64-bit values
 * @arg0:   register pair with the first operand; also receives the result
 * @arg1:   register pair with the second operand
 * @opcode: BPF opcode; for BPF_DIV and BPF_MOD a default result is
 *          preloaded and the call is skipped when the divisor is zero
 *
 * Operands are passed as lo(arg0)/hi(arg0) in ARG0/ARG1 and
 * lo(arg1)/hi(arg1) in ARG2/ARG3; the result is taken from RET0 (high
 * word) and RET1 (low word). BPF R0 is saved to the stack around the call
 * unless the destination is R0 itself.
 */
static void emit_call_libgcc_ll(void *func, const s8 *arg0,
		const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	const bool preserve_r0 = (hi(arg0) != HPPA_REG_RET0);
	const u8 bpf_op = BPF_OP(opcode);
	u32 func_addr;

	emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx);
	emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx);
	emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx);

	/* libgcc overwrites HPPA_REG_RET0/_RET1, so keep copy of R0 on stack */
	if (preserve_r0)
		bpf_save_R0(ctx);

	/* prepare stack */
	emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);

	/*
	 * Preload the result for a zero divisor and nullify the following
	 * be_l instruction in that case.
	 */
	if (bpf_op == BPF_DIV || bpf_op == BPF_MOD) {
		if (bpf_op == BPF_DIV) {
			/* x / 0 = 0 */
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		} else {
			/*
			 * x % 0 = x: both words of the dividend must be
			 * preserved, not only the low one.
			 */
			emit_hppa_copy(HPPA_REG_ARG1, HPPA_REG_RET0, ctx);
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		}
		emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx);
	}
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* in delay slot: be_l links through r31, the callee returns via rp */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore stack */
	emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx);

	/* restore HPPA_REG_RET0/_RET1 */
	if (preserve_r0)
		bpf_restore_R0(ctx);
}
400*ceb0e726SHelge Deller 
/*
 * Emit an unconditional jump to the instruction @paoff instructions away.
 *
 * @paoff:     target offset in instructions, relative to the jump
 * @force_far: always occupy two instruction slots, padding a short branch
 *             with a nop
 *
 * A displacement accepted by relative_bits_ok(..., 17) uses a bl branch;
 * anything else is reached through an ldil/be,l pair on the absolute
 * address.
 */
static void emit_jump(s32 paoff, bool force_far,
			       struct hppa_jit_context *ctx)
{
	unsigned long here, target;

	if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) {
		/* far jump: be,l,n addr(sr4,r31), %sr0, %r31 */
		here = (uintptr_t) &ctx->insns[ctx->ninsns];
		target = here + (paoff * HPPA_INSN_SIZE);
		emit(hppa_ldil(target, HPPA_REG_R31), ctx);
		emit(hppa_be_l(im11(target) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
		return;
	}

	/* short jump: bl, padded with a nop when two slots were requested */
	emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx);
	if (force_far)
		emit(hppa_nop(), ctx);
}
420*ceb0e726SHelge Deller 
/*
 * Emit a 64-bit ALU operation with an immediate: dst = dst OP imm.
 *
 * @dst: register pair of the BPF destination register
 * @imm: 32-bit immediate; for MOV/AND/OR/XOR it acts as its sign-extended
 *       64-bit value, for the shifts it is the shift count
 * @op:  one of BPF_MOV, BPF_AND, BPF_OR, BPF_XOR, BPF_LSH, BPF_RSH,
 *       BPF_ARSH; any other value triggers WARN_ON()
 *
 * The destination is always fetched with bpf_get_reg64() and written back
 * with bpf_put_reg64(). HPPA_REG_T0 is used as scratch for the immediate.
 */
static void emit_alu_i64(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm32(rd, imm, ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		/* high word of imm is 0 if imm >= 0, all ones otherwise */
		if (imm >= 0)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		/* OR with an all-ones high word yields all ones */
		if (imm < 0)
			emit_imm(hi(rd), -1, ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		/* XOR with an all-ones high word inverts the high word */
		if (imm < 0) {
			emit_imm(HPPA_REG_T0, -1, ctx);
			emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx);
		}
		break;
	case BPF_LSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			/* low word moves into the high word, shifted further */
			imm -= 32;
			emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(lo(rd), hi(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else {
			emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx);
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		}
		break;
	case BPF_RSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			/* high word moves into the low word, shifted further */
			imm -= 32;
			emit(hppa_shr(hi(rd), imm, lo(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_shr(hi(rd), imm, hi(rd)), ctx);
		}
		break;
	case BPF_ARSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx);
		}
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg64(dst, rd, ctx);
}
508*ceb0e726SHelge Deller 
/*
 * Emit a 32-bit ALU operation with an immediate: dst = dst OP imm.
 *
 * @dst: register pair of the BPF destination register
 * @imm: 32-bit immediate (shift count for the shift operations)
 * @op:  one of BPF_MOV, BPF_ADD, BPF_SUB, BPF_AND, BPF_OR, BPF_XOR,
 *       BPF_LSH, BPF_RSH, BPF_ARSH; any other value triggers WARN_ON()
 *
 * The destination is fetched exactly once: for BPF_MOV the old value is
 * not needed, so only a target register is picked; every other operation
 * loads the current low word. The result is written back with
 * bpf_put_reg32(). HPPA_REG_T0 is used as scratch for the immediate.
 */
static void emit_alu_i32(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm(lo(rd), imm, ctx);
		break;
	case BPF_ADD:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		break;
	case BPF_LSH:
		/* a shift by zero leaves the value untouched */
		if (imm != 0)
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		break;
	case BPF_RSH:
		if (imm != 0)
			emit(hppa_shr(lo(rd), imm, lo(rd)), ctx);
		break;
	case BPF_ARSH:
		if (imm != 0)
			emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx);
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg32(dst, rd, ctx);
}
563*ceb0e726SHelge Deller 
emit_alu_r64(const s8 * dst,const s8 * src,struct hppa_jit_context * ctx,const u8 op)564*ceb0e726SHelge Deller static void emit_alu_r64(const s8 *dst, const s8 *src,
565*ceb0e726SHelge Deller 			 struct hppa_jit_context *ctx, const u8 op)
566*ceb0e726SHelge Deller {
567*ceb0e726SHelge Deller 	const s8 *tmp1 = regmap[TMP_REG_1];
568*ceb0e726SHelge Deller 	const s8 *tmp2 = regmap[TMP_REG_2];
569*ceb0e726SHelge Deller 	const s8 *rd;
570*ceb0e726SHelge Deller 	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
571*ceb0e726SHelge Deller 
572*ceb0e726SHelge Deller 	if (op == BPF_MOV)
573*ceb0e726SHelge Deller 		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
574*ceb0e726SHelge Deller 	else
575*ceb0e726SHelge Deller 		rd = bpf_get_reg64(dst, tmp1, ctx);
576*ceb0e726SHelge Deller 
577*ceb0e726SHelge Deller 	/* dst = dst OP src */
578*ceb0e726SHelge Deller 	switch (op) {
579*ceb0e726SHelge Deller 	case BPF_MOV:
580*ceb0e726SHelge Deller 		emit_hppa_copy(lo(rs), lo(rd), ctx);
581*ceb0e726SHelge Deller 		emit_hppa_copy(hi(rs), hi(rd), ctx);
582*ceb0e726SHelge Deller 		break;
583*ceb0e726SHelge Deller 	case BPF_ADD:
584*ceb0e726SHelge Deller 		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
585*ceb0e726SHelge Deller 		emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx);
586*ceb0e726SHelge Deller 		break;
587*ceb0e726SHelge Deller 	case BPF_SUB:
588*ceb0e726SHelge Deller 		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
589*ceb0e726SHelge Deller 		emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx);
590*ceb0e726SHelge Deller 		break;
591*ceb0e726SHelge Deller 	case BPF_AND:
592*ceb0e726SHelge Deller 		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
593*ceb0e726SHelge Deller 		emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx);
594*ceb0e726SHelge Deller 		break;
595*ceb0e726SHelge Deller 	case BPF_OR:
596*ceb0e726SHelge Deller 		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
597*ceb0e726SHelge Deller 		emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx);
598*ceb0e726SHelge Deller 		break;
599*ceb0e726SHelge Deller 	case BPF_XOR:
600*ceb0e726SHelge Deller 		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
601*ceb0e726SHelge Deller 		emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx);
602*ceb0e726SHelge Deller 		break;
603*ceb0e726SHelge Deller 	case BPF_MUL:
604*ceb0e726SHelge Deller 		emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx);
605*ceb0e726SHelge Deller 		break;
606*ceb0e726SHelge Deller 	case BPF_DIV:
607*ceb0e726SHelge Deller 		emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx);
608*ceb0e726SHelge Deller 		break;
609*ceb0e726SHelge Deller 	case BPF_MOD:
610*ceb0e726SHelge Deller 		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx);
611*ceb0e726SHelge Deller 		break;
612*ceb0e726SHelge Deller 	case BPF_LSH:
613*ceb0e726SHelge Deller 		emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx);
614*ceb0e726SHelge Deller 		break;
615*ceb0e726SHelge Deller 	case BPF_RSH:
616*ceb0e726SHelge Deller 		emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx);
617*ceb0e726SHelge Deller 		break;
618*ceb0e726SHelge Deller 	case BPF_ARSH:
619*ceb0e726SHelge Deller 		emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx);
620*ceb0e726SHelge Deller 		break;
621*ceb0e726SHelge Deller 	case BPF_NEG:
622*ceb0e726SHelge Deller 		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);
623*ceb0e726SHelge Deller 		emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx);
624*ceb0e726SHelge Deller 		break;
625*ceb0e726SHelge Deller 	default:
626*ceb0e726SHelge Deller 		WARN_ON(1);
627*ceb0e726SHelge Deller 	}
628*ceb0e726SHelge Deller 
629*ceb0e726SHelge Deller 	bpf_put_reg64(dst, rd, ctx);
630*ceb0e726SHelge Deller }
631*ceb0e726SHelge Deller 
emit_alu_r32(const s8 * dst,const s8 * src,struct hppa_jit_context * ctx,const u8 op)632*ceb0e726SHelge Deller static void emit_alu_r32(const s8 *dst, const s8 *src,
633*ceb0e726SHelge Deller 			 struct hppa_jit_context *ctx, const u8 op)
634*ceb0e726SHelge Deller {
635*ceb0e726SHelge Deller 	const s8 *tmp1 = regmap[TMP_REG_1];
636*ceb0e726SHelge Deller 	const s8 *tmp2 = regmap[TMP_REG_2];
637*ceb0e726SHelge Deller 	const s8 *rd;
638*ceb0e726SHelge Deller 	const s8 *rs = bpf_get_reg32(src, tmp2, ctx);
639*ceb0e726SHelge Deller 
640*ceb0e726SHelge Deller 	if (op == BPF_MOV)
641*ceb0e726SHelge Deller 		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
642*ceb0e726SHelge Deller 	else
643*ceb0e726SHelge Deller 		rd = bpf_get_reg32(dst, tmp1, ctx);
644*ceb0e726SHelge Deller 
645*ceb0e726SHelge Deller 	/* dst = dst OP src */
646*ceb0e726SHelge Deller 	switch (op) {
647*ceb0e726SHelge Deller 	case BPF_MOV:
648*ceb0e726SHelge Deller 		emit_hppa_copy(lo(rs), lo(rd), ctx);
649*ceb0e726SHelge Deller 		break;
650*ceb0e726SHelge Deller 	case BPF_ADD:
651*ceb0e726SHelge Deller 		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
652*ceb0e726SHelge Deller 		break;
653*ceb0e726SHelge Deller 	case BPF_SUB:
654*ceb0e726SHelge Deller 		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
655*ceb0e726SHelge Deller 		break;
656*ceb0e726SHelge Deller 	case BPF_AND:
657*ceb0e726SHelge Deller 		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
658*ceb0e726SHelge Deller 		break;
659*ceb0e726SHelge Deller 	case BPF_OR:
660*ceb0e726SHelge Deller 		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
661*ceb0e726SHelge Deller 		break;
662*ceb0e726SHelge Deller 	case BPF_XOR:
663*ceb0e726SHelge Deller 		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
664*ceb0e726SHelge Deller 		break;
665*ceb0e726SHelge Deller 	case BPF_MUL:
666*ceb0e726SHelge Deller 		emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx);
667*ceb0e726SHelge Deller 		break;
668*ceb0e726SHelge Deller 	case BPF_DIV:
669*ceb0e726SHelge Deller 		emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx);
670*ceb0e726SHelge Deller 		break;
671*ceb0e726SHelge Deller 	case BPF_MOD:
672*ceb0e726SHelge Deller 		emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx);
673*ceb0e726SHelge Deller 		break;
674*ceb0e726SHelge Deller 	case BPF_LSH:
675*ceb0e726SHelge Deller 		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
676*ceb0e726SHelge Deller 		emit(hppa_mtsar(HPPA_REG_T0), ctx);
677*ceb0e726SHelge Deller 		emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx);
678*ceb0e726SHelge Deller 		break;
679*ceb0e726SHelge Deller 	case BPF_RSH:
680*ceb0e726SHelge Deller 		emit(hppa_mtsar(lo(rs)), ctx);
681*ceb0e726SHelge Deller 		emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx);
682*ceb0e726SHelge Deller 		break;
683*ceb0e726SHelge Deller 	case BPF_ARSH: /* sign extending arithmetic shift right */
684*ceb0e726SHelge Deller 		// emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx);
685*ceb0e726SHelge Deller 		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
686*ceb0e726SHelge Deller 		emit(hppa_mtsar(HPPA_REG_T0), ctx);
687*ceb0e726SHelge Deller 		emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx);
688*ceb0e726SHelge Deller 		break;
689*ceb0e726SHelge Deller 	case BPF_NEG:
690*ceb0e726SHelge Deller 		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);  // sub r0,rd,rd
691*ceb0e726SHelge Deller 		break;
692*ceb0e726SHelge Deller 	default:
693*ceb0e726SHelge Deller 		WARN_ON(1);
694*ceb0e726SHelge Deller 	}
695*ceb0e726SHelge Deller 
696*ceb0e726SHelge Deller 	bpf_put_reg32(dst, rd, ctx);
697*ceb0e726SHelge Deller }
698*ceb0e726SHelge Deller 
/*
 * emit_branch_r64() - emit a conditional branch comparing two 64-bit registers
 * @src1:  hi/lo pair of the left-hand BPF register
 * @src2:  hi/lo pair of the right-hand BPF register
 * @paoff: branch offset, counted in instructions from the start of this
 *         BPF instruction's emitted code
 * @ctx:   JIT context the instructions are emitted into
 * @op:    BPF_OP() of the jump (BPF_JEQ, BPF_JGT, ...)
 *
 * A short sequence of compare-and-branch instructions decides whether the
 * emit_jump() placed after the sequence is executed or skipped.
 *
 * Return: always 0.
 */
static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	const int first_insn = ctx->ninsns;
	const s8 *lhs = bpf_get_reg64(src1, regmap[TMP_REG_1], ctx);
	const s8 *rhs = bpf_get_reg64(src2, regmap[TMP_REG_2], ctx);

	/*
	 * idx is the number of compare instructions that still follow the
	 * one being emitted.
	 *
	 * NO_JUMP(idx): skip the remaining compares and the emit_jump,
	 *               i.e. the BPF branch is not taken.
	 * JUMP(idx):    skip straight to the emit_jump, i.e. the BPF
	 *               branch is taken.
	 *
	 * Falling through the last compare means the BPF branch is taken.
	 */
#define NO_JUMP(idx)	(2 + (idx) - 1)
#define JUMP(idx)	(0 + (idx) - 1)

	switch (op) {
	/* Equality tests. */
	case BPF_JEQ:
		emit(hppa_bne(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bne(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(hi(lhs), hi(rhs), JUMP(1)), ctx);
		emit(hppa_beq(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;

	/* Unsigned ordering: high words decide, low words break ties. */
	case BPF_JGT:
		emit(hppa_bgtu(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bltu(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgtu(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bltu(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bltu(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;

	/* Signed ordering: signed on the high words, unsigned on the low. */
	case BPF_JSGT:
		emit(hppa_bgt(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_blt(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bgt(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bgt(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_blt(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_blt(hi(lhs), hi(rhs), JUMP(2)), ctx);
		emit(hppa_bgt(hi(lhs), hi(rhs), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(lhs), lo(rhs), NO_JUMP(0)), ctx);
		break;

	/* Bit test: AND each half into a temporary, then test for non-zero. */
	case BPF_JSET:
		emit(hppa_and(hi(lhs), hi(rhs), HPPA_REG_T0), ctx);
		emit(hppa_and(lo(lhs), lo(rhs), HPPA_REG_T1), ctx);
		emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx);
		emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx);
		break;

	default:
		WARN_ON(1);
	}

#undef NO_JUMP
#undef JUMP

	/* The jump target is relative; account for what was emitted above. */
	paoff -= ctx->ninsns - first_insn;
	emit_jump(paoff, true, ctx);
	return 0;
}
788*ceb0e726SHelge Deller 
/*
 * emit_bcc() - emit a conditional branch on two 32-bit machine registers
 * @op:    BPF jump condition (BPF_JEQ, BPF_JGT, ..., BPF_JSET)
 * @rd:    left-hand machine register
 * @rs:    right-hand machine register
 * @paoff: branch offset in instructions
 * @ctx:   JIT context the instructions are emitted into
 *
 * If the target is reachable with a 12-bit displacement a single
 * compare-and-branch is emitted.  Otherwise the condition is inverted, the
 * compare-and-branch hops over an emit_jump() sequence, and that sequence
 * performs the actual (far) jump.
 *
 * Return: always 0.
 */
static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx)
{
	int branch_start;
	int disp;
	bool far;

	if (op == BPF_JSET) {
		/*
		 * BPF_JSET has no inverse condition, so it is rewritten as
		 * "(rd & rs) != 0" using T0 as temporary.
		 */
		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
		paoff -= 1; /* the hppa_and() above moved us one insn forward */
		rd = HPPA_REG_T0;
		rs = HPPA_REG_ZERO;
		op = BPF_JNE;
	}

	branch_start = ctx->ninsns;

	far = !relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12);
	if (far)
		op = invert_bpf_cond(op);

	/*
	 * Far: the inverted branch jumps over itself and the instructions
	 * emitted by emit_jump().  Near: branch directly by paoff.
	 */
	if (far)
		disp = HPPA_BRANCH_DISPLACEMENT - 1;
	else
		disp = paoff - HPPA_BRANCH_DISPLACEMENT;

	/* IF (rd COND rs) JUMP disp */
	switch (op) {
	case BPF_JEQ:
		emit(hppa_beq(rd, rs, disp), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(rd, rs, disp), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(rd, rs, disp), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(rd, rs, disp), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgeu(rd, rs, disp), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bleu(rd, rs, disp), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(rd, rs, disp), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(rd, rs, disp), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bge(rd, rs, disp), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_ble(rd, rs, disp), ctx);
		break;
	default:
		WARN_ON(1);
	}

	if (!far)
		return 0;

	/* Compensate for the branch emitted since branch_start. */
	paoff -= ctx->ninsns - branch_start;
	emit_jump(paoff, true, ctx);
	return 0;
}
865*ceb0e726SHelge Deller 
/*
 * emit_branch_r32() - emit a conditional branch comparing two 32-bit registers
 * @src1:  hi/lo pair of the left-hand BPF register
 * @src2:  hi/lo pair of the right-hand BPF register
 * @paoff: branch offset in instructions
 * @ctx:   JIT context the instructions are emitted into
 * @op:    BPF_OP() of the jump
 *
 * Fetches the low words of both operands and delegates to emit_bcc(), after
 * reducing @paoff by the number of instructions the fetches emitted.
 *
 * Return: 0 on success, -1 if emit_bcc() reports failure.
 */
static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	const int first_insn = ctx->ninsns;
	const s8 *lhs = bpf_get_reg32(src1, regmap[TMP_REG_1], ctx);
	const s8 *rhs = bpf_get_reg32(src2, regmap[TMP_REG_2], ctx);

	/* Adjust for whatever the register fetches emitted. */
	paoff -= ctx->ninsns - first_insn;

	return emit_bcc(op, lo(lhs), lo(rhs), paoff, ctx) ? -1 : 0;
}
885*ceb0e726SHelge Deller 
/*
 * emit_call() - emit a call to the kernel function at @addr
 * @fixed: not used by this function
 * @addr:  absolute address of the callee
 * @ctx:   JIT context the instructions are emitted into
 *
 * BPF R1 and R2 are passed in ARG0..ARG3, BPF R3..R5 are stored to the
 * stack.  After the call, RET0/RET1 are copied into the hi/lo halves of
 * BPF R0.  SP is raised by 2 * STACK_ALIGN around the call.
 */
static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx)
{
	const int frame = 2 * STACK_ALIGN;
	const s8 *scratch = regmap[TMP_REG_1];
	const s8 *ret_pair = regmap[BPF_REG_0];
	const s8 *arg;

	/* Open the call frame. */
	emit(hppa_ldo(frame, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/*
	 * R5, R4, R3 go to the stack (high word first, low word 4 bytes
	 * above it).  SP has already moved by 'frame', hence the _offset
	 * variant of the register fetch.
	 */
	arg = bpf_get_reg64_offset(regmap[BPF_REG_5], scratch, frame, ctx);
	emit(hppa_stw(hi(arg), -0x48, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(arg), -0x44, HPPA_REG_SP), ctx);

	arg = bpf_get_reg64_offset(regmap[BPF_REG_4], scratch, frame, ctx);
	emit(hppa_stw(hi(arg), -0x40, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(arg), -0x3c, HPPA_REG_SP), ctx);

	arg = bpf_get_reg64_offset(regmap[BPF_REG_3], scratch, frame, ctx);
	emit(hppa_stw(hi(arg), -0x38, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(arg), -0x34, HPPA_REG_SP), ctx);

	/* R2 -> ARG3:ARG2, R1 -> ARG1:ARG0 (hi:lo). */
	arg = bpf_get_reg64_offset(regmap[BPF_REG_2], scratch, frame, ctx);
	emit_hppa_copy(hi(arg), HPPA_REG_ARG3, ctx);
	emit_hppa_copy(lo(arg), HPPA_REG_ARG2, ctx);

	arg = bpf_get_reg64_offset(regmap[BPF_REG_1], scratch, frame, ctx);
	emit_hppa_copy(hi(arg), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(arg), HPPA_REG_ARG0, ctx);

	/* Keep a copy of TCC across the call, if the program uses it. */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);

	/*
	 * Load the absolute address with ldil() rather than emit_imm(): the
	 * number of emitted instructions must not depend on the value of
	 * addr.
	 */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* Delay slot: set up the return address. */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* Bring TCC back. */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	/* Close the call frame. */
	emit(hppa_ldo(-frame, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* Return value: RET0 is the high word, RET1 the low word of R0. */
	emit_hppa_copy(HPPA_REG_RET0, hi(ret_pair), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(ret_pair), ctx);
}
942*ceb0e726SHelge Deller 
emit_bpf_tail_call(int insn,struct hppa_jit_context * ctx)943*ceb0e726SHelge Deller static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
944*ceb0e726SHelge Deller {
945*ceb0e726SHelge Deller 	/*
946*ceb0e726SHelge Deller 	 * R1 -> &ctx
947*ceb0e726SHelge Deller 	 * R2 -> &array
948*ceb0e726SHelge Deller 	 * R3 -> index
949*ceb0e726SHelge Deller 	 */
950*ceb0e726SHelge Deller 	int off;
951*ceb0e726SHelge Deller 	const s8 *arr_reg = regmap[BPF_REG_2];
952*ceb0e726SHelge Deller 	const s8 *idx_reg = regmap[BPF_REG_3];
953*ceb0e726SHelge Deller 	struct bpf_array bpfa;
954*ceb0e726SHelge Deller 	struct bpf_prog bpfp;
955*ceb0e726SHelge Deller 
956*ceb0e726SHelge Deller 	/* get address of TCC main exit function for error case into rp */
957*ceb0e726SHelge Deller 	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
958*ceb0e726SHelge Deller 
959*ceb0e726SHelge Deller 	/* max_entries = array->map.max_entries; */
960*ceb0e726SHelge Deller 	off = offsetof(struct bpf_array, map.max_entries);
961*ceb0e726SHelge Deller 	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
962*ceb0e726SHelge Deller 	emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);
963*ceb0e726SHelge Deller 
964*ceb0e726SHelge Deller 	/*
965*ceb0e726SHelge Deller 	 * if (index >= max_entries)
966*ceb0e726SHelge Deller 	 *   goto out;
967*ceb0e726SHelge Deller 	 */
968*ceb0e726SHelge Deller 	emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
969*ceb0e726SHelge Deller 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
970*ceb0e726SHelge Deller 
971*ceb0e726SHelge Deller 	/*
972*ceb0e726SHelge Deller 	 * if (--tcc < 0)
973*ceb0e726SHelge Deller 	 *   goto out;
974*ceb0e726SHelge Deller 	 */
975*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
976*ceb0e726SHelge Deller 	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
977*ceb0e726SHelge Deller 	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
978*ceb0e726SHelge Deller 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
979*ceb0e726SHelge Deller 
980*ceb0e726SHelge Deller 	/*
981*ceb0e726SHelge Deller 	 * prog = array->ptrs[index];
982*ceb0e726SHelge Deller 	 * if (!prog)
983*ceb0e726SHelge Deller 	 *   goto out;
984*ceb0e726SHelge Deller 	 */
985*ceb0e726SHelge Deller 	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
986*ceb0e726SHelge Deller 	emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
987*ceb0e726SHelge Deller 	off = offsetof(struct bpf_array, ptrs);
988*ceb0e726SHelge Deller 	BUILD_BUG_ON(!relative_bits_ok(off, 11));
989*ceb0e726SHelge Deller 	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
990*ceb0e726SHelge Deller 	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
991*ceb0e726SHelge Deller 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
992*ceb0e726SHelge Deller 
993*ceb0e726SHelge Deller 	/*
994*ceb0e726SHelge Deller 	 * tcc = temp_tcc;
995*ceb0e726SHelge Deller 	 * goto *(prog->bpf_func + 4);
996*ceb0e726SHelge Deller 	 */
997*ceb0e726SHelge Deller 	off = offsetof(struct bpf_prog, bpf_func);
998*ceb0e726SHelge Deller 	BUILD_BUG_ON(!relative_bits_ok(off, 11));
999*ceb0e726SHelge Deller 	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
1000*ceb0e726SHelge Deller 	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
1001*ceb0e726SHelge Deller 	/* Epilogue jumps to *(t0 + 4). */
1002*ceb0e726SHelge Deller 	__build_epilogue(true, ctx);
1003*ceb0e726SHelge Deller 	return 0;
1004*ceb0e726SHelge Deller }
1005*ceb0e726SHelge Deller 
/*
 * emit_load_r64() - emit LDX: dst = *(size *)(src + off)
 * @dst:  hi/lo pair of the BPF destination register
 * @src:  hi/lo pair of the BPF register holding the base address
 * @off:  signed byte offset added to the base address
 * @ctx:  JIT context the instructions are emitted into
 * @size: BPF_B, BPF_H, BPF_W or BPF_DW
 *
 * For sizes below BPF_DW the high word of dst is cleared here unless
 * ctx->prog->aux->verifier_zext is set.  For BPF_DW the word at @off goes
 * to the high half and the word at @off + 4 to the low half.
 *
 * Return: 0 on success, -1 if @size is not a known BPF size.
 */
static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
			 struct hppa_jit_context *ctx, const u8 size)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	/*
	 * BPF_DW issues a second access at off + 4, so that displacement
	 * has to be encodable as well, not just off itself.
	 */
	const int last_off = (size == BPF_DW) ? off + 4 : off;
	s8 srcreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14) && relative_bits_ok(last_off, 14)) {
		srcreg = lo(rs);
	} else {
		/* need to use R1 here, since addil puts result into R1 */
		srcreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rs)), ctx);
		/* only the low 11 bits remain as displacement */
		off = im11(off);
	}

	/* LDX: dst = *(size *)(src + off) */
	switch (size) {
	case BPF_B:
		emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_H:
		emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_W:
		emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_DW:
		emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
		emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
		break;
	default:
		/* Unknown size: do not silently emit nothing. */
		WARN_ON(1);
		return -1;
	}

	bpf_put_reg64(dst, rd, ctx);
	return 0;
}
1051*ceb0e726SHelge Deller 
/*
 * emit_store_r64() - emit STX: *(size *)(dst + off) = src
 * @dst:  hi/lo pair of the BPF register holding the base address
 * @src:  hi/lo pair of the BPF register holding the value to store
 * @off:  signed byte offset added to the base address
 * @ctx:  JIT context the instructions are emitted into
 * @size: BPF_B, BPF_H, BPF_W or BPF_DW
 * @mode: not used by this function
 *
 * Sizes below BPF_DW store (part of) the low word of src.  BPF_DW stores
 * the high word at @off and the low word at @off + 4.
 *
 * Return: 0 on success, -1 if @size is not a known BPF size.
 */
static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	/*
	 * BPF_DW issues a second access at off + 4, so that displacement
	 * has to be encodable as well, not just off itself.
	 */
	const int last_off = (size == BPF_DW) ? off + 4 : off;
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14) && relative_bits_ok(last_off, 14)) {
		dstreg = lo(rd);
	} else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rd)), ctx);
		/* only the low 11 bits remain as displacement */
		off = im11(off);
	}

	/* STX: *(size *)(dst + off) = src */
	switch (size) {
	case BPF_B:
		emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_DW:
		emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
		emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
		break;
	default:
		/* Unknown size: do not silently emit nothing. */
		WARN_ON(1);
		return -1;
	}

	return 0;
}
1091*ceb0e726SHelge Deller 
emit_rev16(const s8 rd,struct hppa_jit_context * ctx)1092*ceb0e726SHelge Deller static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
1093*ceb0e726SHelge Deller {
1094*ceb0e726SHelge Deller 	emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
1095*ceb0e726SHelge Deller 	emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
1096*ceb0e726SHelge Deller 	emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
1097*ceb0e726SHelge Deller }
1098*ceb0e726SHelge Deller 
emit_rev32(const s8 rs,const s8 rd,struct hppa_jit_context * ctx)1099*ceb0e726SHelge Deller static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
1100*ceb0e726SHelge Deller {
1101*ceb0e726SHelge Deller 	emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
1102*ceb0e726SHelge Deller 	emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
1103*ceb0e726SHelge Deller 	emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
1104*ceb0e726SHelge Deller }
1105*ceb0e726SHelge Deller 
emit_zext64(const s8 * dst,struct hppa_jit_context * ctx)1106*ceb0e726SHelge Deller static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
1107*ceb0e726SHelge Deller {
1108*ceb0e726SHelge Deller 	const s8 *rd;
1109*ceb0e726SHelge Deller 	const s8 *tmp1 = regmap[TMP_REG_1];
1110*ceb0e726SHelge Deller 
1111*ceb0e726SHelge Deller 	rd = bpf_get_reg64(dst, tmp1, ctx);
1112*ceb0e726SHelge Deller 	emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1113*ceb0e726SHelge Deller 	bpf_put_reg64(dst, rd, ctx);
1114*ceb0e726SHelge Deller }
1115*ceb0e726SHelge Deller 
/*
 * Translate one eBPF instruction into PA-RISC instructions.
 *
 * @insn:       instruction to translate; BPF_LD | BPF_IMM | BPF_DW also
 *              reads the following slot, insn[1]
 * @ctx:        JIT context the generated instructions are emitted into
 * @extra_pass: forwarded unchanged to bpf_jit_get_func_addr() for BPF_CALL
 *
 * Return: 0 on success; 1 after a BPF_LD | BPF_IMM | BPF_DW (presumably so
 * the caller skips the second slot of that instruction -- confirm against
 * the caller); a negative value when the opcode is unknown or unsupported,
 * or when one of the emit helpers reports a failure.
 */
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	/* BPF_JMP compares are 64 bit wide, BPF_JMP32 compares are not. */
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, paoff, i = insn - ctx->prog->insnsi;
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	const s8 *dst = regmap[insn->dst_reg];
	const s8 *src = regmap[insn->src_reg];
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	switch (code) {
	/*
	 * 64-bit: dst = src and dst = dst OP src.  The BPF_K forms listed
	 * here first load the immediate into tmp2 and then take the same
	 * register/register path.
	 */
	case BPF_ALU64 | BPF_MOV | BPF_X:

	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:

	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:

	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:

	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:

	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_K:

	case BPF_ALU64 | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:

	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r64(dst, src, ctx, BPF_OP(code));
		break;

	/* 64-bit: dst = -dst; tmp2 is only passed as a placeholder source. */
	case BPF_ALU64 | BPF_NEG:
		emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
		break;

	/* 64-bit: dst = imm and dst = dst OP imm */
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_i64(dst, imm, ctx, BPF_OP(code));
		break;

	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext. */
			emit_zext64(dst, ctx);
			break;
		}
		fallthrough;
	/*
	 * 32-bit: dst = dst OP src.  mul, div and mod with an immediate
	 * load it into tmp2 and share this path.
	 */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:

	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:

	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_K:

	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r32(dst, src, ctx, BPF_OP(code));
		break;

	/* 32-bit: dst = imm and dst = dst OP imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		/*
		 * mul,div,mod are handled in the BPF_X case.
		 */
		emit_alu_i32(dst, imm, ctx, BPF_OP(code));
		break;

	/* 32-bit: dst = -dst */
	case BPF_ALU | BPF_NEG:
		/*
		 * src is ignored---choose tmp2 as a dummy register since it
		 * is not on the stack.
		 */
		emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
		break;

	/*
	 * BPF_END, BPF_FROM_BE: no bytes are swapped here, the value is only
	 * narrowed to imm bits and zero-extended.
	 */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
			fallthrough;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Do nothing. */
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	/* BPF_END, BPF_FROM_LE: dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			emit_rev16(lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 32:
			emit_rev32(lo(rd), lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/*
			 * Swap upper and lower halves, then each half.  The
			 * old high word is parked in T0 because hi(rd) is
			 * overwritten before it is consumed.
			 */
			emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
			emit_rev32(lo(rd), hi(rd), ctx);
			emit_rev32(HPPA_REG_T0, lo(rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		emit_jump(paoff, false, ctx);
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		int ret;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		emit_call(fixed, addr, ctx);
		break;
	}

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		REG_SET_SEEN_ALL(ctx);
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;

	/* IF (dst COND src) JUMP off and IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_K:

	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_K:

	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_K:

	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_K:

	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_K:

	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_K:

	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:

	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:

	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:

	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:

	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		if (BPF_SRC(code) == BPF_K) {
			/*
			 * The immediate is loaded into tmp2 ahead of the
			 * branch; shrink the branch offset by the number of
			 * instructions that load took.
			 */
			s = ctx->ninsns;
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
			e = ctx->ninsns;
			paoff -= (e - s);
		}
		if (is64)
			emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
		else
			emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Nothing is emitted for an exit in the last slot. */
		if (i == ctx->prog->len - 1)
			break;
		/* load epilogue function pointer and jump to it. */
		emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
		emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		/* High word comes from the second instruction slot. */
		u32 upper = insn[1].imm;
		u32 lower = imm;
		const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);

		/*
		 * NOTE: both words are emitted as given, also for
		 * bpf_pseudo_func() immediates; no function descriptor is
		 * dereferenced here.
		 */
		emit_imm64(rd, upper, lower, ctx);
		bpf_put_reg64(dst, rd, ctx);
		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
			return -1;
		break;

	/* speculation barrier: no instruction is emitted */
	case BPF_ST | BPF_NOSPEC:
		break;

	/*
	 * ST:  *(size *)(dst + off) = imm
	 * STX: *(size *)(dst + off) = src
	 */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			/* Store of an immediate: go through tmp2. */
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}

		if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
				   BPF_MODE(code)))
			return -1;
		break;

	/* Atomic operations are rejected. */
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}
1466*ceb0e726SHelge Deller 
bpf_jit_build_prologue(struct hppa_jit_context * ctx)1467*ceb0e726SHelge Deller void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
1468*ceb0e726SHelge Deller {
1469*ceb0e726SHelge Deller 	const s8 *tmp = regmap[TMP_REG_1];
1470*ceb0e726SHelge Deller 	const s8 *dst, *reg;
1471*ceb0e726SHelge Deller 	int stack_adjust = 0;
1472*ceb0e726SHelge Deller 	int i;
1473*ceb0e726SHelge Deller 	unsigned long addr;
1474*ceb0e726SHelge Deller 	int bpf_stack_adjust;
1475*ceb0e726SHelge Deller 
1476*ceb0e726SHelge Deller 	/*
1477*ceb0e726SHelge Deller 	 * stack on hppa grows up, so if tail calls are used we need to
1478*ceb0e726SHelge Deller 	 * allocate the maximum stack size
1479*ceb0e726SHelge Deller 	 */
1480*ceb0e726SHelge Deller 	if (REG_ALL_SEEN(ctx))
1481*ceb0e726SHelge Deller 		bpf_stack_adjust = MAX_BPF_STACK;
1482*ceb0e726SHelge Deller 	else
1483*ceb0e726SHelge Deller 		bpf_stack_adjust = ctx->prog->aux->stack_depth;
1484*ceb0e726SHelge Deller 	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);
1485*ceb0e726SHelge Deller 
1486*ceb0e726SHelge Deller 	/* make space for callee-saved registers. */
1487*ceb0e726SHelge Deller 	stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
1488*ceb0e726SHelge Deller 	/* make space for BPF registers on stack. */
1489*ceb0e726SHelge Deller 	stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
1490*ceb0e726SHelge Deller 	/* make space for BPF stack. */
1491*ceb0e726SHelge Deller 	stack_adjust += bpf_stack_adjust;
1492*ceb0e726SHelge Deller 	/* round up for stack alignment. */
1493*ceb0e726SHelge Deller 	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
1494*ceb0e726SHelge Deller 
1495*ceb0e726SHelge Deller 	/*
1496*ceb0e726SHelge Deller 	 * The first instruction sets the tail-call-counter (TCC) register.
1497*ceb0e726SHelge Deller 	 * This instruction is skipped by tail calls.
1498*ceb0e726SHelge Deller 	 * Use a temporary register instead of a caller-saved register initially.
1499*ceb0e726SHelge Deller 	 */
1500*ceb0e726SHelge Deller 	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);
1501*ceb0e726SHelge Deller 
1502*ceb0e726SHelge Deller 	/*
1503*ceb0e726SHelge Deller 	 * skip all initializations when called as BPF TAIL call.
1504*ceb0e726SHelge Deller 	 */
1505*ceb0e726SHelge Deller 	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
1506*ceb0e726SHelge Deller 	emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
1507*ceb0e726SHelge Deller 
1508*ceb0e726SHelge Deller 	/* set up hppa stack frame. */
1509*ceb0e726SHelge Deller 	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);			// copy sp,r1 (=prev_sp)
1510*ceb0e726SHelge Deller 	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);	// ldo stack_adjust(sp),sp (increase stack)
1511*ceb0e726SHelge Deller 	emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);	// stw prev_sp,-0x04(sp)
1512*ceb0e726SHelge Deller 	emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx);		// stw rp,-0x14(sp)
1513*ceb0e726SHelge Deller 
1514*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
1515*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
1516*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
1517*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T3);
1518*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T4);
1519*ceb0e726SHelge Deller 	REG_FORCE_SEEN(ctx, HPPA_REG_T5);
1520*ceb0e726SHelge Deller 
1521*ceb0e726SHelge Deller 	/* save callee-save registers. */
1522*ceb0e726SHelge Deller 	for (i = 3; i <= 18; i++) {
1523*ceb0e726SHelge Deller 		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
1524*ceb0e726SHelge Deller 			continue;
1525*ceb0e726SHelge Deller 		emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx);	// stw ri,-save_area(sp)
1526*ceb0e726SHelge Deller 	}
1527*ceb0e726SHelge Deller 
1528*ceb0e726SHelge Deller 	/*
1529*ceb0e726SHelge Deller 	 * now really set the tail call counter (TCC) register.
1530*ceb0e726SHelge Deller 	 */
1531*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
1532*ceb0e726SHelge Deller 		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);
1533*ceb0e726SHelge Deller 
1534*ceb0e726SHelge Deller 	/*
1535*ceb0e726SHelge Deller 	 * save epilogue function pointer for outer TCC call chain.
1536*ceb0e726SHelge Deller 	 * The main TCC call stores the final RP on stack.
1537*ceb0e726SHelge Deller 	 */
1538*ceb0e726SHelge Deller 	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
1539*ceb0e726SHelge Deller 	/* skip first two instructions of exit function, which jump to exit */
1540*ceb0e726SHelge Deller 	addr += 2 * HPPA_INSN_SIZE;
1541*ceb0e726SHelge Deller 	emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
1542*ceb0e726SHelge Deller 	emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
1543*ceb0e726SHelge Deller 	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);
1544*ceb0e726SHelge Deller 
1545*ceb0e726SHelge Deller 	/* load R1 & R2 from registers, R3-R5 from stack. */
1546*ceb0e726SHelge Deller 	/* use HPPA_REG_R1 which holds the old stack value */
1547*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_5];
1548*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1549*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1550*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1551*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
1552*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1553*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
1554*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1555*ceb0e726SHelge Deller 	}
1556*ceb0e726SHelge Deller 
1557*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_4];
1558*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1559*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1560*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1561*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
1562*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1563*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
1564*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1565*ceb0e726SHelge Deller 	}
1566*ceb0e726SHelge Deller 
1567*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_3];
1568*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1569*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1570*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1571*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
1572*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1573*ceb0e726SHelge Deller 			emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
1574*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1575*ceb0e726SHelge Deller 	}
1576*ceb0e726SHelge Deller 
1577*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_2];
1578*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1579*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1580*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1581*ceb0e726SHelge Deller 			emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
1582*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1583*ceb0e726SHelge Deller 			emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
1584*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1585*ceb0e726SHelge Deller 	}
1586*ceb0e726SHelge Deller 
1587*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_1];
1588*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1589*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1590*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1591*ceb0e726SHelge Deller 			emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
1592*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1593*ceb0e726SHelge Deller 			emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
1594*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1595*ceb0e726SHelge Deller 	}
1596*ceb0e726SHelge Deller 
1597*ceb0e726SHelge Deller 	/* Set up BPF frame pointer. */
1598*ceb0e726SHelge Deller 	dst = regmap[BPF_REG_FP];
1599*ceb0e726SHelge Deller 	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1600*ceb0e726SHelge Deller 	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1601*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, lo(reg)))
1602*ceb0e726SHelge Deller 			emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
1603*ceb0e726SHelge Deller 				HPPA_REG_SP, lo(reg)), ctx);
1604*ceb0e726SHelge Deller 		if (REG_WAS_SEEN(ctx, hi(reg)))
1605*ceb0e726SHelge Deller 			emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
1606*ceb0e726SHelge Deller 		bpf_put_reg64(dst, tmp, ctx);
1607*ceb0e726SHelge Deller 	}
1608*ceb0e726SHelge Deller 
1609*ceb0e726SHelge Deller 	emit(hppa_nop(), ctx);
1610*ceb0e726SHelge Deller }
1611*ceb0e726SHelge Deller 
bpf_jit_build_epilogue(struct hppa_jit_context * ctx)1612*ceb0e726SHelge Deller void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
1613*ceb0e726SHelge Deller {
1614*ceb0e726SHelge Deller 	__build_epilogue(false, ctx);
1615*ceb0e726SHelge Deller }
1616