xref: /openbmc/linux/arch/loongarch/net/bpf_jit.c (revision 5dc615520c4dfb358245680f1904bad61116648e)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for LoongArch
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */
#include "bpf_jit.h"

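/*
 * The tail call count (TCC) is passed between BPF programs in $a6 (REG_TCC).
 * If the program also makes calls, the prologue copies it into the
 * callee-saved $s5 (TCC_SAVED) so that it survives across those calls.
 */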
#define REG_TCC		LOONGARCH_GPR_A6
#define TCC_SAVED	LOONGARCH_GPR_S5

#define SAVE_RA		BIT(0)
#define SAVE_TCC	BIT(1)

static const int regmap[] = {
	/* return value from in-kernel function, and exit value for eBPF program */
	[BPF_REG_0] = LOONGARCH_GPR_A5,
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = LOONGARCH_GPR_A0,
	[BPF_REG_2] = LOONGARCH_GPR_A1,
	[BPF_REG_3] = LOONGARCH_GPR_A2,
	[BPF_REG_4] = LOONGARCH_GPR_A3,
	[BPF_REG_5] = LOONGARCH_GPR_A4,
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = LOONGARCH_GPR_S0,
	[BPF_REG_7] = LOONGARCH_GPR_S1,
	[BPF_REG_8] = LOONGARCH_GPR_S2,
	[BPF_REG_9] = LOONGARCH_GPR_S3,
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = LOONGARCH_GPR_S4,
	/* temporary register for blinding constants */
	[BPF_REG_AX] = LOONGARCH_GPR_T0,
};

static void mark_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_RA;
}

static void mark_tail_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_TCC;
}

static bool seen_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_RA);
}

static bool seen_tail_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_TCC);
}

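/*
 * A call may clobber $a6, so when the program contains both calls and tail
 * calls the prologue has copied the TCC into $s5; return whichever register
 * currently holds the live count.
 */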
static u8 tail_call_reg(struct jit_ctx *ctx)
{
	if (seen_call(ctx))
		return TCC_SAVED;

	return REG_TCC;
}

/*
 * eBPF prog stack layout:
 *
 *                                        high
 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
 *                            |           $ra           |
 *                            +-------------------------+
 *                            |           $fp           |
 *                            +-------------------------+
 *                            |           $s0           |
 *                            +-------------------------+
 *                            |           $s1           |
 *                            +-------------------------+
 *                            |           $s2           |
 *                            +-------------------------+
 *                            |           $s3           |
 *                            +-------------------------+
 *                            |           $s4           |
 *                            +-------------------------+
 *                            |           $s5           |
 *                            +-------------------------+ <--BPF_REG_FP
 *                            |  prog->aux->stack_depth |
 *                            |        (optional)       |
 * current $sp -------------> +-------------------------+
 *                                        low
 */
static void build_prologue(struct jit_ctx *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);

	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
	stack_adjust += sizeof(long) * 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	/*
	 * First instruction initializes the tail call count (TCC).
	 * On tail call we skip this instruction, and the TCC is
	 * passed in REG_TCC from the caller.
	 */
	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);

	store_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);

	if (bpf_stack_adjust)
		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);

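	/*
	 * BPF_REG_FP ($s4) now points at the top of the program's own stack
	 * area (see the layout above); the program addresses that area with
	 * negative offsets from it.
	 */
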
	/*
	 * Program contains calls and tail calls, so REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		move_reg(ctx, TCC_SAVED, REG_TCC);

	ctx->stack_size = stack_adjust;
}

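/*
 * Restore the saved registers and pop the stack frame. On a normal exit the
 * return value is moved into $a0 and control returns to the caller; on a
 * tail call we instead jump into the next program (see emit_bpf_tail_call()).
 */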
static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
{
	int stack_adjust = ctx->stack_size;
	int load_offset;

	load_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);

	if (!is_tail_call) {
		/* Set return value */
		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
		/* Return to the caller */
		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
	} else {
		/*
		 * Jump to the next BPF program, skipping its first
		 * instruction (the TCC initialization).
		 */
		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
	}
}

static void build_epilogue(struct jit_ctx *ctx)
{
	__build_epilogue(ctx, false);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

/* initialized on the first pass of build_body() */
static int out_offset = -1;
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	int off;
	u8 tcc = tail_call_reg(ctx);
	u8 a1 = LOONGARCH_GPR_A1;
	u8 a2 = LOONGARCH_GPR_A2;
	u8 t1 = LOONGARCH_GPR_T1;
	u8 t2 = LOONGARCH_GPR_T2;
	u8 t3 = LOONGARCH_GPR_T3;
	const int idx0 = ctx->idx;

#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
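	/*
	 * cur_offset is the number of instructions emitted so far in this
	 * sequence and jmp_offset the remaining distance (in instructions)
	 * to the common "out:" label, whose position is recorded in
	 * out_offset on the first pass.
	 */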

	/*
	 * a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	 goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_insn(ctx, ldwu, t1, a1, off);
	/* bgeu $a2, $t1, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
		goto toofar;

	/*
	 * if (--TCC < 0)
	 *	 goto out;
	 */
	emit_insn(ctx, addid, REG_TCC, tcc, -1);
	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	 goto out;
	 */
	emit_insn(ctx, alsld, t2, a2, a1, 2);
	off = offsetof(struct bpf_array, ptrs);
	emit_insn(ctx, ldd, t2, t2, off);
	/* beq $t2, $zero, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_insn(ctx, ldd, t3, t2, off);
	__build_epilogue(ctx, true);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}

	return 0;

toofar:
	pr_info_once("tail_call: jump too far\n");
	return -1;
#undef cur_offset
#undef jmp_offset
}

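/*
 * BPF_STX | BPF_ATOMIC: t1 is loaded with the target address (dst + off).
 * Plain and fetching ADD/AND/OR/XOR as well as XCHG map directly onto the
 * LoongArch AM* atomic instructions; CMPXCHG is built from an LL/SC loop.
 */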
static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 t3 = LOONGARCH_GPR_T3;
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;

	move_imm(ctx, t1, off, false);
	emit_insn(ctx, addd, t1, dst, t1);
	move_reg(ctx, t3, src);
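	/*
	 * The fetch and exchange forms below write the old memory value back
	 * into src, so the value to apply is kept in the scratch register t3
	 * rather than in src itself.
	 */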

	switch (imm) {
	/* lock *(size *)(dst + off) <op>= src */
	case BPF_ADD:
		if (isdw)
			emit_insn(ctx, amaddd, t2, t1, src);
		else
			emit_insn(ctx, amaddw, t2, t1, src);
		break;
	case BPF_AND:
		if (isdw)
			emit_insn(ctx, amandd, t2, t1, src);
		else
			emit_insn(ctx, amandw, t2, t1, src);
		break;
	case BPF_OR:
		if (isdw)
			emit_insn(ctx, amord, t2, t1, src);
		else
			emit_insn(ctx, amorw, t2, t1, src);
		break;
	case BPF_XOR:
		if (isdw)
			emit_insn(ctx, amxord, t2, t1, src);
		else
			emit_insn(ctx, amxorw, t2, t1, src);
		break;
	/* src = atomic_fetch_<op>(dst + off, src) */
	case BPF_ADD | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amaddd, src, t1, t3);
		} else {
			emit_insn(ctx, amaddw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_AND | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amandd, src, t1, t3);
		} else {
			emit_insn(ctx, amandw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_OR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amord, src, t1, t3);
		} else {
			emit_insn(ctx, amorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_XOR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amxord, src, t1, t3);
		} else {
			emit_insn(ctx, amxorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* src = atomic_xchg(dst + off, src); */
	case BPF_XCHG:
		if (isdw) {
			emit_insn(ctx, amswapd, src, t1, t3);
		} else {
			emit_insn(ctx, amswapw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
	case BPF_CMPXCHG:
		u8 r0 = regmap[BPF_REG_0];

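		/*
		 * LL/SC retry loop: load-link the current value into r0,
		 * branch past the loop if it differs from the expected value
		 * in t2, otherwise store-conditional src (via t3) and restart
		 * the loop if the store fails.
		 */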
		move_reg(ctx, t2, r0);
		if (isdw) {
			emit_insn(ctx, lld, r0, t1, 0);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scd, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
		} else {
			emit_insn(ctx, llw, r0, t1, 0);
			emit_zext_32(ctx, t2, true);
			emit_zext_32(ctx, r0, true);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scw, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
			emit_zext_32(ctx, r0, true);
		}
		break;
	}
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
	       cond == BPF_JSGE || cond == BPF_JSLE;
}

static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
{
	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU ||
			  BPF_CLASS(insn->code) == BPF_JMP32;
	const u8 code = insn->code;
	const u8 cond = BPF_OP(code);
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	int jmp_offset;
	int i = insn - ctx->prog->insnsi;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		move_reg(ctx, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		move_imm(ctx, dst, imm, is32);
		break;

	/* dst = dst + src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_insn(ctx, addd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst + imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_signed_imm12(imm)) {
			emit_insn(ctx, addid, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, addd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - src */
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_insn(ctx, subd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - imm */
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_signed_imm12(-imm)) {
			emit_insn(ctx, addid, dst, dst, -imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, subd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * src */
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_insn(ctx, muld, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * imm */
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, muld, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

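	/*
	 * BPF division and modulo are unsigned. For the 32-bit (BPF_ALU)
	 * forms both operands are zero-extended first, so the 64-bit unsigned
	 * divide/modulo below also yields the correct 32-bit result.
	 */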
	/* dst = dst / src */
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		emit_zext_32(ctx, dst, is32);
		move_reg(ctx, t1, src);
		emit_zext_32(ctx, t1, is32);
		emit_insn(ctx, divdu, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst / imm */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_zext_32(ctx, dst, is32);
		emit_insn(ctx, divdu, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst % src */
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit_zext_32(ctx, dst, is32);
		move_reg(ctx, t1, src);
		emit_zext_32(ctx, t1, is32);
		emit_insn(ctx, moddu, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst % imm */
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_zext_32(ctx, dst, is32);
		emit_insn(ctx, moddu, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & src */
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_insn(ctx, and, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & imm */
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, andi, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, and, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | src */
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_insn(ctx, or, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, ori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, or, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ src */
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_insn(ctx, xor, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ imm */
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, xori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, xor, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst << src (logical) */
	case BPF_ALU | BPF_LSH | BPF_X:
		emit_insn(ctx, sllw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_insn(ctx, slld, dst, dst, src);
		break;

	/* dst = dst << imm (logical) */
	case BPF_ALU | BPF_LSH | BPF_K:
		emit_insn(ctx, slliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_insn(ctx, sllid, dst, dst, imm);
		break;

	/* dst = dst >> src (logical) */
	case BPF_ALU | BPF_RSH | BPF_X:
		emit_insn(ctx, srlw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_insn(ctx, srld, dst, dst, src);
		break;

	/* dst = dst >> imm (logical) */
	case BPF_ALU | BPF_RSH | BPF_K:
		emit_insn(ctx, srliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit_insn(ctx, srlid, dst, dst, imm);
		break;

	/* dst = dst >> src (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_X:
		emit_insn(ctx, sraw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_insn(ctx, srad, dst, dst, src);
		break;

	/* dst = dst >> imm (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraiw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraid, dst, dst, imm);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit_zext_32(ctx, dst, is32);
			break;
		case 64:
			/* do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
		switch (imm) {
		case 16:
			emit_insn(ctx, revb2h, dst, dst);
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			emit_insn(ctx, revb2w, dst, dst);
			/* zero-extend 32 bits into 64 bits */
			emit_zext_32(ctx, dst, is32);
			break;
		case 64:
			emit_insn(ctx, revbd, dst, dst);
			break;
		}
		break;

	/* PC += off if dst cond src */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_reg(ctx, t1, dst);
		move_reg(ctx, t2, src);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, t1, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst cond imm */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		u8 t7 = -1;
		jmp_offset = bpf2la_offset(i, off, ctx);
		if (imm) {
			move_imm(ctx, t1, imm, false);
			t7 = t1;
		} else {
			/* If imm is 0, simply use zero register. */
			t7 = LOONGARCH_GPR_ZERO;
		}
		move_reg(ctx, t2, dst);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, t7, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, t7, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & src */
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		emit_insn(ctx, and, t1, dst, src);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & imm */
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, and, t1, dst, t1);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off */
	case BPF_JMP | BPF_JA:
		jmp_offset = bpf2la_offset(i, off, ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
		int ret;
		u64 func_addr;
		bool func_addr_fixed;

		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;

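		/*
		 * Load the target address into t1 and call it, linking the
		 * return address in $ra. The callee's return value arrives
		 * in $a0 and is copied into the BPF return value register
		 * (regmap[BPF_REG_0], i.e. $a5).
		 */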
		move_imm(ctx, t1, func_addr, is32);
		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
		break;

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		mark_tail_call(ctx);
		if (emit_bpf_tail_call(ctx) < 0)
			return -EINVAL;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		emit_sext_32(ctx, regmap[BPF_REG_0], true);

		if (i == ctx->prog->len - 1)
			break;

		jmp_offset = epilogue_offset(ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
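		/*
		 * A 64-bit immediate load spans two BPF instructions; the
		 * return value of 1 below tells build_body() to skip over
		 * the second slot.
		 */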
		u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;

		move_imm(ctx, dst, imm64, is32);
		return 1;

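	/*
	 * Loads and stores pick the shortest encoding for the offset: the
	 * 12-bit signed immediate form, the 14-bit "ptr" form for word and
	 * double-word accesses, or, failing that, the offset is materialized
	 * in a temporary register and the register-indexed form is used.
	 */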
	/* dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, ldbu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, ldxbu, dst, src, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, ldhu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, ldxhu, dst, src, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, ldwu, dst, src, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, ldptrw, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, ldxwu, dst, src, t1);
			}
			break;
		case BPF_DW:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, ldd, dst, src, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, ldptrd, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, ldxd, dst, src, t1);
			}
			break;
		}
		break;

	/* *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxb, t1, dst, t2);
			}
			break;
		case BPF_H:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxh, t1, dst, t2);
			}
			break;
		case BPF_W:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxw, t1, dst, t2);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxd, t1, dst, t2);
			}
			break;
		}
		break;

	/* *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxb, src, dst, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxh, src, dst, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxw, src, dst, t1);
			}
			break;
		case BPF_DW:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxd, src, dst, t1);
			}
			break;
		}
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		emit_atomic(insn, ctx);
		break;

	default:
		pr_err("bpf_jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;

toofar:
	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
	return -E2BIG;
}

static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	int i;
	const struct bpf_prog *prog = ctx->prog;

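	/*
	 * On the first pass ctx->image is NULL; record the JIT instruction
	 * index of every BPF instruction in ctx->offset[] so branch targets
	 * can be resolved on the code-generating pass.
	 */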
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;

		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}

	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}

/* Fill space with break instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;

	/* We are guaranteed to have aligned memory */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = INSN_BREAK;
}

static int validate_code(struct jit_ctx *ctx)
{
	int i;
	union loongarch_instruction insn;

	for (i = 0; i < ctx->idx; i++) {
		insn = ctx->image[i];
		/* Check INSN_BREAK */
		if (insn.word == INSN_BREAK)
			return -1;
	}

	return 0;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	bool tmp_blinded = false, extra_pass = false;
	u8 *image_ptr;
	int image_size;
	struct jit_ctx ctx;
	struct jit_data *jit_data;
	struct bpf_binary_header *header;
	struct bpf_prog *tmp, *orig_prog = prog;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	if (IS_ERR(tmp))
		return orig_prog;

	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		image_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_offset;
	}
	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);

	/*
	 * Now we know the actual image size.
	 * Each LoongArch instruction is 32 bits wide, so the number of JITed
	 * instructions translates directly into the number of bytes needed
	 * to store the JITed code.
	 */
	image_size = sizeof(u32) * ctx.idx;
	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 2. Now, the actual pass to generate final JIT code */
	ctx.image = (union loongarch_instruction *)image_ptr;

skip_init_ctx:
	ctx.idx = 0;

	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}
	build_epilogue(&ctx);

	/* 3. Extra pass to validate JITed code */
	if (validate_code(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}

	/* And we're done */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.image);

	/* Update the icache */
	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));

	if (!prog->is_func || extra_pass) {
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_offset;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->jited = 1;
	prog->jited_len = image_size;
	prog->bpf_func = (void *)ctx.image;

	if (!prog->is_func || extra_pass) {
		int i;

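		/*
		 * ctx.offset[] was recorded in instruction units; convert it
		 * to byte offsets before handing it to
		 * bpf_prog_fill_jited_linfo().
		 */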
		/* offset[prog->len] is the size of the program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);

out_offset:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);

	out_offset = -1;

	return prog;
}