xref: /openbmc/linux/arch/loongarch/net/bpf_jit.c (revision ae108c48)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include "bpf_jit.h"
8 
9 #define REG_TCC		LOONGARCH_GPR_A6
10 #define TCC_SAVED	LOONGARCH_GPR_S5
11 
12 #define SAVE_RA		BIT(0)
13 #define SAVE_TCC	BIT(1)
14 
15 static const int regmap[] = {
16 	/* return value from in-kernel function, and exit value for eBPF program */
17 	[BPF_REG_0] = LOONGARCH_GPR_A5,
18 	/* arguments from eBPF program to in-kernel function */
19 	[BPF_REG_1] = LOONGARCH_GPR_A0,
20 	[BPF_REG_2] = LOONGARCH_GPR_A1,
21 	[BPF_REG_3] = LOONGARCH_GPR_A2,
22 	[BPF_REG_4] = LOONGARCH_GPR_A3,
23 	[BPF_REG_5] = LOONGARCH_GPR_A4,
24 	/* callee saved registers that in-kernel function will preserve */
25 	[BPF_REG_6] = LOONGARCH_GPR_S0,
26 	[BPF_REG_7] = LOONGARCH_GPR_S1,
27 	[BPF_REG_8] = LOONGARCH_GPR_S2,
28 	[BPF_REG_9] = LOONGARCH_GPR_S3,
29 	/* read-only frame pointer to access stack */
30 	[BPF_REG_FP] = LOONGARCH_GPR_S4,
31 	/* temporary register for blinding constants */
32 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
33 };
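
/*
 * With this mapping, the eBPF argument registers BPF_REG_1..BPF_REG_5 land
 * directly in the LoongArch argument registers $a0..$a4, so no shuffling is
 * needed when calling into an in-kernel function, while BPF_REG_6..BPF_REG_9
 * live in callee-saved registers that survive such calls.
 */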
34 
35 static void mark_call(struct jit_ctx *ctx)
36 {
37 	ctx->flags |= SAVE_RA;
38 }
39 
40 static void mark_tail_call(struct jit_ctx *ctx)
41 {
42 	ctx->flags |= SAVE_TCC;
43 }
44 
45 static bool seen_call(struct jit_ctx *ctx)
46 {
47 	return (ctx->flags & SAVE_RA);
48 }
49 
50 static bool seen_tail_call(struct jit_ctx *ctx)
51 {
52 	return (ctx->flags & SAVE_TCC);
53 }
54 
55 static u8 tail_call_reg(struct jit_ctx *ctx)
56 {
57 	if (seen_call(ctx))
58 		return TCC_SAVED;
59 
60 	return REG_TCC;
61 }
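
/*
 * The tail call count normally lives in REG_TCC ($a6), but $a6 is a
 * caller-clobbered argument register.  If the program also emits calls,
 * the prologue copies the count to TCC_SAVED ($s5) and the tail-call
 * sequence reads it from there instead.
 */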
62 
63 /*
64  * eBPF prog stack layout:
65  *
66  *                                        high
67  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
68  *                            |           $ra           |
69  *                            +-------------------------+
70  *                            |           $fp           |
71  *                            +-------------------------+
72  *                            |           $s0           |
73  *                            +-------------------------+
74  *                            |           $s1           |
75  *                            +-------------------------+
76  *                            |           $s2           |
77  *                            +-------------------------+
78  *                            |           $s3           |
79  *                            +-------------------------+
80  *                            |           $s4           |
81  *                            +-------------------------+
82  *                            |           $s5           |
83  *                            +-------------------------+ <--BPF_REG_FP
84  *                            |  prog->aux->stack_depth |
85  *                            |        (optional)       |
86  * current $sp -------------> +-------------------------+
87  *                                        low
88  */
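/*
 * Roughly, the emitted prologue is (a sketch, exact offsets elided):
 *
 *	addi.d	$a6, $zero, MAX_TAIL_CALL_CNT	# skipped on tail call
 *	addi.d	$sp, $sp, -stack_adjust
 *	st.d	$ra, $sp, stack_adjust - 8
 *	...					# $fp, $s0..$s5 likewise
 *	addi.d	$fp, $sp, stack_adjust
 *	addi.d	$s4, $sp, bpf_stack_adjust	# BPF_REG_FP, if stack is used
 *	move	$s5, $a6			# save TCC, if needed
 */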
89 static void build_prologue(struct jit_ctx *ctx)
90 {
91 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
92 
93 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
94 
95 	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
96 	stack_adjust += sizeof(long) * 8;
97 
98 	stack_adjust = round_up(stack_adjust, 16);
99 	stack_adjust += bpf_stack_adjust;
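	/*
	 * For example, with a BPF stack_depth of 24 bytes:
	 * bpf_stack_adjust = round_up(24, 16) = 32, the register save area
	 * is 8 * sizeof(long) = 64 bytes, so stack_adjust = 64 + 32 = 96.
	 */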
100 
101 	/*
102 	 * First instruction initializes the tail call count (TCC).
103 	 * On tail call we skip this instruction, and the TCC is
104 	 * passed in REG_TCC from the caller.
105 	 */
106 	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
107 
108 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
109 
110 	store_offset = stack_adjust - sizeof(long);
111 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
112 
113 	store_offset -= sizeof(long);
114 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
115 
116 	store_offset -= sizeof(long);
117 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
118 
119 	store_offset -= sizeof(long);
120 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
121 
122 	store_offset -= sizeof(long);
123 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
124 
125 	store_offset -= sizeof(long);
126 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
127 
128 	store_offset -= sizeof(long);
129 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
130 
131 	store_offset -= sizeof(long);
132 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
133 
134 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
135 
136 	if (bpf_stack_adjust)
137 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
138 
139 	/*
140 	 * The program contains both calls and tail calls, so REG_TCC
141 	 * needs to be saved across calls.
142 	 */
143 	if (seen_tail_call(ctx) && seen_call(ctx))
144 		move_reg(ctx, TCC_SAVED, REG_TCC);
145 
146 	ctx->stack_size = stack_adjust;
147 }
148 
149 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
150 {
151 	int stack_adjust = ctx->stack_size;
152 	int load_offset;
153 
154 	load_offset = stack_adjust - sizeof(long);
155 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
156 
157 	load_offset -= sizeof(long);
158 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
159 
160 	load_offset -= sizeof(long);
161 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
162 
163 	load_offset -= sizeof(long);
164 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
165 
166 	load_offset -= sizeof(long);
167 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
168 
169 	load_offset -= sizeof(long);
170 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
171 
172 	load_offset -= sizeof(long);
173 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
174 
175 	load_offset -= sizeof(long);
176 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
177 
178 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
179 
180 	if (!is_tail_call) {
181 		/* Set return value */
182 		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
183 		/* Return to the caller */
184 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
185 	} else {
186 		/*
187 		 * Tail-call into the next bpf prog, skipping its first
188 		 * instruction (the TCC initialization emitted by the prologue).
189 		 */
190 		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
191 	}
192 }
193 
194 static void build_epilogue(struct jit_ctx *ctx)
195 {
196 	__build_epilogue(ctx, false);
197 }
198 
199 bool bpf_jit_supports_kfunc_call(void)
200 {
201 	return true;
202 }
203 
204 /* initialized on the first pass of build_body() */
205 static int out_offset = -1;
206 static int emit_bpf_tail_call(struct jit_ctx *ctx)
207 {
208 	int off;
209 	u8 tcc = tail_call_reg(ctx);
210 	u8 a1 = LOONGARCH_GPR_A1;
211 	u8 a2 = LOONGARCH_GPR_A2;
212 	u8 t1 = LOONGARCH_GPR_T1;
213 	u8 t2 = LOONGARCH_GPR_T2;
214 	u8 t3 = LOONGARCH_GPR_T3;
215 	const int idx0 = ctx->idx;
216 
217 #define cur_offset (ctx->idx - idx0)
218 #define jmp_offset (out_offset - (cur_offset))
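
	/*
	 * All of the bail-out branches below must land on the same "out"
	 * label.  out_offset records where that label fell on the first
	 * pass, and the check at the end verifies that later passes emit
	 * a tail-call sequence of exactly the same length.
	 */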
219 
220 	/*
221 	 * a0: &ctx
222 	 * a1: &array
223 	 * a2: index
224 	 *
225 	 * if (index >= array->map.max_entries)
226 	 *	 goto out;
227 	 */
228 	off = offsetof(struct bpf_array, map.max_entries);
229 	emit_insn(ctx, ldwu, t1, a1, off);
230 	/* bgeu $a2, $t1, jmp_offset */
231 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
232 		goto toofar;
233 
234 	/*
235 	 * if (--TCC < 0)
236 	 *	 goto out;
237 	 */
238 	emit_insn(ctx, addid, REG_TCC, tcc, -1);
239 	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
240 		goto toofar;
241 
242 	/*
243 	 * prog = array->ptrs[index];
244 	 * if (!prog)
245 	 *	 goto out;
246 	 */
247 	emit_insn(ctx, alsld, t2, a2, a1, 2);
248 	off = offsetof(struct bpf_array, ptrs);
249 	emit_insn(ctx, ldd, t2, t2, off);
250 	/* beq $t2, $zero, jmp_offset */
251 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
252 		goto toofar;
253 
254 	/* goto *(prog->bpf_func + 4); */
255 	off = offsetof(struct bpf_prog, bpf_func);
256 	emit_insn(ctx, ldd, t3, t2, off);
257 	__build_epilogue(ctx, true);
258 
259 	/* out: */
260 	if (out_offset == -1)
261 		out_offset = cur_offset;
262 	if (cur_offset != out_offset) {
263 		pr_err_once("tail_call out_offset = %d, expected %d!\n",
264 			    cur_offset, out_offset);
265 		return -1;
266 	}
267 
268 	return 0;
269 
270 toofar:
271 	pr_info_once("tail_call: jump too far\n");
272 	return -1;
273 #undef cur_offset
274 #undef jmp_offset
275 }
276 
277 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
278 {
279 	const u8 t1 = LOONGARCH_GPR_T1;
280 	const u8 t2 = LOONGARCH_GPR_T2;
281 	const u8 t3 = LOONGARCH_GPR_T3;
282 	const u8 src = regmap[insn->src_reg];
283 	const u8 dst = regmap[insn->dst_reg];
284 	const s16 off = insn->off;
285 	const s32 imm = insn->imm;
286 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
287 
288 	move_imm(ctx, t1, off, false);
289 	emit_insn(ctx, addd, t1, dst, t1);
290 	move_reg(ctx, t3, src);
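	/*
	 * t1 now holds the memory address (dst + off) and t3 a scratch copy
	 * of src: the FETCH/XCHG cases below write the old memory value back
	 * into src, so the copy keeps the AM* operand and destination in
	 * separate registers.
	 */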
291 
292 	switch (imm) {
293 	/* lock *(size *)(dst + off) <op>= src */
294 	case BPF_ADD:
295 		if (isdw)
296 			emit_insn(ctx, amaddd, t2, t1, src);
297 		else
298 			emit_insn(ctx, amaddw, t2, t1, src);
299 		break;
300 	case BPF_AND:
301 		if (isdw)
302 			emit_insn(ctx, amandd, t2, t1, src);
303 		else
304 			emit_insn(ctx, amandw, t2, t1, src);
305 		break;
306 	case BPF_OR:
307 		if (isdw)
308 			emit_insn(ctx, amord, t2, t1, src);
309 		else
310 			emit_insn(ctx, amorw, t2, t1, src);
311 		break;
312 	case BPF_XOR:
313 		if (isdw)
314 			emit_insn(ctx, amxord, t2, t1, src);
315 		else
316 			emit_insn(ctx, amxorw, t2, t1, src);
317 		break;
318 	/* src = atomic_fetch_<op>(dst + off, src) */
319 	case BPF_ADD | BPF_FETCH:
320 		if (isdw) {
321 			emit_insn(ctx, amaddd, src, t1, t3);
322 		} else {
323 			emit_insn(ctx, amaddw, src, t1, t3);
324 			emit_zext_32(ctx, src, true);
325 		}
326 		break;
327 	case BPF_AND | BPF_FETCH:
328 		if (isdw) {
329 			emit_insn(ctx, amandd, src, t1, t3);
330 		} else {
331 			emit_insn(ctx, amandw, src, t1, t3);
332 			emit_zext_32(ctx, src, true);
333 		}
334 		break;
335 	case BPF_OR | BPF_FETCH:
336 		if (isdw) {
337 			emit_insn(ctx, amord, src, t1, t3);
338 		} else {
339 			emit_insn(ctx, amorw, src, t1, t3);
340 			emit_zext_32(ctx, src, true);
341 		}
342 		break;
343 	case BPF_XOR | BPF_FETCH:
344 		if (isdw) {
345 			emit_insn(ctx, amxord, src, t1, t3);
346 		} else {
347 			emit_insn(ctx, amxorw, src, t1, t3);
348 			emit_zext_32(ctx, src, true);
349 		}
350 		break;
351 	/* src = atomic_xchg(dst + off, src); */
352 	case BPF_XCHG:
353 		if (isdw) {
354 			emit_insn(ctx, amswapd, src, t1, t3);
355 		} else {
356 			emit_insn(ctx, amswapw, src, t1, t3);
357 			emit_zext_32(ctx, src, true);
358 		}
359 		break;
360 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
361 	case BPF_CMPXCHG:
362 		u8 r0 = regmap[BPF_REG_0];
363 
364 		move_reg(ctx, t2, r0);
365 		if (isdw) {
366 			emit_insn(ctx, lld, r0, t1, 0);
367 			emit_insn(ctx, bne, t2, r0, 4);
368 			move_reg(ctx, t3, src);
369 			emit_insn(ctx, scd, t3, t1, 0);
370 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
371 		} else {
372 			emit_insn(ctx, llw, r0, t1, 0);
373 			emit_zext_32(ctx, t2, true);
374 			emit_zext_32(ctx, r0, true);
375 			emit_insn(ctx, bne, t2, r0, 4);
376 			move_reg(ctx, t3, src);
377 			emit_insn(ctx, scw, t3, t1, 0);
378 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
379 			emit_zext_32(ctx, r0, true);
380 		}
381 		break;
382 	}
383 }
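
/*
 * Note on BPF_CMPXCHG above: LoongArch has no AM*-style compare-and-swap,
 * so the JIT open-codes an ll.w/ll.d + sc.w/sc.d retry loop.  The short
 * forward branch skips the store when the comparison with r0 fails, and
 * the backward branch restarts from the load whenever the
 * store-conditional fails.
 */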
384 
385 static bool is_signed_bpf_cond(u8 cond)
386 {
387 	return cond == BPF_JSGT || cond == BPF_JSLT ||
388 	       cond == BPF_JSGE || cond == BPF_JSLE;
389 }
390 
391 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
392 {
393 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU ||
394 			  BPF_CLASS(insn->code) == BPF_JMP32;
395 	const u8 code = insn->code;
396 	const u8 cond = BPF_OP(code);
397 	const u8 t1 = LOONGARCH_GPR_T1;
398 	const u8 t2 = LOONGARCH_GPR_T2;
399 	const u8 src = regmap[insn->src_reg];
400 	const u8 dst = regmap[insn->dst_reg];
401 	const s16 off = insn->off;
402 	const s32 imm = insn->imm;
403 	int jmp_offset;
404 	int i = insn - ctx->prog->insnsi;
405 
406 	switch (code) {
407 	/* dst = src */
408 	case BPF_ALU | BPF_MOV | BPF_X:
409 	case BPF_ALU64 | BPF_MOV | BPF_X:
410 		move_reg(ctx, dst, src);
411 		emit_zext_32(ctx, dst, is32);
412 		break;
413 
414 	/* dst = imm */
415 	case BPF_ALU | BPF_MOV | BPF_K:
416 	case BPF_ALU64 | BPF_MOV | BPF_K:
417 		move_imm(ctx, dst, imm, is32);
418 		break;
419 
420 	/* dst = dst + src */
421 	case BPF_ALU | BPF_ADD | BPF_X:
422 	case BPF_ALU64 | BPF_ADD | BPF_X:
423 		emit_insn(ctx, addd, dst, dst, src);
424 		emit_zext_32(ctx, dst, is32);
425 		break;
426 
427 	/* dst = dst + imm */
428 	case BPF_ALU | BPF_ADD | BPF_K:
429 	case BPF_ALU64 | BPF_ADD | BPF_K:
430 		if (is_signed_imm12(imm)) {
431 			emit_insn(ctx, addid, dst, dst, imm);
432 		} else {
433 			move_imm(ctx, t1, imm, is32);
434 			emit_insn(ctx, addd, dst, dst, t1);
435 		}
436 		emit_zext_32(ctx, dst, is32);
437 		break;
438 
439 	/* dst = dst - src */
440 	case BPF_ALU | BPF_SUB | BPF_X:
441 	case BPF_ALU64 | BPF_SUB | BPF_X:
442 		emit_insn(ctx, subd, dst, dst, src);
443 		emit_zext_32(ctx, dst, is32);
444 		break;
445 
446 	/* dst = dst - imm */
447 	case BPF_ALU | BPF_SUB | BPF_K:
448 	case BPF_ALU64 | BPF_SUB | BPF_K:
449 		if (is_signed_imm12(-imm)) {
450 			emit_insn(ctx, addid, dst, dst, -imm);
451 		} else {
452 			move_imm(ctx, t1, imm, is32);
453 			emit_insn(ctx, subd, dst, dst, t1);
454 		}
455 		emit_zext_32(ctx, dst, is32);
456 		break;
457 
458 	/* dst = dst * src */
459 	case BPF_ALU | BPF_MUL | BPF_X:
460 	case BPF_ALU64 | BPF_MUL | BPF_X:
461 		emit_insn(ctx, muld, dst, dst, src);
462 		emit_zext_32(ctx, dst, is32);
463 		break;
464 
465 	/* dst = dst * imm */
466 	case BPF_ALU | BPF_MUL | BPF_K:
467 	case BPF_ALU64 | BPF_MUL | BPF_K:
468 		move_imm(ctx, t1, imm, is32);
469 		emit_insn(ctx, muld, dst, dst, t1);
470 		emit_zext_32(ctx, dst, is32);
471 		break;
472 
473 	/* dst = dst / src */
474 	case BPF_ALU | BPF_DIV | BPF_X:
475 	case BPF_ALU64 | BPF_DIV | BPF_X:
476 		emit_zext_32(ctx, dst, is32);
477 		move_reg(ctx, t1, src);
478 		emit_zext_32(ctx, t1, is32);
479 		emit_insn(ctx, divdu, dst, dst, t1);
480 		emit_zext_32(ctx, dst, is32);
481 		break;
482 
483 	/* dst = dst / imm */
484 	case BPF_ALU | BPF_DIV | BPF_K:
485 	case BPF_ALU64 | BPF_DIV | BPF_K:
486 		move_imm(ctx, t1, imm, is32);
487 		emit_zext_32(ctx, dst, is32);
488 		emit_insn(ctx, divdu, dst, dst, t1);
489 		emit_zext_32(ctx, dst, is32);
490 		break;
491 
492 	/* dst = dst % src */
493 	case BPF_ALU | BPF_MOD | BPF_X:
494 	case BPF_ALU64 | BPF_MOD | BPF_X:
495 		emit_zext_32(ctx, dst, is32);
496 		move_reg(ctx, t1, src);
497 		emit_zext_32(ctx, t1, is32);
498 		emit_insn(ctx, moddu, dst, dst, t1);
499 		emit_zext_32(ctx, dst, is32);
500 		break;
501 
502 	/* dst = dst % imm */
503 	case BPF_ALU | BPF_MOD | BPF_K:
504 	case BPF_ALU64 | BPF_MOD | BPF_K:
505 		move_imm(ctx, t1, imm, is32);
506 		emit_zext_32(ctx, dst, is32);
507 		emit_insn(ctx, moddu, dst, dst, t1);
508 		emit_zext_32(ctx, dst, is32);
509 		break;
510 
511 	/* dst = -dst */
512 	case BPF_ALU | BPF_NEG:
513 	case BPF_ALU64 | BPF_NEG:
514 		move_imm(ctx, t1, imm, is32);
515 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
516 		emit_zext_32(ctx, dst, is32);
517 		break;
518 
519 	/* dst = dst & src */
520 	case BPF_ALU | BPF_AND | BPF_X:
521 	case BPF_ALU64 | BPF_AND | BPF_X:
522 		emit_insn(ctx, and, dst, dst, src);
523 		emit_zext_32(ctx, dst, is32);
524 		break;
525 
526 	/* dst = dst & imm */
527 	case BPF_ALU | BPF_AND | BPF_K:
528 	case BPF_ALU64 | BPF_AND | BPF_K:
529 		if (is_unsigned_imm12(imm)) {
530 			emit_insn(ctx, andi, dst, dst, imm);
531 		} else {
532 			move_imm(ctx, t1, imm, is32);
533 			emit_insn(ctx, and, dst, dst, t1);
534 		}
535 		emit_zext_32(ctx, dst, is32);
536 		break;
537 
538 	/* dst = dst | src */
539 	case BPF_ALU | BPF_OR | BPF_X:
540 	case BPF_ALU64 | BPF_OR | BPF_X:
541 		emit_insn(ctx, or, dst, dst, src);
542 		emit_zext_32(ctx, dst, is32);
543 		break;
544 
545 	/* dst = dst | imm */
546 	case BPF_ALU | BPF_OR | BPF_K:
547 	case BPF_ALU64 | BPF_OR | BPF_K:
548 		if (is_unsigned_imm12(imm)) {
549 			emit_insn(ctx, ori, dst, dst, imm);
550 		} else {
551 			move_imm(ctx, t1, imm, is32);
552 			emit_insn(ctx, or, dst, dst, t1);
553 		}
554 		emit_zext_32(ctx, dst, is32);
555 		break;
556 
557 	/* dst = dst ^ src */
558 	case BPF_ALU | BPF_XOR | BPF_X:
559 	case BPF_ALU64 | BPF_XOR | BPF_X:
560 		emit_insn(ctx, xor, dst, dst, src);
561 		emit_zext_32(ctx, dst, is32);
562 		break;
563 
564 	/* dst = dst ^ imm */
565 	case BPF_ALU | BPF_XOR | BPF_K:
566 	case BPF_ALU64 | BPF_XOR | BPF_K:
567 		if (is_unsigned_imm12(imm)) {
568 			emit_insn(ctx, xori, dst, dst, imm);
569 		} else {
570 			move_imm(ctx, t1, imm, is32);
571 			emit_insn(ctx, xor, dst, dst, t1);
572 		}
573 		emit_zext_32(ctx, dst, is32);
574 		break;
575 
576 	/* dst = dst << src (logical) */
577 	case BPF_ALU | BPF_LSH | BPF_X:
578 		emit_insn(ctx, sllw, dst, dst, src);
579 		emit_zext_32(ctx, dst, is32);
580 		break;
581 
582 	case BPF_ALU64 | BPF_LSH | BPF_X:
583 		emit_insn(ctx, slld, dst, dst, src);
584 		break;
585 
586 	/* dst = dst << imm (logical) */
587 	case BPF_ALU | BPF_LSH | BPF_K:
588 		emit_insn(ctx, slliw, dst, dst, imm);
589 		emit_zext_32(ctx, dst, is32);
590 		break;
591 
592 	case BPF_ALU64 | BPF_LSH | BPF_K:
593 		emit_insn(ctx, sllid, dst, dst, imm);
594 		break;
595 
596 	/* dst = dst >> src (logical) */
597 	case BPF_ALU | BPF_RSH | BPF_X:
598 		emit_insn(ctx, srlw, dst, dst, src);
599 		emit_zext_32(ctx, dst, is32);
600 		break;
601 
602 	case BPF_ALU64 | BPF_RSH | BPF_X:
603 		emit_insn(ctx, srld, dst, dst, src);
604 		break;
605 
606 	/* dst = dst >> imm (logical) */
607 	case BPF_ALU | BPF_RSH | BPF_K:
608 		emit_insn(ctx, srliw, dst, dst, imm);
609 		emit_zext_32(ctx, dst, is32);
610 		break;
611 
612 	case BPF_ALU64 | BPF_RSH | BPF_K:
613 		emit_insn(ctx, srlid, dst, dst, imm);
614 		break;
615 
616 	/* dst = dst >> src (arithmetic) */
617 	case BPF_ALU | BPF_ARSH | BPF_X:
618 		emit_insn(ctx, sraw, dst, dst, src);
619 		emit_zext_32(ctx, dst, is32);
620 		break;
621 
622 	case BPF_ALU64 | BPF_ARSH | BPF_X:
623 		emit_insn(ctx, srad, dst, dst, src);
624 		break;
625 
626 	/* dst = dst >> imm (arithmetic) */
627 	case BPF_ALU | BPF_ARSH | BPF_K:
628 		emit_insn(ctx, sraiw, dst, dst, imm);
629 		emit_zext_32(ctx, dst, is32);
630 		break;
631 
632 	case BPF_ALU64 | BPF_ARSH | BPF_K:
633 		emit_insn(ctx, sraid, dst, dst, imm);
634 		break;
635 
636 	/* dst = BSWAP##imm(dst) */
637 	case BPF_ALU | BPF_END | BPF_FROM_LE:
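		/*
		 * LoongArch is little-endian, so BPF_FROM_LE needs no byte
		 * swap: it reduces to truncating (zero-extending) dst to the
		 * requested width.  Only BPF_FROM_BE below swaps bytes.
		 */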
638 		switch (imm) {
639 		case 16:
640 			/* zero-extend 16 bits into 64 bits */
641 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
642 			break;
643 		case 32:
644 			/* zero-extend 32 bits into 64 bits */
645 			emit_zext_32(ctx, dst, is32);
646 			break;
647 		case 64:
648 			/* do nothing */
649 			break;
650 		}
651 		break;
652 
653 	case BPF_ALU | BPF_END | BPF_FROM_BE:
654 		switch (imm) {
655 		case 16:
656 			emit_insn(ctx, revb2h, dst, dst);
657 			/* zero-extend 16 bits into 64 bits */
658 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
659 			break;
660 		case 32:
661 			emit_insn(ctx, revb2w, dst, dst);
662 			/* zero-extend 32 bits into 64 bits */
663 			emit_zext_32(ctx, dst, is32);
664 			break;
665 		case 64:
666 			emit_insn(ctx, revbd, dst, dst);
667 			break;
668 		}
669 		break;
670 
671 	/* PC += off if dst cond src */
672 	case BPF_JMP | BPF_JEQ | BPF_X:
673 	case BPF_JMP | BPF_JNE | BPF_X:
674 	case BPF_JMP | BPF_JGT | BPF_X:
675 	case BPF_JMP | BPF_JGE | BPF_X:
676 	case BPF_JMP | BPF_JLT | BPF_X:
677 	case BPF_JMP | BPF_JLE | BPF_X:
678 	case BPF_JMP | BPF_JSGT | BPF_X:
679 	case BPF_JMP | BPF_JSGE | BPF_X:
680 	case BPF_JMP | BPF_JSLT | BPF_X:
681 	case BPF_JMP | BPF_JSLE | BPF_X:
682 	case BPF_JMP32 | BPF_JEQ | BPF_X:
683 	case BPF_JMP32 | BPF_JNE | BPF_X:
684 	case BPF_JMP32 | BPF_JGT | BPF_X:
685 	case BPF_JMP32 | BPF_JGE | BPF_X:
686 	case BPF_JMP32 | BPF_JLT | BPF_X:
687 	case BPF_JMP32 | BPF_JLE | BPF_X:
688 	case BPF_JMP32 | BPF_JSGT | BPF_X:
689 	case BPF_JMP32 | BPF_JSGE | BPF_X:
690 	case BPF_JMP32 | BPF_JSLT | BPF_X:
691 	case BPF_JMP32 | BPF_JSLE | BPF_X:
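		/*
		 * Both operands are copied to temporaries and widened first:
		 * for BPF_JMP32 the upper 32 bits must not affect the result,
		 * so they are sign-extended for signed conditions and
		 * zero-extended for unsigned ones before the 64-bit
		 * compare-and-branch is emitted.
		 */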
692 		jmp_offset = bpf2la_offset(i, off, ctx);
693 		move_reg(ctx, t1, dst);
694 		move_reg(ctx, t2, src);
695 		if (is_signed_bpf_cond(BPF_OP(code))) {
696 			emit_sext_32(ctx, t1, is32);
697 			emit_sext_32(ctx, t2, is32);
698 		} else {
699 			emit_zext_32(ctx, t1, is32);
700 			emit_zext_32(ctx, t2, is32);
701 		}
702 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
703 			goto toofar;
704 		break;
705 
706 	/* PC += off if dst cond imm */
707 	case BPF_JMP | BPF_JEQ | BPF_K:
708 	case BPF_JMP | BPF_JNE | BPF_K:
709 	case BPF_JMP | BPF_JGT | BPF_K:
710 	case BPF_JMP | BPF_JGE | BPF_K:
711 	case BPF_JMP | BPF_JLT | BPF_K:
712 	case BPF_JMP | BPF_JLE | BPF_K:
713 	case BPF_JMP | BPF_JSGT | BPF_K:
714 	case BPF_JMP | BPF_JSGE | BPF_K:
715 	case BPF_JMP | BPF_JSLT | BPF_K:
716 	case BPF_JMP | BPF_JSLE | BPF_K:
717 	case BPF_JMP32 | BPF_JEQ | BPF_K:
718 	case BPF_JMP32 | BPF_JNE | BPF_K:
719 	case BPF_JMP32 | BPF_JGT | BPF_K:
720 	case BPF_JMP32 | BPF_JGE | BPF_K:
721 	case BPF_JMP32 | BPF_JLT | BPF_K:
722 	case BPF_JMP32 | BPF_JLE | BPF_K:
723 	case BPF_JMP32 | BPF_JSGT | BPF_K:
724 	case BPF_JMP32 | BPF_JSGE | BPF_K:
725 	case BPF_JMP32 | BPF_JSLT | BPF_K:
726 	case BPF_JMP32 | BPF_JSLE | BPF_K:
727 		u8 t7 = -1;
728 		jmp_offset = bpf2la_offset(i, off, ctx);
729 		if (imm) {
730 			move_imm(ctx, t1, imm, false);
731 			t7 = t1;
732 		} else {
733 			/* If imm is 0, simply use zero register. */
734 			t7 = LOONGARCH_GPR_ZERO;
735 		}
736 		move_reg(ctx, t2, dst);
737 		if (is_signed_bpf_cond(BPF_OP(code))) {
738 			emit_sext_32(ctx, t7, is32);
739 			emit_sext_32(ctx, t2, is32);
740 		} else {
741 			emit_zext_32(ctx, t7, is32);
742 			emit_zext_32(ctx, t2, is32);
743 		}
744 		if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0)
745 			goto toofar;
746 		break;
747 
748 	/* PC += off if dst & src */
749 	case BPF_JMP | BPF_JSET | BPF_X:
750 	case BPF_JMP32 | BPF_JSET | BPF_X:
751 		jmp_offset = bpf2la_offset(i, off, ctx);
752 		emit_insn(ctx, and, t1, dst, src);
753 		emit_zext_32(ctx, t1, is32);
754 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
755 			goto toofar;
756 		break;
757 
758 	/* PC += off if dst & imm */
759 	case BPF_JMP | BPF_JSET | BPF_K:
760 	case BPF_JMP32 | BPF_JSET | BPF_K:
761 		jmp_offset = bpf2la_offset(i, off, ctx);
762 		move_imm(ctx, t1, imm, is32);
763 		emit_insn(ctx, and, t1, dst, t1);
764 		emit_zext_32(ctx, t1, is32);
765 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
766 			goto toofar;
767 		break;
768 
769 	/* PC += off */
770 	case BPF_JMP | BPF_JA:
771 		jmp_offset = bpf2la_offset(i, off, ctx);
772 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
773 			goto toofar;
774 		break;
775 
776 	/* function call */
777 	case BPF_JMP | BPF_CALL:
778 		int ret;
779 		u64 func_addr;
780 		bool func_addr_fixed;
781 
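		/*
		 * The target address is materialized into t1 and called with
		 * jirl (return address in $ra); the native return value in
		 * $a0 is then copied into BPF_REG_0 ($a5).
		 */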
782 		mark_call(ctx);
783 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
784 					    &func_addr, &func_addr_fixed);
785 		if (ret < 0)
786 			return ret;
787 
788 		move_imm(ctx, t1, func_addr, is32);
789 		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
790 		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
791 		break;
792 
793 	/* tail call */
794 	case BPF_JMP | BPF_TAIL_CALL:
795 		mark_tail_call(ctx);
796 		if (emit_bpf_tail_call(ctx) < 0)
797 			return -EINVAL;
798 		break;
799 
800 	/* function return */
801 	case BPF_JMP | BPF_EXIT:
802 		emit_sext_32(ctx, regmap[BPF_REG_0], true);
803 
804 		if (i == ctx->prog->len - 1)
805 			break;
806 
807 		jmp_offset = epilogue_offset(ctx);
808 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
809 			goto toofar;
810 		break;
811 
812 	/* dst = imm64 */
813 	case BPF_LD | BPF_IMM | BPF_DW:
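		/*
		 * A 64-bit immediate load spans two eBPF instructions;
		 * returning 1 below tells build_body() to skip the second
		 * (upper half) instruction.
		 */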
814 		u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
815 
816 		move_imm(ctx, dst, imm64, is32);
817 		return 1;
818 
819 	/* dst = *(size *)(src + off) */
820 	case BPF_LDX | BPF_MEM | BPF_B:
821 	case BPF_LDX | BPF_MEM | BPF_H:
822 	case BPF_LDX | BPF_MEM | BPF_W:
823 	case BPF_LDX | BPF_MEM | BPF_DW:
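		/*
		 * Pick the cheapest addressing form that can encode off: a
		 * 12-bit immediate load, the 14-bit ldptr variant (word and
		 * dword only), or an indexed load with off materialized in
		 * t1.  The ST/STX cases below follow the same pattern.
		 */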
824 		switch (BPF_SIZE(code)) {
825 		case BPF_B:
826 			if (is_signed_imm12(off)) {
827 				emit_insn(ctx, ldbu, dst, src, off);
828 			} else {
829 				move_imm(ctx, t1, off, is32);
830 				emit_insn(ctx, ldxbu, dst, src, t1);
831 			}
832 			break;
833 		case BPF_H:
834 			if (is_signed_imm12(off)) {
835 				emit_insn(ctx, ldhu, dst, src, off);
836 			} else {
837 				move_imm(ctx, t1, off, is32);
838 				emit_insn(ctx, ldxhu, dst, src, t1);
839 			}
840 			break;
841 		case BPF_W:
842 			if (is_signed_imm12(off)) {
843 				emit_insn(ctx, ldwu, dst, src, off);
844 			} else if (is_signed_imm14(off)) {
845 				emit_insn(ctx, ldptrw, dst, src, off);
846 			} else {
847 				move_imm(ctx, t1, off, is32);
848 				emit_insn(ctx, ldxwu, dst, src, t1);
849 			}
850 			break;
851 		case BPF_DW:
852 			if (is_signed_imm12(off)) {
853 				emit_insn(ctx, ldd, dst, src, off);
854 			} else if (is_signed_imm14(off)) {
855 				emit_insn(ctx, ldptrd, dst, src, off);
856 			} else {
857 				move_imm(ctx, t1, off, is32);
858 				emit_insn(ctx, ldxd, dst, src, t1);
859 			}
860 			break;
861 		}
862 		break;
863 
864 	/* *(size *)(dst + off) = imm */
865 	case BPF_ST | BPF_MEM | BPF_B:
866 	case BPF_ST | BPF_MEM | BPF_H:
867 	case BPF_ST | BPF_MEM | BPF_W:
868 	case BPF_ST | BPF_MEM | BPF_DW:
869 		switch (BPF_SIZE(code)) {
870 		case BPF_B:
871 			move_imm(ctx, t1, imm, is32);
872 			if (is_signed_imm12(off)) {
873 				emit_insn(ctx, stb, t1, dst, off);
874 			} else {
875 				move_imm(ctx, t2, off, is32);
876 				emit_insn(ctx, stxb, t1, dst, t2);
877 			}
878 			break;
879 		case BPF_H:
880 			move_imm(ctx, t1, imm, is32);
881 			if (is_signed_imm12(off)) {
882 				emit_insn(ctx, sth, t1, dst, off);
883 			} else {
884 				move_imm(ctx, t2, off, is32);
885 				emit_insn(ctx, stxh, t1, dst, t2);
886 			}
887 			break;
888 		case BPF_W:
889 			move_imm(ctx, t1, imm, is32);
890 			if (is_signed_imm12(off)) {
891 				emit_insn(ctx, stw, t1, dst, off);
892 			} else if (is_signed_imm14(off)) {
893 				emit_insn(ctx, stptrw, t1, dst, off);
894 			} else {
895 				move_imm(ctx, t2, off, is32);
896 				emit_insn(ctx, stxw, t1, dst, t2);
897 			}
898 			break;
899 		case BPF_DW:
900 			move_imm(ctx, t1, imm, is32);
901 			if (is_signed_imm12(off)) {
902 				emit_insn(ctx, std, t1, dst, off);
903 			} else if (is_signed_imm14(off)) {
904 				emit_insn(ctx, stptrd, t1, dst, off);
905 			} else {
906 				move_imm(ctx, t2, off, is32);
907 				emit_insn(ctx, stxd, t1, dst, t2);
908 			}
909 			break;
910 		}
911 		break;
912 
913 	/* *(size *)(dst + off) = src */
914 	case BPF_STX | BPF_MEM | BPF_B:
915 	case BPF_STX | BPF_MEM | BPF_H:
916 	case BPF_STX | BPF_MEM | BPF_W:
917 	case BPF_STX | BPF_MEM | BPF_DW:
918 		switch (BPF_SIZE(code)) {
919 		case BPF_B:
920 			if (is_signed_imm12(off)) {
921 				emit_insn(ctx, stb, src, dst, off);
922 			} else {
923 				move_imm(ctx, t1, off, is32);
924 				emit_insn(ctx, stxb, src, dst, t1);
925 			}
926 			break;
927 		case BPF_H:
928 			if (is_signed_imm12(off)) {
929 				emit_insn(ctx, sth, src, dst, off);
930 			} else {
931 				move_imm(ctx, t1, off, is32);
932 				emit_insn(ctx, stxh, src, dst, t1);
933 			}
934 			break;
935 		case BPF_W:
936 			if (is_signed_imm12(off)) {
937 				emit_insn(ctx, stw, src, dst, off);
938 			} else if (is_signed_imm14(off)) {
939 				emit_insn(ctx, stptrw, src, dst, off);
940 			} else {
941 				move_imm(ctx, t1, off, is32);
942 				emit_insn(ctx, stxw, src, dst, t1);
943 			}
944 			break;
945 		case BPF_DW:
946 			if (is_signed_imm12(off)) {
947 				emit_insn(ctx, std, src, dst, off);
948 			} else if (is_signed_imm14(off)) {
949 				emit_insn(ctx, stptrd, src, dst, off);
950 			} else {
951 				move_imm(ctx, t1, off, is32);
952 				emit_insn(ctx, stxd, src, dst, t1);
953 			}
954 			break;
955 		}
956 		break;
957 
958 	case BPF_STX | BPF_ATOMIC | BPF_W:
959 	case BPF_STX | BPF_ATOMIC | BPF_DW:
960 		emit_atomic(insn, ctx);
961 		break;
962 
963 	default:
964 		pr_err("bpf_jit: unknown opcode %02x\n", code);
965 		return -EINVAL;
966 	}
967 
968 	return 0;
969 
970 toofar:
971 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
972 	return -E2BIG;
973 }
974 
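/*
 * ctx->offset[i] records, on the first pass (while ctx->image is still NULL),
 * the index of the first JITed instruction emitted for eBPF instruction i;
 * the extra slot ctx->offset[prog->len] marks the end of the body and is
 * later scaled to bytes for bpf_prog_fill_jited_linfo().
 */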
975 static int build_body(struct jit_ctx *ctx, bool extra_pass)
976 {
977 	int i;
978 	const struct bpf_prog *prog = ctx->prog;
979 
980 	for (i = 0; i < prog->len; i++) {
981 		const struct bpf_insn *insn = &prog->insnsi[i];
982 		int ret;
983 
984 		if (ctx->image == NULL)
985 			ctx->offset[i] = ctx->idx;
986 
987 		ret = build_insn(insn, ctx, extra_pass);
988 		if (ret > 0) {
989 			i++;
990 			if (ctx->image == NULL)
991 				ctx->offset[i] = ctx->idx;
992 			continue;
993 		}
994 		if (ret)
995 			return ret;
996 	}
997 
998 	if (ctx->image == NULL)
999 		ctx->offset[i] = ctx->idx;
1000 
1001 	return 0;
1002 }
1003 
1004 /* Fill space with break instructions */
1005 static void jit_fill_hole(void *area, unsigned int size)
1006 {
1007 	u32 *ptr;
1008 
1009 	/* We are guaranteed to have aligned memory */
1010 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1011 		*ptr++ = INSN_BREAK;
1012 }
1013 
1014 static int validate_code(struct jit_ctx *ctx)
1015 {
1016 	int i;
1017 	union loongarch_instruction insn;
1018 
1019 	for (i = 0; i < ctx->idx; i++) {
1020 		insn = ctx->image[i];
1021 		/* Check INSN_BREAK */
1022 		if (insn.word == INSN_BREAK)
1023 			return -1;
1024 	}
1025 
1026 	return 0;
1027 }
1028 
1029 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1030 {
1031 	bool tmp_blinded = false, extra_pass = false;
1032 	u8 *image_ptr;
1033 	int image_size;
1034 	struct jit_ctx ctx;
1035 	struct jit_data *jit_data;
1036 	struct bpf_binary_header *header;
1037 	struct bpf_prog *tmp, *orig_prog = prog;
1038 
1039 	/*
1040 	 * If BPF JIT was not enabled then we must fall back to
1041 	 * the interpreter.
1042 	 */
1043 	if (!prog->jit_requested)
1044 		return orig_prog;
1045 
1046 	tmp = bpf_jit_blind_constants(prog);
1047 	/*
1048 	 * If blinding was requested and we failed during blinding,
1049 	 * we must fall back to the interpreter. Otherwise, we save
1050 	 * the new JITed code.
1051 	 */
1052 	if (IS_ERR(tmp))
1053 		return orig_prog;
1054 
1055 	if (tmp != prog) {
1056 		tmp_blinded = true;
1057 		prog = tmp;
1058 	}
1059 
1060 	jit_data = prog->aux->jit_data;
1061 	if (!jit_data) {
1062 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1063 		if (!jit_data) {
1064 			prog = orig_prog;
1065 			goto out;
1066 		}
1067 		prog->aux->jit_data = jit_data;
1068 	}
1069 	if (jit_data->ctx.offset) {
1070 		ctx = jit_data->ctx;
1071 		image_ptr = jit_data->image;
1072 		header = jit_data->header;
1073 		extra_pass = true;
1074 		image_size = sizeof(u32) * ctx.idx;
1075 		goto skip_init_ctx;
1076 	}
1077 
1078 	memset(&ctx, 0, sizeof(ctx));
1079 	ctx.prog = prog;
1080 
1081 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
1082 	if (ctx.offset == NULL) {
1083 		prog = orig_prog;
1084 		goto out_offset;
1085 	}
1086 
1087 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
1088 	build_prologue(&ctx);
1089 	if (build_body(&ctx, extra_pass)) {
1090 		prog = orig_prog;
1091 		goto out_offset;
1092 	}
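	/*
	 * Record where the epilogue starts so that BPF_EXIT instructions in
	 * the middle of the program can branch to it (see the use of
	 * epilogue_offset() in build_insn()).
	 */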
1093 	ctx.epilogue_offset = ctx.idx;
1094 	build_epilogue(&ctx);
1095 
1096 	/*
1097 	 * Now we know the actual image size: each LoongArch instruction is
1098 	 * 32 bits wide, so the number of JITed instructions directly gives
1099 	 * the size required to store the JITed code.
1100 	 */
1101 	image_size = sizeof(u32) * ctx.idx;
1102 	/* Now we know how much memory to allocate for the image */
1103 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1104 				      sizeof(u32), jit_fill_hole);
1105 	if (header == NULL) {
1106 		prog = orig_prog;
1107 		goto out_offset;
1108 	}
1109 
1110 	/* 2. Now, the actual pass to generate final JIT code */
1111 	ctx.image = (union loongarch_instruction *)image_ptr;
1112 
1113 skip_init_ctx:
1114 	ctx.idx = 0;
1115 
1116 	build_prologue(&ctx);
1117 	if (build_body(&ctx, extra_pass)) {
1118 		bpf_jit_binary_free(header);
1119 		prog = orig_prog;
1120 		goto out_offset;
1121 	}
1122 	build_epilogue(&ctx);
1123 
1124 	/* 3. Extra pass to validate JITed code */
1125 	if (validate_code(&ctx)) {
1126 		bpf_jit_binary_free(header);
1127 		prog = orig_prog;
1128 		goto out_offset;
1129 	}
1130 
1131 	/* And we're done */
1132 	if (bpf_jit_enable > 1)
1133 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
1134 
1135 	/* Update the icache */
1136 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
1137 
1138 	if (!prog->is_func || extra_pass) {
1139 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1140 			pr_err_once("multi-func JIT bug %d != %d\n",
1141 				    ctx.idx, jit_data->ctx.idx);
1142 			bpf_jit_binary_free(header);
1143 			prog->bpf_func = NULL;
1144 			prog->jited = 0;
1145 			prog->jited_len = 0;
1146 			goto out_offset;
1147 		}
1148 		bpf_jit_binary_lock_ro(header);
1149 	} else {
1150 		jit_data->ctx = ctx;
1151 		jit_data->image = image_ptr;
1152 		jit_data->header = header;
1153 	}
1154 	prog->jited = 1;
1155 	prog->jited_len = image_size;
1156 	prog->bpf_func = (void *)ctx.image;
1157 
1158 	if (!prog->is_func || extra_pass) {
1159 		int i;
1160 
1161 		/* offset[prog->len] is the size of the whole program */
1162 		for (i = 0; i <= prog->len; i++)
1163 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
1164 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1165 
1166 out_offset:
1167 		kvfree(ctx.offset);
1168 		kfree(jit_data);
1169 		prog->aux->jit_data = NULL;
1170 	}
1171 
1172 out:
1173 	if (tmp_blinded)
1174 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
1175 
1176 	out_offset = -1;
1177 
1178 	return prog;
1179 }
1180