xref: /openbmc/linux/arch/loongarch/net/bpf_jit.c (revision 7f8256ae)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for LoongArch
4  *
5  * Copyright (C) 2022 Loongson Technology Corporation Limited
6  */
7 #include "bpf_jit.h"
8 
/* Register in which the tail call count (TCC) is handed to a program. */
#define REG_TCC		LOONGARCH_GPR_A6
/* Register the prologue copies the TCC into when it must survive calls. */
#define TCC_SAVED	LOONGARCH_GPR_S5

/* Bits kept in jit_ctx::flags describing what the program body contains. */
#define SAVE_RA		BIT(0)	/* set by mark_call() */
#define SAVE_TCC	BIT(1)	/* set by mark_tail_call() */
14 
15 static const int regmap[] = {
16 	/* return value from in-kernel function, and exit value for eBPF program */
17 	[BPF_REG_0] = LOONGARCH_GPR_A5,
18 	/* arguments from eBPF program to in-kernel function */
19 	[BPF_REG_1] = LOONGARCH_GPR_A0,
20 	[BPF_REG_2] = LOONGARCH_GPR_A1,
21 	[BPF_REG_3] = LOONGARCH_GPR_A2,
22 	[BPF_REG_4] = LOONGARCH_GPR_A3,
23 	[BPF_REG_5] = LOONGARCH_GPR_A4,
24 	/* callee saved registers that in-kernel function will preserve */
25 	[BPF_REG_6] = LOONGARCH_GPR_S0,
26 	[BPF_REG_7] = LOONGARCH_GPR_S1,
27 	[BPF_REG_8] = LOONGARCH_GPR_S2,
28 	[BPF_REG_9] = LOONGARCH_GPR_S3,
29 	/* read-only frame pointer to access stack */
30 	[BPF_REG_FP] = LOONGARCH_GPR_S4,
31 	/* temporary register for blinding constants */
32 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
33 };
34 
35 static void mark_call(struct jit_ctx *ctx)
36 {
37 	ctx->flags |= SAVE_RA;
38 }
39 
40 static void mark_tail_call(struct jit_ctx *ctx)
41 {
42 	ctx->flags |= SAVE_TCC;
43 }
44 
45 static bool seen_call(struct jit_ctx *ctx)
46 {
47 	return (ctx->flags & SAVE_RA);
48 }
49 
50 static bool seen_tail_call(struct jit_ctx *ctx)
51 {
52 	return (ctx->flags & SAVE_TCC);
53 }
54 
55 static u8 tail_call_reg(struct jit_ctx *ctx)
56 {
57 	if (seen_call(ctx))
58 		return TCC_SAVED;
59 
60 	return REG_TCC;
61 }
62 
63 /*
64  * eBPF prog stack layout:
65  *
66  *                                        high
67  * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
68  *                            |           $ra           |
69  *                            +-------------------------+
70  *                            |           $fp           |
71  *                            +-------------------------+
72  *                            |           $s0           |
73  *                            +-------------------------+
74  *                            |           $s1           |
75  *                            +-------------------------+
76  *                            |           $s2           |
77  *                            +-------------------------+
78  *                            |           $s3           |
79  *                            +-------------------------+
80  *                            |           $s4           |
81  *                            +-------------------------+
82  *                            |           $s5           |
83  *                            +-------------------------+ <--BPF_REG_FP
84  *                            |  prog->aux->stack_depth |
85  *                            |        (optional)       |
86  * current $sp -------------> +-------------------------+
87  *                                        low
88  */
89 static void build_prologue(struct jit_ctx *ctx)
90 {
91 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
92 
93 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
94 
95 	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
96 	stack_adjust += sizeof(long) * 8;
97 
98 	stack_adjust = round_up(stack_adjust, 16);
99 	stack_adjust += bpf_stack_adjust;
100 
101 	/*
102 	 * First instruction initializes the tail call count (TCC).
103 	 * On tail call we skip this instruction, and the TCC is
104 	 * passed in REG_TCC from the caller.
105 	 */
106 	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
107 
108 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
109 
110 	store_offset = stack_adjust - sizeof(long);
111 	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
112 
113 	store_offset -= sizeof(long);
114 	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
115 
116 	store_offset -= sizeof(long);
117 	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
118 
119 	store_offset -= sizeof(long);
120 	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
121 
122 	store_offset -= sizeof(long);
123 	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
124 
125 	store_offset -= sizeof(long);
126 	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
127 
128 	store_offset -= sizeof(long);
129 	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
130 
131 	store_offset -= sizeof(long);
132 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
133 
134 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
135 
136 	if (bpf_stack_adjust)
137 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
138 
139 	/*
140 	 * Program contains calls and tail calls, so REG_TCC need
141 	 * to be saved across calls.
142 	 */
143 	if (seen_tail_call(ctx) && seen_call(ctx))
144 		move_reg(ctx, TCC_SAVED, REG_TCC);
145 
146 	ctx->stack_size = stack_adjust;
147 }
148 
149 static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
150 {
151 	int stack_adjust = ctx->stack_size;
152 	int load_offset;
153 
154 	load_offset = stack_adjust - sizeof(long);
155 	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
156 
157 	load_offset -= sizeof(long);
158 	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
159 
160 	load_offset -= sizeof(long);
161 	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
162 
163 	load_offset -= sizeof(long);
164 	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
165 
166 	load_offset -= sizeof(long);
167 	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
168 
169 	load_offset -= sizeof(long);
170 	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
171 
172 	load_offset -= sizeof(long);
173 	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
174 
175 	load_offset -= sizeof(long);
176 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
177 
178 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
179 
180 	if (!is_tail_call) {
181 		/* Set return value */
182 		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
183 		/* Return to the caller */
184 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
185 	} else {
186 		/*
187 		 * Call the next bpf prog and skip the first instruction
188 		 * of TCC initialization.
189 		 */
190 		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
191 	}
192 }
193 
194 static void build_epilogue(struct jit_ctx *ctx)
195 {
196 	__build_epilogue(ctx, false);
197 }
198 
/* Report kfunc call support; this JIT unconditionally answers yes. */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}
203 
204 /* initialized on the first pass of build_body() */
205 static int out_offset = -1;
206 static int emit_bpf_tail_call(struct jit_ctx *ctx)
207 {
208 	int off;
209 	u8 tcc = tail_call_reg(ctx);
210 	u8 a1 = LOONGARCH_GPR_A1;
211 	u8 a2 = LOONGARCH_GPR_A2;
212 	u8 t1 = LOONGARCH_GPR_T1;
213 	u8 t2 = LOONGARCH_GPR_T2;
214 	u8 t3 = LOONGARCH_GPR_T3;
215 	const int idx0 = ctx->idx;
216 
217 #define cur_offset (ctx->idx - idx0)
218 #define jmp_offset (out_offset - (cur_offset))
219 
220 	/*
221 	 * a0: &ctx
222 	 * a1: &array
223 	 * a2: index
224 	 *
225 	 * if (index >= array->map.max_entries)
226 	 *	 goto out;
227 	 */
228 	off = offsetof(struct bpf_array, map.max_entries);
229 	emit_insn(ctx, ldwu, t1, a1, off);
230 	/* bgeu $a2, $t1, jmp_offset */
231 	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
232 		goto toofar;
233 
234 	/*
235 	 * if (--TCC < 0)
236 	 *	 goto out;
237 	 */
238 	emit_insn(ctx, addid, REG_TCC, tcc, -1);
239 	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
240 		goto toofar;
241 
242 	/*
243 	 * prog = array->ptrs[index];
244 	 * if (!prog)
245 	 *	 goto out;
246 	 */
247 	emit_insn(ctx, alsld, t2, a2, a1, 2);
248 	off = offsetof(struct bpf_array, ptrs);
249 	emit_insn(ctx, ldd, t2, t2, off);
250 	/* beq $t2, $zero, jmp_offset */
251 	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
252 		goto toofar;
253 
254 	/* goto *(prog->bpf_func + 4); */
255 	off = offsetof(struct bpf_prog, bpf_func);
256 	emit_insn(ctx, ldd, t3, t2, off);
257 	__build_epilogue(ctx, true);
258 
259 	/* out: */
260 	if (out_offset == -1)
261 		out_offset = cur_offset;
262 	if (cur_offset != out_offset) {
263 		pr_err_once("tail_call out_offset = %d, expected %d!\n",
264 			    cur_offset, out_offset);
265 		return -1;
266 	}
267 
268 	return 0;
269 
270 toofar:
271 	pr_info_once("tail_call: jump too far\n");
272 	return -1;
273 #undef cur_offset
274 #undef jmp_offset
275 }
276 
277 static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
278 {
279 	const u8 t1 = LOONGARCH_GPR_T1;
280 	const u8 t2 = LOONGARCH_GPR_T2;
281 	const u8 t3 = LOONGARCH_GPR_T3;
282 	const u8 r0 = regmap[BPF_REG_0];
283 	const u8 src = regmap[insn->src_reg];
284 	const u8 dst = regmap[insn->dst_reg];
285 	const s16 off = insn->off;
286 	const s32 imm = insn->imm;
287 	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
288 
289 	move_imm(ctx, t1, off, false);
290 	emit_insn(ctx, addd, t1, dst, t1);
291 	move_reg(ctx, t3, src);
292 
293 	switch (imm) {
294 	/* lock *(size *)(dst + off) <op>= src */
295 	case BPF_ADD:
296 		if (isdw)
297 			emit_insn(ctx, amaddd, t2, t1, src);
298 		else
299 			emit_insn(ctx, amaddw, t2, t1, src);
300 		break;
301 	case BPF_AND:
302 		if (isdw)
303 			emit_insn(ctx, amandd, t2, t1, src);
304 		else
305 			emit_insn(ctx, amandw, t2, t1, src);
306 		break;
307 	case BPF_OR:
308 		if (isdw)
309 			emit_insn(ctx, amord, t2, t1, src);
310 		else
311 			emit_insn(ctx, amorw, t2, t1, src);
312 		break;
313 	case BPF_XOR:
314 		if (isdw)
315 			emit_insn(ctx, amxord, t2, t1, src);
316 		else
317 			emit_insn(ctx, amxorw, t2, t1, src);
318 		break;
319 	/* src = atomic_fetch_<op>(dst + off, src) */
320 	case BPF_ADD | BPF_FETCH:
321 		if (isdw) {
322 			emit_insn(ctx, amaddd, src, t1, t3);
323 		} else {
324 			emit_insn(ctx, amaddw, src, t1, t3);
325 			emit_zext_32(ctx, src, true);
326 		}
327 		break;
328 	case BPF_AND | BPF_FETCH:
329 		if (isdw) {
330 			emit_insn(ctx, amandd, src, t1, t3);
331 		} else {
332 			emit_insn(ctx, amandw, src, t1, t3);
333 			emit_zext_32(ctx, src, true);
334 		}
335 		break;
336 	case BPF_OR | BPF_FETCH:
337 		if (isdw) {
338 			emit_insn(ctx, amord, src, t1, t3);
339 		} else {
340 			emit_insn(ctx, amorw, src, t1, t3);
341 			emit_zext_32(ctx, src, true);
342 		}
343 		break;
344 	case BPF_XOR | BPF_FETCH:
345 		if (isdw) {
346 			emit_insn(ctx, amxord, src, t1, t3);
347 		} else {
348 			emit_insn(ctx, amxorw, src, t1, t3);
349 			emit_zext_32(ctx, src, true);
350 		}
351 		break;
352 	/* src = atomic_xchg(dst + off, src); */
353 	case BPF_XCHG:
354 		if (isdw) {
355 			emit_insn(ctx, amswapd, src, t1, t3);
356 		} else {
357 			emit_insn(ctx, amswapw, src, t1, t3);
358 			emit_zext_32(ctx, src, true);
359 		}
360 		break;
361 	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
362 	case BPF_CMPXCHG:
363 		move_reg(ctx, t2, r0);
364 		if (isdw) {
365 			emit_insn(ctx, lld, r0, t1, 0);
366 			emit_insn(ctx, bne, t2, r0, 4);
367 			move_reg(ctx, t3, src);
368 			emit_insn(ctx, scd, t3, t1, 0);
369 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
370 		} else {
371 			emit_insn(ctx, llw, r0, t1, 0);
372 			emit_zext_32(ctx, t2, true);
373 			emit_zext_32(ctx, r0, true);
374 			emit_insn(ctx, bne, t2, r0, 4);
375 			move_reg(ctx, t3, src);
376 			emit_insn(ctx, scw, t3, t1, 0);
377 			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
378 			emit_zext_32(ctx, r0, true);
379 		}
380 		break;
381 	}
382 }
383 
384 static bool is_signed_bpf_cond(u8 cond)
385 {
386 	return cond == BPF_JSGT || cond == BPF_JSLT ||
387 	       cond == BPF_JSGE || cond == BPF_JSLE;
388 }
389 
390 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
391 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
392 
393 bool ex_handler_bpf(const struct exception_table_entry *ex,
394 		    struct pt_regs *regs)
395 {
396 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
397 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
398 
399 	regs->regs[dst_reg] = 0;
400 	regs->csr_era = (unsigned long)&ex->fixup - offset;
401 
402 	return true;
403 }
404 
405 /* For accesses to BTF pointers, add an entry to the exception table */
406 static int add_exception_handler(const struct bpf_insn *insn,
407 				 struct jit_ctx *ctx,
408 				 int dst_reg)
409 {
410 	unsigned long pc;
411 	off_t offset;
412 	struct exception_table_entry *ex;
413 
414 	if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
415 		return 0;
416 
417 	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
418 		return -EINVAL;
419 
420 	ex = &ctx->prog->aux->extable[ctx->num_exentries];
421 	pc = (unsigned long)&ctx->image[ctx->idx - 1];
422 
423 	offset = pc - (long)&ex->insn;
424 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
425 		return -ERANGE;
426 
427 	ex->insn = offset;
428 
429 	/*
430 	 * Since the extable follows the program, the fixup offset is always
431 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
432 	 * to keep things simple, and put the destination register in the upper
433 	 * bits. We don't need to worry about buildtime or runtime sort
434 	 * modifying the upper bits because the table is already sorted, and
435 	 * isn't part of the main exception table.
436 	 */
437 	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
438 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
439 		return -ERANGE;
440 
441 	ex->type = EX_TYPE_BPF;
442 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
443 
444 	ctx->num_exentries++;
445 
446 	return 0;
447 }
448 
449 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
450 {
451 	u8 tm = -1;
452 	u64 func_addr;
453 	bool func_addr_fixed;
454 	int i = insn - ctx->prog->insnsi;
455 	int ret, jmp_offset;
456 	const u8 code = insn->code;
457 	const u8 cond = BPF_OP(code);
458 	const u8 t1 = LOONGARCH_GPR_T1;
459 	const u8 t2 = LOONGARCH_GPR_T2;
460 	const u8 src = regmap[insn->src_reg];
461 	const u8 dst = regmap[insn->dst_reg];
462 	const s16 off = insn->off;
463 	const s32 imm = insn->imm;
464 	const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
465 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
466 
467 	switch (code) {
468 	/* dst = src */
469 	case BPF_ALU | BPF_MOV | BPF_X:
470 	case BPF_ALU64 | BPF_MOV | BPF_X:
471 		move_reg(ctx, dst, src);
472 		emit_zext_32(ctx, dst, is32);
473 		break;
474 
475 	/* dst = imm */
476 	case BPF_ALU | BPF_MOV | BPF_K:
477 	case BPF_ALU64 | BPF_MOV | BPF_K:
478 		move_imm(ctx, dst, imm, is32);
479 		break;
480 
481 	/* dst = dst + src */
482 	case BPF_ALU | BPF_ADD | BPF_X:
483 	case BPF_ALU64 | BPF_ADD | BPF_X:
484 		emit_insn(ctx, addd, dst, dst, src);
485 		emit_zext_32(ctx, dst, is32);
486 		break;
487 
488 	/* dst = dst + imm */
489 	case BPF_ALU | BPF_ADD | BPF_K:
490 	case BPF_ALU64 | BPF_ADD | BPF_K:
491 		if (is_signed_imm12(imm)) {
492 			emit_insn(ctx, addid, dst, dst, imm);
493 		} else {
494 			move_imm(ctx, t1, imm, is32);
495 			emit_insn(ctx, addd, dst, dst, t1);
496 		}
497 		emit_zext_32(ctx, dst, is32);
498 		break;
499 
500 	/* dst = dst - src */
501 	case BPF_ALU | BPF_SUB | BPF_X:
502 	case BPF_ALU64 | BPF_SUB | BPF_X:
503 		emit_insn(ctx, subd, dst, dst, src);
504 		emit_zext_32(ctx, dst, is32);
505 		break;
506 
507 	/* dst = dst - imm */
508 	case BPF_ALU | BPF_SUB | BPF_K:
509 	case BPF_ALU64 | BPF_SUB | BPF_K:
510 		if (is_signed_imm12(-imm)) {
511 			emit_insn(ctx, addid, dst, dst, -imm);
512 		} else {
513 			move_imm(ctx, t1, imm, is32);
514 			emit_insn(ctx, subd, dst, dst, t1);
515 		}
516 		emit_zext_32(ctx, dst, is32);
517 		break;
518 
519 	/* dst = dst * src */
520 	case BPF_ALU | BPF_MUL | BPF_X:
521 	case BPF_ALU64 | BPF_MUL | BPF_X:
522 		emit_insn(ctx, muld, dst, dst, src);
523 		emit_zext_32(ctx, dst, is32);
524 		break;
525 
526 	/* dst = dst * imm */
527 	case BPF_ALU | BPF_MUL | BPF_K:
528 	case BPF_ALU64 | BPF_MUL | BPF_K:
529 		move_imm(ctx, t1, imm, is32);
530 		emit_insn(ctx, muld, dst, dst, t1);
531 		emit_zext_32(ctx, dst, is32);
532 		break;
533 
534 	/* dst = dst / src */
535 	case BPF_ALU | BPF_DIV | BPF_X:
536 	case BPF_ALU64 | BPF_DIV | BPF_X:
537 		emit_zext_32(ctx, dst, is32);
538 		move_reg(ctx, t1, src);
539 		emit_zext_32(ctx, t1, is32);
540 		emit_insn(ctx, divdu, dst, dst, t1);
541 		emit_zext_32(ctx, dst, is32);
542 		break;
543 
544 	/* dst = dst / imm */
545 	case BPF_ALU | BPF_DIV | BPF_K:
546 	case BPF_ALU64 | BPF_DIV | BPF_K:
547 		move_imm(ctx, t1, imm, is32);
548 		emit_zext_32(ctx, dst, is32);
549 		emit_insn(ctx, divdu, dst, dst, t1);
550 		emit_zext_32(ctx, dst, is32);
551 		break;
552 
553 	/* dst = dst % src */
554 	case BPF_ALU | BPF_MOD | BPF_X:
555 	case BPF_ALU64 | BPF_MOD | BPF_X:
556 		emit_zext_32(ctx, dst, is32);
557 		move_reg(ctx, t1, src);
558 		emit_zext_32(ctx, t1, is32);
559 		emit_insn(ctx, moddu, dst, dst, t1);
560 		emit_zext_32(ctx, dst, is32);
561 		break;
562 
563 	/* dst = dst % imm */
564 	case BPF_ALU | BPF_MOD | BPF_K:
565 	case BPF_ALU64 | BPF_MOD | BPF_K:
566 		move_imm(ctx, t1, imm, is32);
567 		emit_zext_32(ctx, dst, is32);
568 		emit_insn(ctx, moddu, dst, dst, t1);
569 		emit_zext_32(ctx, dst, is32);
570 		break;
571 
572 	/* dst = -dst */
573 	case BPF_ALU | BPF_NEG:
574 	case BPF_ALU64 | BPF_NEG:
575 		move_imm(ctx, t1, imm, is32);
576 		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
577 		emit_zext_32(ctx, dst, is32);
578 		break;
579 
580 	/* dst = dst & src */
581 	case BPF_ALU | BPF_AND | BPF_X:
582 	case BPF_ALU64 | BPF_AND | BPF_X:
583 		emit_insn(ctx, and, dst, dst, src);
584 		emit_zext_32(ctx, dst, is32);
585 		break;
586 
587 	/* dst = dst & imm */
588 	case BPF_ALU | BPF_AND | BPF_K:
589 	case BPF_ALU64 | BPF_AND | BPF_K:
590 		if (is_unsigned_imm12(imm)) {
591 			emit_insn(ctx, andi, dst, dst, imm);
592 		} else {
593 			move_imm(ctx, t1, imm, is32);
594 			emit_insn(ctx, and, dst, dst, t1);
595 		}
596 		emit_zext_32(ctx, dst, is32);
597 		break;
598 
599 	/* dst = dst | src */
600 	case BPF_ALU | BPF_OR | BPF_X:
601 	case BPF_ALU64 | BPF_OR | BPF_X:
602 		emit_insn(ctx, or, dst, dst, src);
603 		emit_zext_32(ctx, dst, is32);
604 		break;
605 
606 	/* dst = dst | imm */
607 	case BPF_ALU | BPF_OR | BPF_K:
608 	case BPF_ALU64 | BPF_OR | BPF_K:
609 		if (is_unsigned_imm12(imm)) {
610 			emit_insn(ctx, ori, dst, dst, imm);
611 		} else {
612 			move_imm(ctx, t1, imm, is32);
613 			emit_insn(ctx, or, dst, dst, t1);
614 		}
615 		emit_zext_32(ctx, dst, is32);
616 		break;
617 
618 	/* dst = dst ^ src */
619 	case BPF_ALU | BPF_XOR | BPF_X:
620 	case BPF_ALU64 | BPF_XOR | BPF_X:
621 		emit_insn(ctx, xor, dst, dst, src);
622 		emit_zext_32(ctx, dst, is32);
623 		break;
624 
625 	/* dst = dst ^ imm */
626 	case BPF_ALU | BPF_XOR | BPF_K:
627 	case BPF_ALU64 | BPF_XOR | BPF_K:
628 		if (is_unsigned_imm12(imm)) {
629 			emit_insn(ctx, xori, dst, dst, imm);
630 		} else {
631 			move_imm(ctx, t1, imm, is32);
632 			emit_insn(ctx, xor, dst, dst, t1);
633 		}
634 		emit_zext_32(ctx, dst, is32);
635 		break;
636 
637 	/* dst = dst << src (logical) */
638 	case BPF_ALU | BPF_LSH | BPF_X:
639 		emit_insn(ctx, sllw, dst, dst, src);
640 		emit_zext_32(ctx, dst, is32);
641 		break;
642 
643 	case BPF_ALU64 | BPF_LSH | BPF_X:
644 		emit_insn(ctx, slld, dst, dst, src);
645 		break;
646 
647 	/* dst = dst << imm (logical) */
648 	case BPF_ALU | BPF_LSH | BPF_K:
649 		emit_insn(ctx, slliw, dst, dst, imm);
650 		emit_zext_32(ctx, dst, is32);
651 		break;
652 
653 	case BPF_ALU64 | BPF_LSH | BPF_K:
654 		emit_insn(ctx, sllid, dst, dst, imm);
655 		break;
656 
657 	/* dst = dst >> src (logical) */
658 	case BPF_ALU | BPF_RSH | BPF_X:
659 		emit_insn(ctx, srlw, dst, dst, src);
660 		emit_zext_32(ctx, dst, is32);
661 		break;
662 
663 	case BPF_ALU64 | BPF_RSH | BPF_X:
664 		emit_insn(ctx, srld, dst, dst, src);
665 		break;
666 
667 	/* dst = dst >> imm (logical) */
668 	case BPF_ALU | BPF_RSH | BPF_K:
669 		emit_insn(ctx, srliw, dst, dst, imm);
670 		emit_zext_32(ctx, dst, is32);
671 		break;
672 
673 	case BPF_ALU64 | BPF_RSH | BPF_K:
674 		emit_insn(ctx, srlid, dst, dst, imm);
675 		break;
676 
677 	/* dst = dst >> src (arithmetic) */
678 	case BPF_ALU | BPF_ARSH | BPF_X:
679 		emit_insn(ctx, sraw, dst, dst, src);
680 		emit_zext_32(ctx, dst, is32);
681 		break;
682 
683 	case BPF_ALU64 | BPF_ARSH | BPF_X:
684 		emit_insn(ctx, srad, dst, dst, src);
685 		break;
686 
687 	/* dst = dst >> imm (arithmetic) */
688 	case BPF_ALU | BPF_ARSH | BPF_K:
689 		emit_insn(ctx, sraiw, dst, dst, imm);
690 		emit_zext_32(ctx, dst, is32);
691 		break;
692 
693 	case BPF_ALU64 | BPF_ARSH | BPF_K:
694 		emit_insn(ctx, sraid, dst, dst, imm);
695 		break;
696 
697 	/* dst = BSWAP##imm(dst) */
698 	case BPF_ALU | BPF_END | BPF_FROM_LE:
699 		switch (imm) {
700 		case 16:
701 			/* zero-extend 16 bits into 64 bits */
702 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
703 			break;
704 		case 32:
705 			/* zero-extend 32 bits into 64 bits */
706 			emit_zext_32(ctx, dst, is32);
707 			break;
708 		case 64:
709 			/* do nothing */
710 			break;
711 		}
712 		break;
713 
714 	case BPF_ALU | BPF_END | BPF_FROM_BE:
715 		switch (imm) {
716 		case 16:
717 			emit_insn(ctx, revb2h, dst, dst);
718 			/* zero-extend 16 bits into 64 bits */
719 			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
720 			break;
721 		case 32:
722 			emit_insn(ctx, revb2w, dst, dst);
723 			/* zero-extend 32 bits into 64 bits */
724 			emit_zext_32(ctx, dst, is32);
725 			break;
726 		case 64:
727 			emit_insn(ctx, revbd, dst, dst);
728 			break;
729 		}
730 		break;
731 
732 	/* PC += off if dst cond src */
733 	case BPF_JMP | BPF_JEQ | BPF_X:
734 	case BPF_JMP | BPF_JNE | BPF_X:
735 	case BPF_JMP | BPF_JGT | BPF_X:
736 	case BPF_JMP | BPF_JGE | BPF_X:
737 	case BPF_JMP | BPF_JLT | BPF_X:
738 	case BPF_JMP | BPF_JLE | BPF_X:
739 	case BPF_JMP | BPF_JSGT | BPF_X:
740 	case BPF_JMP | BPF_JSGE | BPF_X:
741 	case BPF_JMP | BPF_JSLT | BPF_X:
742 	case BPF_JMP | BPF_JSLE | BPF_X:
743 	case BPF_JMP32 | BPF_JEQ | BPF_X:
744 	case BPF_JMP32 | BPF_JNE | BPF_X:
745 	case BPF_JMP32 | BPF_JGT | BPF_X:
746 	case BPF_JMP32 | BPF_JGE | BPF_X:
747 	case BPF_JMP32 | BPF_JLT | BPF_X:
748 	case BPF_JMP32 | BPF_JLE | BPF_X:
749 	case BPF_JMP32 | BPF_JSGT | BPF_X:
750 	case BPF_JMP32 | BPF_JSGE | BPF_X:
751 	case BPF_JMP32 | BPF_JSLT | BPF_X:
752 	case BPF_JMP32 | BPF_JSLE | BPF_X:
753 		jmp_offset = bpf2la_offset(i, off, ctx);
754 		move_reg(ctx, t1, dst);
755 		move_reg(ctx, t2, src);
756 		if (is_signed_bpf_cond(BPF_OP(code))) {
757 			emit_sext_32(ctx, t1, is32);
758 			emit_sext_32(ctx, t2, is32);
759 		} else {
760 			emit_zext_32(ctx, t1, is32);
761 			emit_zext_32(ctx, t2, is32);
762 		}
763 		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
764 			goto toofar;
765 		break;
766 
767 	/* PC += off if dst cond imm */
768 	case BPF_JMP | BPF_JEQ | BPF_K:
769 	case BPF_JMP | BPF_JNE | BPF_K:
770 	case BPF_JMP | BPF_JGT | BPF_K:
771 	case BPF_JMP | BPF_JGE | BPF_K:
772 	case BPF_JMP | BPF_JLT | BPF_K:
773 	case BPF_JMP | BPF_JLE | BPF_K:
774 	case BPF_JMP | BPF_JSGT | BPF_K:
775 	case BPF_JMP | BPF_JSGE | BPF_K:
776 	case BPF_JMP | BPF_JSLT | BPF_K:
777 	case BPF_JMP | BPF_JSLE | BPF_K:
778 	case BPF_JMP32 | BPF_JEQ | BPF_K:
779 	case BPF_JMP32 | BPF_JNE | BPF_K:
780 	case BPF_JMP32 | BPF_JGT | BPF_K:
781 	case BPF_JMP32 | BPF_JGE | BPF_K:
782 	case BPF_JMP32 | BPF_JLT | BPF_K:
783 	case BPF_JMP32 | BPF_JLE | BPF_K:
784 	case BPF_JMP32 | BPF_JSGT | BPF_K:
785 	case BPF_JMP32 | BPF_JSGE | BPF_K:
786 	case BPF_JMP32 | BPF_JSLT | BPF_K:
787 	case BPF_JMP32 | BPF_JSLE | BPF_K:
788 		jmp_offset = bpf2la_offset(i, off, ctx);
789 		if (imm) {
790 			move_imm(ctx, t1, imm, false);
791 			tm = t1;
792 		} else {
793 			/* If imm is 0, simply use zero register. */
794 			tm = LOONGARCH_GPR_ZERO;
795 		}
796 		move_reg(ctx, t2, dst);
797 		if (is_signed_bpf_cond(BPF_OP(code))) {
798 			emit_sext_32(ctx, tm, is32);
799 			emit_sext_32(ctx, t2, is32);
800 		} else {
801 			emit_zext_32(ctx, tm, is32);
802 			emit_zext_32(ctx, t2, is32);
803 		}
804 		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
805 			goto toofar;
806 		break;
807 
808 	/* PC += off if dst & src */
809 	case BPF_JMP | BPF_JSET | BPF_X:
810 	case BPF_JMP32 | BPF_JSET | BPF_X:
811 		jmp_offset = bpf2la_offset(i, off, ctx);
812 		emit_insn(ctx, and, t1, dst, src);
813 		emit_zext_32(ctx, t1, is32);
814 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
815 			goto toofar;
816 		break;
817 
818 	/* PC += off if dst & imm */
819 	case BPF_JMP | BPF_JSET | BPF_K:
820 	case BPF_JMP32 | BPF_JSET | BPF_K:
821 		jmp_offset = bpf2la_offset(i, off, ctx);
822 		move_imm(ctx, t1, imm, is32);
823 		emit_insn(ctx, and, t1, dst, t1);
824 		emit_zext_32(ctx, t1, is32);
825 		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
826 			goto toofar;
827 		break;
828 
829 	/* PC += off */
830 	case BPF_JMP | BPF_JA:
831 		jmp_offset = bpf2la_offset(i, off, ctx);
832 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
833 			goto toofar;
834 		break;
835 
836 	/* function call */
837 	case BPF_JMP | BPF_CALL:
838 		mark_call(ctx);
839 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
840 					    &func_addr, &func_addr_fixed);
841 		if (ret < 0)
842 			return ret;
843 
844 		move_addr(ctx, t1, func_addr);
845 		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
846 		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
847 		break;
848 
849 	/* tail call */
850 	case BPF_JMP | BPF_TAIL_CALL:
851 		mark_tail_call(ctx);
852 		if (emit_bpf_tail_call(ctx) < 0)
853 			return -EINVAL;
854 		break;
855 
856 	/* function return */
857 	case BPF_JMP | BPF_EXIT:
858 		emit_sext_32(ctx, regmap[BPF_REG_0], true);
859 
860 		if (i == ctx->prog->len - 1)
861 			break;
862 
863 		jmp_offset = epilogue_offset(ctx);
864 		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
865 			goto toofar;
866 		break;
867 
868 	/* dst = imm64 */
869 	case BPF_LD | BPF_IMM | BPF_DW:
870 		move_imm(ctx, dst, imm64, is32);
871 		return 1;
872 
873 	/* dst = *(size *)(src + off) */
874 	case BPF_LDX | BPF_MEM | BPF_B:
875 	case BPF_LDX | BPF_MEM | BPF_H:
876 	case BPF_LDX | BPF_MEM | BPF_W:
877 	case BPF_LDX | BPF_MEM | BPF_DW:
878 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
879 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
880 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
881 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
882 		switch (BPF_SIZE(code)) {
883 		case BPF_B:
884 			if (is_signed_imm12(off)) {
885 				emit_insn(ctx, ldbu, dst, src, off);
886 			} else {
887 				move_imm(ctx, t1, off, is32);
888 				emit_insn(ctx, ldxbu, dst, src, t1);
889 			}
890 			break;
891 		case BPF_H:
892 			if (is_signed_imm12(off)) {
893 				emit_insn(ctx, ldhu, dst, src, off);
894 			} else {
895 				move_imm(ctx, t1, off, is32);
896 				emit_insn(ctx, ldxhu, dst, src, t1);
897 			}
898 			break;
899 		case BPF_W:
900 			if (is_signed_imm12(off)) {
901 				emit_insn(ctx, ldwu, dst, src, off);
902 			} else if (is_signed_imm14(off)) {
903 				emit_insn(ctx, ldptrw, dst, src, off);
904 			} else {
905 				move_imm(ctx, t1, off, is32);
906 				emit_insn(ctx, ldxwu, dst, src, t1);
907 			}
908 			break;
909 		case BPF_DW:
910 			if (is_signed_imm12(off)) {
911 				emit_insn(ctx, ldd, dst, src, off);
912 			} else if (is_signed_imm14(off)) {
913 				emit_insn(ctx, ldptrd, dst, src, off);
914 			} else {
915 				move_imm(ctx, t1, off, is32);
916 				emit_insn(ctx, ldxd, dst, src, t1);
917 			}
918 			break;
919 		}
920 
921 		ret = add_exception_handler(insn, ctx, dst);
922 		if (ret)
923 			return ret;
924 		break;
925 
926 	/* *(size *)(dst + off) = imm */
927 	case BPF_ST | BPF_MEM | BPF_B:
928 	case BPF_ST | BPF_MEM | BPF_H:
929 	case BPF_ST | BPF_MEM | BPF_W:
930 	case BPF_ST | BPF_MEM | BPF_DW:
931 		switch (BPF_SIZE(code)) {
932 		case BPF_B:
933 			move_imm(ctx, t1, imm, is32);
934 			if (is_signed_imm12(off)) {
935 				emit_insn(ctx, stb, t1, dst, off);
936 			} else {
937 				move_imm(ctx, t2, off, is32);
938 				emit_insn(ctx, stxb, t1, dst, t2);
939 			}
940 			break;
941 		case BPF_H:
942 			move_imm(ctx, t1, imm, is32);
943 			if (is_signed_imm12(off)) {
944 				emit_insn(ctx, sth, t1, dst, off);
945 			} else {
946 				move_imm(ctx, t2, off, is32);
947 				emit_insn(ctx, stxh, t1, dst, t2);
948 			}
949 			break;
950 		case BPF_W:
951 			move_imm(ctx, t1, imm, is32);
952 			if (is_signed_imm12(off)) {
953 				emit_insn(ctx, stw, t1, dst, off);
954 			} else if (is_signed_imm14(off)) {
955 				emit_insn(ctx, stptrw, t1, dst, off);
956 			} else {
957 				move_imm(ctx, t2, off, is32);
958 				emit_insn(ctx, stxw, t1, dst, t2);
959 			}
960 			break;
961 		case BPF_DW:
962 			move_imm(ctx, t1, imm, is32);
963 			if (is_signed_imm12(off)) {
964 				emit_insn(ctx, std, t1, dst, off);
965 			} else if (is_signed_imm14(off)) {
966 				emit_insn(ctx, stptrd, t1, dst, off);
967 			} else {
968 				move_imm(ctx, t2, off, is32);
969 				emit_insn(ctx, stxd, t1, dst, t2);
970 			}
971 			break;
972 		}
973 		break;
974 
975 	/* *(size *)(dst + off) = src */
976 	case BPF_STX | BPF_MEM | BPF_B:
977 	case BPF_STX | BPF_MEM | BPF_H:
978 	case BPF_STX | BPF_MEM | BPF_W:
979 	case BPF_STX | BPF_MEM | BPF_DW:
980 		switch (BPF_SIZE(code)) {
981 		case BPF_B:
982 			if (is_signed_imm12(off)) {
983 				emit_insn(ctx, stb, src, dst, off);
984 			} else {
985 				move_imm(ctx, t1, off, is32);
986 				emit_insn(ctx, stxb, src, dst, t1);
987 			}
988 			break;
989 		case BPF_H:
990 			if (is_signed_imm12(off)) {
991 				emit_insn(ctx, sth, src, dst, off);
992 			} else {
993 				move_imm(ctx, t1, off, is32);
994 				emit_insn(ctx, stxh, src, dst, t1);
995 			}
996 			break;
997 		case BPF_W:
998 			if (is_signed_imm12(off)) {
999 				emit_insn(ctx, stw, src, dst, off);
1000 			} else if (is_signed_imm14(off)) {
1001 				emit_insn(ctx, stptrw, src, dst, off);
1002 			} else {
1003 				move_imm(ctx, t1, off, is32);
1004 				emit_insn(ctx, stxw, src, dst, t1);
1005 			}
1006 			break;
1007 		case BPF_DW:
1008 			if (is_signed_imm12(off)) {
1009 				emit_insn(ctx, std, src, dst, off);
1010 			} else if (is_signed_imm14(off)) {
1011 				emit_insn(ctx, stptrd, src, dst, off);
1012 			} else {
1013 				move_imm(ctx, t1, off, is32);
1014 				emit_insn(ctx, stxd, src, dst, t1);
1015 			}
1016 			break;
1017 		}
1018 		break;
1019 
1020 	case BPF_STX | BPF_ATOMIC | BPF_W:
1021 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1022 		emit_atomic(insn, ctx);
1023 		break;
1024 
1025 	/* Speculation barrier */
1026 	case BPF_ST | BPF_NOSPEC:
1027 		break;
1028 
1029 	default:
1030 		pr_err("bpf_jit: unknown opcode %02x\n", code);
1031 		return -EINVAL;
1032 	}
1033 
1034 	return 0;
1035 
1036 toofar:
1037 	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
1038 	return -E2BIG;
1039 }
1040 
1041 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1042 {
1043 	int i;
1044 	const struct bpf_prog *prog = ctx->prog;
1045 
1046 	for (i = 0; i < prog->len; i++) {
1047 		const struct bpf_insn *insn = &prog->insnsi[i];
1048 		int ret;
1049 
1050 		if (ctx->image == NULL)
1051 			ctx->offset[i] = ctx->idx;
1052 
1053 		ret = build_insn(insn, ctx, extra_pass);
1054 		if (ret > 0) {
1055 			i++;
1056 			if (ctx->image == NULL)
1057 				ctx->offset[i] = ctx->idx;
1058 			continue;
1059 		}
1060 		if (ret)
1061 			return ret;
1062 	}
1063 
1064 	if (ctx->image == NULL)
1065 		ctx->offset[i] = ctx->idx;
1066 
1067 	return 0;
1068 }
1069 
1070 /* Fill space with break instructions */
1071 static void jit_fill_hole(void *area, unsigned int size)
1072 {
1073 	u32 *ptr;
1074 
1075 	/* We are guaranteed to have aligned memory */
1076 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
1077 		*ptr++ = INSN_BREAK;
1078 }
1079 
1080 static int validate_code(struct jit_ctx *ctx)
1081 {
1082 	int i;
1083 	union loongarch_instruction insn;
1084 
1085 	for (i = 0; i < ctx->idx; i++) {
1086 		insn = ctx->image[i];
1087 		/* Check INSN_BREAK */
1088 		if (insn.word == INSN_BREAK)
1089 			return -1;
1090 	}
1091 
1092 	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
1093 		return -1;
1094 
1095 	return 0;
1096 }
1097 
/*
 * Translate an eBPF program into LoongArch instructions.
 *
 * Code is generated twice: a first pass with ctx.image == NULL only counts
 * instructions (ctx.idx) and fills ctx.offset[], then the image is allocated
 * and a second pass emits the real code, which validate_code() checks.
 *
 * When prog->is_func is set and this is not the extra pass, ctx, image and
 * header are saved in prog->aux->jit_data instead of being finalized; a
 * later call finds them there and runs as the extra pass.
 *
 * Returns the program to use. Most failure paths return orig_prog; the
 * extra-pass size-mismatch path returns prog with bpf_func, jited and
 * jited_len cleared.
 */
1098 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1099 {
1100 	bool tmp_blinded = false, extra_pass = false;
1101 	u8 *image_ptr;
1102 	int image_size, prog_size, extable_size;
1103 	struct jit_ctx ctx;
1104 	struct jit_data *jit_data;
1105 	struct bpf_binary_header *header;
1106 	struct bpf_prog *tmp, *orig_prog = prog;
1107 
1108 	/*
1109 	 * If BPF JIT was not enabled then we must fall back to
1110 	 * the interpreter.
1111 	 */
1112 	if (!prog->jit_requested)
1113 		return orig_prog;
1114 
1115 	tmp = bpf_jit_blind_constants(prog);
1116 	/*
1117 	 * If blinding was requested and we failed during blinding,
1118 	 * we must fall back to the interpreter. Otherwise, we save
1119 	 * the new JITed code.
1120 	 */
1121 	if (IS_ERR(tmp))
1122 		return orig_prog;
1123 
	/*
	 * A different program came back: compile that one and remember to
	 * release the unused one at the 'out' label.
	 */
1124 	if (tmp != prog) {
1125 		tmp_blinded = true;
1126 		prog = tmp;
1127 	}
1128 
	/*
	 * Per-program JIT state: reuse what an earlier call left in
	 * aux->jit_data, or allocate a zeroed one.
	 */
1129 	jit_data = prog->aux->jit_data;
1130 	if (!jit_data) {
1131 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1132 		if (!jit_data) {
1133 			prog = orig_prog;
1134 			goto out;
1135 		}
1136 		prog->aux->jit_data = jit_data;
1137 	}
	/*
	 * A saved offset table means an earlier call stored its state (see
	 * the is_func branch further down): restore it and regenerate the
	 * code into the existing image as an extra pass.
	 */
1138 	if (jit_data->ctx.offset) {
1139 		ctx = jit_data->ctx;
1140 		image_ptr = jit_data->image;
1141 		header = jit_data->header;
1142 		extra_pass = true;
1143 		prog_size = sizeof(u32) * ctx.idx;
1144 		goto skip_init_ctx;
1145 	}
1146 
1147 	memset(&ctx, 0, sizeof(ctx));
1148 	ctx.prog = prog;
1149 
	/* One slot per eBPF instruction plus one for the end of the program */
1150 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
1151 	if (ctx.offset == NULL) {
1152 		prog = orig_prog;
1153 		goto out_offset;
1154 	}
1155 
1156 	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
1157 	build_prologue(&ctx);
1158 	if (build_body(&ctx, extra_pass)) {
1159 		prog = orig_prog;
1160 		goto out_offset;
1161 	}
	/* Remember the instruction index at which the epilogue starts */
1162 	ctx.epilogue_offset = ctx.idx;
1163 	build_epilogue(&ctx);
1164 
1165 	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
1166 
1167 	/* Now we know the actual image size.
1168 	 * As each LoongArch instruction is of length 32bit,
1169 	 * we are translating number of JITed instructions into
1170 	 * the size required to store this JITed code.
1171 	 */
1172 	prog_size = sizeof(u32) * ctx.idx;
1173 	image_size = prog_size + extable_size;
1174 	/* Now we know the size of the structure to make */
1175 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
1176 				      sizeof(u32), jit_fill_hole);
1177 	if (header == NULL) {
1178 		prog = orig_prog;
1179 		goto out_offset;
1180 	}
1181 
1182 	/* 2. Now, the actual pass to generate final JIT code */
1183 	ctx.image = (union loongarch_instruction *)image_ptr;
	/* The exception table is placed right after the generated code */
1184 	if (extable_size)
1185 		prog->aux->extable = (void *)image_ptr + prog_size;
1186 
1187 skip_init_ctx:
	/* Restart emission from the beginning of the image */
1188 	ctx.idx = 0;
1189 	ctx.num_exentries = 0;
1190 
1191 	build_prologue(&ctx);
1192 	if (build_body(&ctx, extra_pass)) {
1193 		bpf_jit_binary_free(header);
1194 		prog = orig_prog;
1195 		goto out_offset;
1196 	}
1197 	build_epilogue(&ctx);
1198 
1199 	/* 3. Extra pass to validate JITed code */
1200 	if (validate_code(&ctx)) {
1201 		bpf_jit_binary_free(header);
1202 		prog = orig_prog;
1203 		goto out_offset;
1204 	}
1205 
1206 	/* And we're done */
1207 	if (bpf_jit_enable > 1)
1208 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1209 
1210 	/* Update the icache */
1211 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
1212 
	/*
	 * Final pass for this program: on an extra pass, make sure the same
	 * number of instructions was emitted as in the saved context, then
	 * lock the image read-only. Otherwise (is_func, first call) keep the
	 * state in jit_data for the extra pass.
	 */
1213 	if (!prog->is_func || extra_pass) {
1214 		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
1215 			pr_err_once("multi-func JIT bug %d != %d\n",
1216 				    ctx.idx, jit_data->ctx.idx);
1217 			bpf_jit_binary_free(header);
1218 			prog->bpf_func = NULL;
1219 			prog->jited = 0;
1220 			prog->jited_len = 0;
1221 			goto out_offset;
1222 		}
1223 		bpf_jit_binary_lock_ro(header);
1224 	} else {
1225 		jit_data->ctx = ctx;
1226 		jit_data->image = image_ptr;
1227 		jit_data->header = header;
1228 	}
1229 	prog->jited = 1;
1230 	prog->jited_len = prog_size;
1231 	prog->bpf_func = (void *)ctx.image;
1232 
1233 	if (!prog->is_func || extra_pass) {
1234 		int i;
1235 
1236 		/* offset[prog->len] is the size of program */
		/*
		 * Scale each recorded instruction index by
		 * LOONGARCH_INSN_SIZE before handing the table to
		 * bpf_prog_fill_jited_linfo().
		 */
1237 		for (i = 0; i <= prog->len; i++)
1238 			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
1239 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
1240 
		/*
		 * This label lives inside the if-block: the error paths above
		 * jump straight here, so they also free the offset table and
		 * jit_data and clear aux->jit_data, while the is_func first
		 * call skips the block and keeps them for the extra pass.
		 */
1241 out_offset:
1242 		kvfree(ctx.offset);
1243 		kfree(jit_data);
1244 		prog->aux->jit_data = NULL;
1245 	}
1246 
1247 out:
	/* Release whichever of the blinded/original programs is not returned */
1248 	if (tmp_blinded)
1249 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
1250 
	/*
	 * This assigns a variable named out_offset that is declared outside
	 * this function; labels have their own namespace, so it does not
	 * clash with the out_offset: label above.
	 * NOTE(review): presumably the offset cached by the tail-call
	 * emitter, reset for the next compilation -- confirm at its definition.
	 */
1251 	out_offset = -1;
1252 
1253 	return prog;
1254 }
1255 
/*
 * Capability hook: report that this JIT backend can handle programs that
 * combine bpf2bpf calls with tail calls.
 *
 * Always returns true.
 */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
1261