xref: /openbmc/linux/arch/parisc/net/bpf_jit_comp64.c (revision 7a836736b6537b0e2633381d743d9c1559ce243c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * BPF JIT compiler for PA-RISC (64-bit)
4  *
5  * Copyright(c) 2023 Helge Deller <deller@gmx.de>
6  *
7  * The code is based on the BPF JIT compiler for RV64 by Björn Töpel.
8  *
9  * TODO:
10  * - check if bpf_jit_needs_zext() is needed (currently enabled)
11  * - implement arch_prepare_bpf_trampoline(), poke(), ...
12  */
13 
14 #include <linux/bitfield.h>
15 #include <linux/bpf.h>
16 #include <linux/filter.h>
17 #include <linux/libgcc.h>
18 #include "bpf_jit.h"
19 
20 static const int regmap[] = {
21 	[BPF_REG_0] =	HPPA_REG_RET0,
22 	[BPF_REG_1] =	HPPA_R(5),
23 	[BPF_REG_2] =	HPPA_R(6),
24 	[BPF_REG_3] =	HPPA_R(7),
25 	[BPF_REG_4] =	HPPA_R(8),
26 	[BPF_REG_5] =	HPPA_R(9),
27 	[BPF_REG_6] =	HPPA_R(10),
28 	[BPF_REG_7] =	HPPA_R(11),
29 	[BPF_REG_8] =	HPPA_R(12),
30 	[BPF_REG_9] =	HPPA_R(13),
31 	[BPF_REG_FP] =	HPPA_R(14),
32 	[BPF_REG_AX] =	HPPA_R(15),
33 };
34 
35 /*
36  * Stack layout during BPF program execution (note: stack grows up):
37  *
38  *                     high
39  *   HPPA64 sp =>  +----------+ <= HPPA64 fp
40  *                 | saved sp |
41  *                 | saved rp |
42  *                 |   ...    | HPPA64 callee-saved registers
43  *                 | curr args|
44  *                 | local var|
45  *                 +----------+ <= (BPF FP)
46  *                 |          |
47  *                 |   ...    | BPF program stack
48  *                 |          |
49  *                 |   ...    | Function call stack
50  *                 |          |
51  *                 +----------+
52  *                     low
53  */
54 
/* Stack alignment unit: one full frame. */
#define STACK_ALIGN	FRAME_SIZE

/*
 * The exit pointer is kept in the stack slot at (sp - FRAME_SIZE).
 * EXIT_PTR_LOAD and EXIT_PTR_STORE build the 64-bit load from / store to
 * that slot; EXIT_PTR_JUMP builds the branch through a register that
 * holds the pointer.
 */
#define EXIT_PTR_LOAD(reg)	hppa64_ldd_im16(-FRAME_SIZE, HPPA_REG_SP, (reg))
#define EXIT_PTR_STORE(reg)	hppa64_std_im16((reg), -FRAME_SIZE, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, (reg), (nop))
61 
62 static u8 bpf_to_hppa_reg(int bpf_reg, struct hppa_jit_context *ctx)
63 {
64 	u8 reg = regmap[bpf_reg];
65 
66 	REG_SET_SEEN(ctx, reg);
67 	return reg;
68 };
69 
70 static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
71 {
72 	REG_SET_SEEN(ctx, rd);
73 	if (OPTIMIZE_HPPA && (rs == rd))
74 		return;
75 	REG_SET_SEEN(ctx, rs);
76 	emit(hppa_copy(rs, rd), ctx);
77 }
78 
79 static void emit_hppa64_depd(u8 src, u8 pos, u8 len, u8 target, bool no_zero, struct hppa_jit_context *ctx)
80 {
81 	int c;
82 
83 	pos &= (BITS_PER_LONG - 1);
84 	pos = 63 - pos;
85 	len = 64 - len;
86 	c =  (len < 32)  ? 0x4 : 0;
87 	c |= (pos >= 32) ? 0x2 : 0;
88 	c |= (no_zero)   ? 0x1 : 0;
89 	emit(hppa_t10_insn(0x3c, target, src, 0, c, pos & 0x1f, len & 0x1f), ctx);
90 }
91 
92 static void emit_hppa64_shld(u8 src, int num, u8 target, struct hppa_jit_context *ctx)
93 {
94 	emit_hppa64_depd(src, 63-num, 64-num, target, 0, ctx);
95 }
96 
97 static void emit_hppa64_extrd(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx)
98 {
99 	int c;
100 
101 	pos &= (BITS_PER_LONG - 1);
102 	len = 64 - len;
103 	c =  (len <  32) ? 0x4 : 0;
104 	c |= (pos >= 32) ? 0x2 : 0;
105 	c |= signed_op   ? 0x1 : 0;
106 	emit(hppa_t10_insn(0x36, src, target, 0, c, pos & 0x1f, len & 0x1f), ctx);
107 }
108 
109 static void emit_hppa64_extrw(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx)
110 {
111 	int c;
112 
113 	pos &= (32 - 1);
114 	len = 32 - len;
115 	c = 0x06 | (signed_op ? 1 : 0);
116 	emit(hppa_t10_insn(0x34, src, target, 0, c, pos, len), ctx);
117 }
118 
/*
 * Extend the low 32 bits of register r to 64 bits in target: an extract
 * of 32 bits at position 63, unsigned for zext32 and signed for sext32.
 */
#define emit_hppa64_zext32(r, target, ctx) \
	emit_hppa64_extrd((r), 63, 32, (target), false, (ctx))
#define emit_hppa64_sext32(r, target, ctx) \
	emit_hppa64_extrd((r), 63, 32, (target), true, (ctx))
123 
124 static void emit_hppa64_shrd(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx)
125 {
126 	emit_hppa64_extrd(src, 63-num, 64-num, target, signed_op, ctx);
127 }
128 
129 static void emit_hppa64_shrw(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx)
130 {
131 	emit_hppa64_extrw(src, 31-num, 32-num, target, signed_op, ctx);
132 }
133 
134 /* Emit variable-length instructions for 32-bit imm */
135 static void emit_imm32(u8 rd, s32 imm, struct hppa_jit_context *ctx)
136 {
137 	u32 lower = im11(imm);
138 
139 	REG_SET_SEEN(ctx, rd);
140 	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
141 		emit(hppa_ldi(imm, rd), ctx);
142 		return;
143 	}
144 	if (OPTIMIZE_HPPA && lower == imm) {
145 		emit(hppa_ldo(lower, HPPA_REG_ZERO, rd), ctx);
146 		return;
147 	}
148 	emit(hppa_ldil(imm, rd), ctx);
149 	if (OPTIMIZE_HPPA && (lower == 0))
150 		return;
151 	emit(hppa_ldo(lower, rd, rd), ctx);
152 }
153 
154 static bool is_32b_int(s64 val)
155 {
156 	return val == (s32) val;
157 }
158 
159 /* Emit variable-length instructions for 64-bit imm */
160 static void emit_imm(u8 rd, s64 imm, u8 tmpreg, struct hppa_jit_context *ctx)
161 {
162 	u32 upper32;
163 
164 	/* get lower 32-bits into rd, sign extended */
165 	emit_imm32(rd, imm, ctx);
166 
167 	/* do we have upper 32-bits too ? */
168 	if (OPTIMIZE_HPPA && is_32b_int(imm))
169 		return;
170 
171 	/* load upper 32-bits into lower tmpreg and deposit into rd */
172 	upper32 = imm >> 32;
173 	if (upper32 || !OPTIMIZE_HPPA) {
174 		emit_imm32(tmpreg, upper32, ctx);
175 		emit_hppa64_depd(tmpreg, 31, 32, rd, 1, ctx);
176 	} else
177 		emit_hppa64_depd(HPPA_REG_ZERO, 31, 32, rd, 1, ctx);
178 
179 }
180 
181 static int emit_jump(signed long paoff, bool force_far,
182 			       struct hppa_jit_context *ctx)
183 {
184 	unsigned long pc, addr;
185 
186 	/* Note: Use 2 instructions for jumps if force_far is set. */
187 	if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 22)) {
188 		/* use BL,long branch followed by nop() */
189 		emit(hppa64_bl_long(paoff - HPPA_BRANCH_DISPLACEMENT), ctx);
190 		if (force_far)
191 			emit(hppa_nop(), ctx);
192 		return 0;
193 	}
194 
195 	pc = (uintptr_t) &ctx->insns[ctx->ninsns];
196 	addr = pc + (paoff * HPPA_INSN_SIZE);
197 	/* even the 64-bit kernel runs in memory below 4GB */
198 	if (WARN_ON_ONCE(addr >> 32))
199 		return -E2BIG;
200 	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
201 	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
202 	return 0;
203 }
204 
205 static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
206 {
207 	int i;
208 
209 	if (is_tail_call) {
210 		/*
211 		 * goto *(t0 + 4);
212 		 * Skips first instruction of prologue which initializes tail
213 		 * call counter. Assumes t0 contains address of target program,
214 		 * see emit_bpf_tail_call.
215 		 */
216 		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
217 		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
218 		/* in delay slot: */
219 		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);
220 
221 		return;
222 	}
223 
224 	/* load epilogue function pointer and jump to it. */
225 	/* exit point is either at next instruction, or the outest TCC exit function */
226 	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
227 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
228 
229 	/* NOTE: we are 64-bit and big-endian, so return lower sign-extended 32-bit value */
230 	emit_hppa64_sext32(regmap[BPF_REG_0], HPPA_REG_RET0, ctx);
231 
232 	/* Restore callee-saved registers. */
233 	for (i = 3; i <= 15; i++) {
234 		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
235 			continue;
236 		emit(hppa64_ldd_im16(-REG_SIZE * i, HPPA_REG_SP, HPPA_R(i)), ctx);
237 	}
238 
239 	/* load original return pointer (stored by outest TCC function) */
240 	emit(hppa64_ldd_im16(-2*REG_SIZE, HPPA_REG_SP, HPPA_REG_RP), ctx);
241 	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
242 	/* in delay slot: */
243 	emit(hppa64_ldd_im5(-REG_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);
244 
245 	emit(hppa_nop(), ctx); // XXX WARUM einer zu wenig ??
246 }
247 
248 static int emit_branch(u8 op, u8 rd, u8 rs, signed long paoff,
249 			struct hppa_jit_context *ctx)
250 {
251 	int e, s;
252 	bool far = false;
253 	int off;
254 
255 	if (op == BPF_JSET) {
256 		/*
257 		 * BPF_JSET is a special case: it has no inverse so translate
258 		 * to and() function and compare against zero
259 		 */
260 		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
261 		paoff -= 1; /* reduce offset due to hppa_and() above */
262 		rd = HPPA_REG_T0;
263 		rs = HPPA_REG_ZERO;
264 		op = BPF_JNE;
265 	}
266 
267 	/* set start after BPF_JSET */
268 	s = ctx->ninsns;
269 
270 	if (!relative_branch_ok(paoff - HPPA_BRANCH_DISPLACEMENT + 1, 12)) {
271 		op = invert_bpf_cond(op);
272 		far = true;
273 	}
274 
275 	/*
276 	 * For a far branch, the condition is negated and we jump over the
277 	 * branch itself, and the two instructions from emit_jump.
278 	 * For a near branch, just use paoff.
279 	 */
280 	off = far ? (2 - HPPA_BRANCH_DISPLACEMENT) : paoff - HPPA_BRANCH_DISPLACEMENT;
281 
282 	switch (op) {
283 	/* IF (dst COND src) JUMP off */
284 	case BPF_JEQ:
285 		emit(hppa_beq(rd, rs, off), ctx);
286 		break;
287 	case BPF_JGT:
288 		emit(hppa_bgtu(rd, rs, off), ctx);
289 		break;
290 	case BPF_JLT:
291 		emit(hppa_bltu(rd, rs, off), ctx);
292 		break;
293 	case BPF_JGE:
294 		emit(hppa_bgeu(rd, rs, off), ctx);
295 		break;
296 	case BPF_JLE:
297 		emit(hppa_bleu(rd, rs, off), ctx);
298 		break;
299 	case BPF_JNE:
300 		emit(hppa_bne(rd, rs, off), ctx);
301 		break;
302 	case BPF_JSGT:
303 		emit(hppa_bgt(rd, rs, off), ctx);
304 		break;
305 	case BPF_JSLT:
306 		emit(hppa_blt(rd, rs, off), ctx);
307 		break;
308 	case BPF_JSGE:
309 		emit(hppa_bge(rd, rs, off), ctx);
310 		break;
311 	case BPF_JSLE:
312 		emit(hppa_ble(rd, rs, off), ctx);
313 		break;
314 	default:
315 		WARN_ON(1);
316 	}
317 
318 	if (far) {
319 		int ret;
320 		e = ctx->ninsns;
321 		/* Adjust for extra insns. */
322 		paoff -= (e - s);
323 		ret = emit_jump(paoff, true, ctx);
324 		if (ret)
325 			return ret;
326 	} else {
327 		/*
328 		 * always allocate 2 nops instead of the far branch to
329 		 * reduce translation loops
330 		 */
331 		emit(hppa_nop(), ctx);
332 		emit(hppa_nop(), ctx);
333 	}
334 	return 0;
335 }
336 
337 static void emit_zext_32(u8 reg, struct hppa_jit_context *ctx)
338 {
339 	emit_hppa64_zext32(reg, reg, ctx);
340 }
341 
342 static void emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
343 {
344 	/*
345 	 * R1 -> &ctx
346 	 * R2 -> &array
347 	 * R3 -> index
348 	 */
349 	int off;
350 	const s8 arr_reg = regmap[BPF_REG_2];
351 	const s8 idx_reg = regmap[BPF_REG_3];
352 	struct bpf_array bpfa;
353 	struct bpf_prog bpfp;
354 
355 	/* if there is any tail call, we need to save & restore all registers */
356 	REG_SET_SEEN_ALL(ctx);
357 
358 	/* get address of TCC main exit function for error case into rp */
359 	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
360 
361 	/* max_entries = array->map.max_entries; */
362 	off = offsetof(struct bpf_array, map.max_entries);
363 	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
364 	emit(hppa_ldw(off, arr_reg, HPPA_REG_T1), ctx);
365 
366 	/*
367 	 * if (index >= max_entries)
368 	 *   goto out;
369 	 */
370 	emit(hppa_bltu(idx_reg, HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
371 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
372 
373 	/*
374 	 * if (--tcc < 0)
375 	 *   goto out;
376 	 */
377 	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
378 	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
379 	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
380 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
381 
382 	/*
383 	 * prog = array->ptrs[index];
384 	 * if (!prog)
385 	 *   goto out;
386 	 */
387 	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 8);
388 	emit(hppa64_shladd(idx_reg, 3, arr_reg, HPPA_REG_T0), ctx);
389 	off = offsetof(struct bpf_array, ptrs);
390 	BUILD_BUG_ON(off < 16);
391 	emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
392 	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
393 	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
394 
395 	/*
396 	 * tcc = temp_tcc;
397 	 * goto *(prog->bpf_func + 4);
398 	 */
399 	off = offsetof(struct bpf_prog, bpf_func);
400 	BUILD_BUG_ON(off < 16);
401 	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 8);
402 	emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
403 	/* Epilogue jumps to *(t0 + 4). */
404 	__build_epilogue(true, ctx);
405 }
406 
407 static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
408 		      struct hppa_jit_context *ctx)
409 {
410 	u8 code = insn->code;
411 
412 	switch (code) {
413 	case BPF_JMP | BPF_JA:
414 	case BPF_JMP | BPF_CALL:
415 	case BPF_JMP | BPF_EXIT:
416 	case BPF_JMP | BPF_TAIL_CALL:
417 		break;
418 	default:
419 		*rd = bpf_to_hppa_reg(insn->dst_reg, ctx);
420 	}
421 
422 	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
423 	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
424 	    code & BPF_LDX || code & BPF_STX)
425 		*rs = bpf_to_hppa_reg(insn->src_reg, ctx);
426 }
427 
428 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx)
429 {
430 	emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx);
431 	*rd = HPPA_REG_T2;
432 	emit_hppa64_zext32(*rs, HPPA_REG_T1, ctx);
433 	*rs = HPPA_REG_T1;
434 }
435 
436 static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx)
437 {
438 	emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx);
439 	*rd = HPPA_REG_T2;
440 	emit_hppa64_sext32(*rs, HPPA_REG_T1, ctx);
441 	*rs = HPPA_REG_T1;
442 }
443 
444 static void emit_zext_32_rd_t1(u8 *rd, struct hppa_jit_context *ctx)
445 {
446 	emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx);
447 	*rd = HPPA_REG_T2;
448 	emit_zext_32(HPPA_REG_T1, ctx);
449 }
450 
451 static void emit_sext_32_rd(u8 *rd, struct hppa_jit_context *ctx)
452 {
453 	emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx);
454 	*rd = HPPA_REG_T2;
455 }
456 
457 static bool is_signed_bpf_cond(u8 cond)
458 {
459 	return cond == BPF_JSGT || cond == BPF_JSLT ||
460 		cond == BPF_JSGE || cond == BPF_JSLE;
461 }
462 
463 static void emit_call(u64 addr, bool fixed, struct hppa_jit_context *ctx)
464 {
465 	const int offset_sp = 2*FRAME_SIZE;
466 
467 	emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);
468 
469 	emit_hppa_copy(regmap[BPF_REG_1], HPPA_REG_ARG0, ctx);
470 	emit_hppa_copy(regmap[BPF_REG_2], HPPA_REG_ARG1, ctx);
471 	emit_hppa_copy(regmap[BPF_REG_3], HPPA_REG_ARG2, ctx);
472 	emit_hppa_copy(regmap[BPF_REG_4], HPPA_REG_ARG3, ctx);
473 	emit_hppa_copy(regmap[BPF_REG_5], HPPA_REG_ARG4, ctx);
474 
475 	/* Backup TCC. */
476 	REG_FORCE_SEEN(ctx, HPPA_REG_TCC_SAVED);
477 	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
478 		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);
479 
480 	/*
481 	 * Use ldil() to load absolute address. Don't use emit_imm as the
482 	 * number of emitted instructions should not depend on the value of
483 	 * addr.
484 	 */
485 	WARN_ON(addr >> 32);
486 	/* load function address and gp from Elf64_Fdesc descriptor */
487 	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
488 	emit(hppa_ldo(im11(addr), HPPA_REG_R31, HPPA_REG_R31), ctx);
489 	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr),
490 			     HPPA_REG_R31, HPPA_REG_RP), ctx);
491 	emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx);
492 	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp),
493 			     HPPA_REG_R31, HPPA_REG_GP), ctx);
494 
495 	/* Restore TCC. */
496 	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
497 		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);
498 
499 	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);
500 
501 	/* Set return value. */
502 	emit_hppa_copy(HPPA_REG_RET0, regmap[BPF_REG_0], ctx);
503 }
504 
505 static void emit_call_libgcc_ll(void *func, const s8 arg0,
506 		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
507 {
508 	u64 func_addr;
509 
510 	if (BPF_CLASS(opcode) == BPF_ALU) {
511 		emit_hppa64_zext32(arg0, HPPA_REG_ARG0, ctx);
512 		emit_hppa64_zext32(arg1, HPPA_REG_ARG1, ctx);
513 	} else {
514 		emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
515 		emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);
516 	}
517 
518 	/* libcgcc overwrites HPPA_REG_RET0, so keep copy in HPPA_REG_TCC_SAVED */
519 	if (arg0 != HPPA_REG_RET0) {
520 		REG_SET_SEEN(ctx, HPPA_REG_TCC_SAVED);
521 		emit(hppa_copy(HPPA_REG_RET0, HPPA_REG_TCC_SAVED), ctx);
522 	}
523 
524 	/* set up stack */
525 	emit(hppa_ldo(FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);
526 
527 	func_addr = (uintptr_t) func;
528 	/* load function func_address and gp from Elf64_Fdesc descriptor */
529 	emit_imm(HPPA_REG_R31, func_addr, arg0, ctx);
530 	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr),
531 			     HPPA_REG_R31, HPPA_REG_RP), ctx);
532         /* skip the following bve_l instruction if divisor is 0. */
533         if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
534 		if (BPF_OP(opcode) == BPF_DIV)
535 			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
536 		else {
537 			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET0, ctx);
538 		}
539 		emit(hppa_beq(HPPA_REG_ARG1, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
540 	}
541 	emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx);
542 	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp),
543 			     HPPA_REG_R31, HPPA_REG_GP), ctx);
544 
545 	emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);
546 
547 	emit_hppa_copy(HPPA_REG_RET0, arg0, ctx);
548 
549 	/* restore HPPA_REG_RET0 */
550 	if (arg0 != HPPA_REG_RET0)
551 		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_RET0), ctx);
552 }
553 
554 static void emit_store(const s8 rd, const s8 rs, s16 off,
555 			  struct hppa_jit_context *ctx, const u8 size,
556 			  const u8 mode)
557 {
558 	s8 dstreg;
559 
560 	/* need to calculate address since offset does not fit in 14 bits? */
561 	if (relative_bits_ok(off, 14))
562 		dstreg = rd;
563 	else {
564 		/* need to use R1 here, since addil puts result into R1 */
565 		dstreg = HPPA_REG_R1;
566 		emit(hppa_addil(off, rd), ctx);
567 		off = im11(off);
568 	}
569 
570 	switch (size) {
571 	case BPF_B:
572 		emit(hppa_stb(rs, off, dstreg), ctx);
573 		break;
574 	case BPF_H:
575 		emit(hppa_sth(rs, off, dstreg), ctx);
576 		break;
577 	case BPF_W:
578 		emit(hppa_stw(rs, off, dstreg), ctx);
579 		break;
580 	case BPF_DW:
581 		if (off & 7) {
582 			emit(hppa_ldo(off, dstreg, HPPA_REG_R1), ctx);
583 			emit(hppa64_std_im5(rs, 0, HPPA_REG_R1), ctx);
584 		} else if (off >= -16 && off <= 15)
585 			emit(hppa64_std_im5(rs, off, dstreg), ctx);
586 		else
587 			emit(hppa64_std_im16(rs, off, dstreg), ctx);
588 		break;
589 	}
590 }
591 
592 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
593 		      bool extra_pass)
594 {
595 	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
596 		    BPF_CLASS(insn->code) == BPF_JMP;
597 	int s, e, ret, i = insn - ctx->prog->insnsi;
598 	s64 paoff;
599 	struct bpf_prog_aux *aux = ctx->prog->aux;
600 	u8 rd = -1, rs = -1, code = insn->code;
601 	s16 off = insn->off;
602 	s32 imm = insn->imm;
603 
604 	init_regs(&rd, &rs, insn, ctx);
605 
606 	switch (code) {
607 	/* dst = src */
608 	case BPF_ALU | BPF_MOV | BPF_X:
609 	case BPF_ALU64 | BPF_MOV | BPF_X:
610 		if (imm == 1) {
611 			/* Special mov32 for zext */
612 			emit_zext_32(rd, ctx);
613 			break;
614 		}
615 		if (!is64 && !aux->verifier_zext)
616 			emit_hppa64_zext32(rs, rd, ctx);
617 		else
618 			emit_hppa_copy(rs, rd, ctx);
619 		break;
620 
621 	/* dst = dst OP src */
622 	case BPF_ALU | BPF_ADD | BPF_X:
623 	case BPF_ALU64 | BPF_ADD | BPF_X:
624                 emit(hppa_add(rd, rs, rd), ctx);
625 		if (!is64 && !aux->verifier_zext)
626 			emit_zext_32(rd, ctx);
627 		break;
628 	case BPF_ALU | BPF_SUB | BPF_X:
629 	case BPF_ALU64 | BPF_SUB | BPF_X:
630                 emit(hppa_sub(rd, rs, rd), ctx);
631 		if (!is64 && !aux->verifier_zext)
632 			emit_zext_32(rd, ctx);
633 		break;
634 	case BPF_ALU | BPF_AND | BPF_X:
635 	case BPF_ALU64 | BPF_AND | BPF_X:
636                 emit(hppa_and(rd, rs, rd), ctx);
637 		if (!is64 && !aux->verifier_zext)
638 			emit_zext_32(rd, ctx);
639 		break;
640 	case BPF_ALU | BPF_OR | BPF_X:
641 	case BPF_ALU64 | BPF_OR | BPF_X:
642                 emit(hppa_or(rd, rs, rd), ctx);
643 		if (!is64 && !aux->verifier_zext)
644 			emit_zext_32(rd, ctx);
645 		break;
646 	case BPF_ALU | BPF_XOR | BPF_X:
647 	case BPF_ALU64 | BPF_XOR | BPF_X:
648                 emit(hppa_xor(rd, rs, rd), ctx);
649 		if (!is64 && !aux->verifier_zext && rs != rd)
650 			emit_zext_32(rd, ctx);
651 		break;
652 	case BPF_ALU | BPF_MUL | BPF_K:
653 	case BPF_ALU64 | BPF_MUL | BPF_K:
654 		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
655 		rs = HPPA_REG_T1;
656 		fallthrough;
657 	case BPF_ALU | BPF_MUL | BPF_X:
658 	case BPF_ALU64 | BPF_MUL | BPF_X:
659 		emit_call_libgcc_ll(__muldi3, rd, rs, code, ctx);
660 		if (!is64 && !aux->verifier_zext)
661 			emit_zext_32(rd, ctx);
662 		break;
663 	case BPF_ALU | BPF_DIV | BPF_K:
664 	case BPF_ALU64 | BPF_DIV | BPF_K:
665 		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
666 		rs = HPPA_REG_T1;
667 		fallthrough;
668 	case BPF_ALU | BPF_DIV | BPF_X:
669 	case BPF_ALU64 | BPF_DIV | BPF_X:
670 		emit_call_libgcc_ll(&hppa_div64, rd, rs, code, ctx);
671 		if (!is64 && !aux->verifier_zext)
672 			emit_zext_32(rd, ctx);
673 		break;
674 	case BPF_ALU | BPF_MOD | BPF_K:
675 	case BPF_ALU64 | BPF_MOD | BPF_K:
676 		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
677 		rs = HPPA_REG_T1;
678 		fallthrough;
679 	case BPF_ALU | BPF_MOD | BPF_X:
680 	case BPF_ALU64 | BPF_MOD | BPF_X:
681 		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, code, ctx);
682 		if (!is64 && !aux->verifier_zext)
683 			emit_zext_32(rd, ctx);
684 		break;
685 
686 	case BPF_ALU | BPF_LSH | BPF_X:
687 	case BPF_ALU64 | BPF_LSH | BPF_X:
688 		emit_hppa64_sext32(rs, HPPA_REG_T0, ctx);
689 		emit(hppa64_mtsarcm(HPPA_REG_T0), ctx);
690 		if (is64)
691 			emit(hppa64_depdz_sar(rd, rd), ctx);
692 		else
693 			emit(hppa_depwz_sar(rd, rd), ctx);
694 		if (!is64 && !aux->verifier_zext)
695 			emit_zext_32(rd, ctx);
696 		break;
697 	case BPF_ALU | BPF_RSH | BPF_X:
698 	case BPF_ALU64 | BPF_RSH | BPF_X:
699 		emit(hppa_mtsar(rs), ctx);
700 		if (is64)
701 			emit(hppa64_shrpd_sar(rd, rd), ctx);
702 		else
703 			emit(hppa_shrpw_sar(rd, rd), ctx);
704 		if (!is64 && !aux->verifier_zext)
705 			emit_zext_32(rd, ctx);
706 		break;
707 	case BPF_ALU | BPF_ARSH | BPF_X:
708 	case BPF_ALU64 | BPF_ARSH | BPF_X:
709 		emit_hppa64_sext32(rs, HPPA_REG_T0, ctx);
710                 emit(hppa64_mtsarcm(HPPA_REG_T0), ctx);
711 		if (is64)
712 			emit(hppa_extrd_sar(rd, rd, 1), ctx);
713 		else
714 			emit(hppa_extrws_sar(rd, rd), ctx);
715 		if (!is64 && !aux->verifier_zext)
716 			emit_zext_32(rd, ctx);
717 		break;
718 
719 	/* dst = -dst */
720 	case BPF_ALU | BPF_NEG:
721 	case BPF_ALU64 | BPF_NEG:
722 		emit(hppa_sub(HPPA_REG_ZERO, rd, rd), ctx);
723 		if (!is64 && !aux->verifier_zext)
724 			emit_zext_32(rd, ctx);
725 		break;
726 
727 	/* dst = BSWAP##imm(dst) */
728 	case BPF_ALU | BPF_END | BPF_FROM_BE:
729 		switch (imm) {
730 		case 16:
731 			/* zero-extend 16 bits into 64 bits */
732 			emit_hppa64_depd(HPPA_REG_ZERO, 63-16, 64-16, rd, 1, ctx);
733 			break;
734 		case 32:
735 			if (!aux->verifier_zext)
736 				emit_zext_32(rd, ctx);
737 			break;
738 		case 64:
739 			/* Do nothing */
740 			break;
741 		}
742 		break;
743 
744 	case BPF_ALU | BPF_END | BPF_FROM_LE:
745 		switch (imm) {
746 		case 16:
747 			emit(hppa_extru(rd, 31 - 8, 8, HPPA_REG_T1), ctx);
748 			emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
749 			emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
750 			emit_hppa64_extrd(HPPA_REG_T1, 63, 16, rd, 0, ctx);
751 			break;
752 		case 32:
753 			emit(hppa_shrpw(rd, rd, 16, HPPA_REG_T1), ctx);
754 			emit_hppa64_depd(HPPA_REG_T1, 63-16, 8, HPPA_REG_T1, 1, ctx);
755 			emit(hppa_shrpw(rd, HPPA_REG_T1, 8, HPPA_REG_T1), ctx);
756 			emit_hppa64_extrd(HPPA_REG_T1, 63, 32, rd, 0, ctx);
757 			break;
758 		case 64:
759 			emit(hppa64_permh_3210(rd, HPPA_REG_T1), ctx);
760 			emit(hppa64_hshl(HPPA_REG_T1, 8, HPPA_REG_T2), ctx);
761 			emit(hppa64_hshr_u(HPPA_REG_T1, 8, HPPA_REG_T1), ctx);
762 			emit(hppa_or(HPPA_REG_T2, HPPA_REG_T1, rd), ctx);
763 			break;
764 		default:
765 			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
766 			return -1;
767 		}
768 		break;
769 
770 	/* dst = imm */
771 	case BPF_ALU | BPF_MOV | BPF_K:
772 	case BPF_ALU64 | BPF_MOV | BPF_K:
773 		emit_imm(rd, imm, HPPA_REG_T2, ctx);
774 		if (!is64 && !aux->verifier_zext)
775 			emit_zext_32(rd, ctx);
776 		break;
777 
778 	/* dst = dst OP imm */
779 	case BPF_ALU | BPF_ADD | BPF_K:
780 	case BPF_ALU64 | BPF_ADD | BPF_K:
781 		if (relative_bits_ok(imm, 14)) {
782 			emit(hppa_ldo(imm, rd, rd), ctx);
783 		} else {
784 			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
785 			emit(hppa_add(rd, HPPA_REG_T1, rd), ctx);
786 		}
787 		if (!is64 && !aux->verifier_zext)
788 			emit_zext_32(rd, ctx);
789 		break;
790 	case BPF_ALU | BPF_SUB | BPF_K:
791 	case BPF_ALU64 | BPF_SUB | BPF_K:
792 		if (relative_bits_ok(-imm, 14)) {
793 			emit(hppa_ldo(-imm, rd, rd), ctx);
794 		} else {
795 			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
796 			emit(hppa_sub(rd, HPPA_REG_T1, rd), ctx);
797 		}
798 		if (!is64 && !aux->verifier_zext)
799 			emit_zext_32(rd, ctx);
800 		break;
801 	case BPF_ALU | BPF_AND | BPF_K:
802 	case BPF_ALU64 | BPF_AND | BPF_K:
803 		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
804                 emit(hppa_and(rd, HPPA_REG_T1, rd), ctx);
805 		if (!is64 && !aux->verifier_zext)
806 			emit_zext_32(rd, ctx);
807 		break;
808 	case BPF_ALU | BPF_OR | BPF_K:
809 	case BPF_ALU64 | BPF_OR | BPF_K:
810 		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
811                 emit(hppa_or(rd, HPPA_REG_T1, rd), ctx);
812 		if (!is64 && !aux->verifier_zext)
813 			emit_zext_32(rd, ctx);
814 		break;
815 	case BPF_ALU | BPF_XOR | BPF_K:
816 	case BPF_ALU64 | BPF_XOR | BPF_K:
817 		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
818                 emit(hppa_xor(rd, HPPA_REG_T1, rd), ctx);
819 		if (!is64 && !aux->verifier_zext)
820 			emit_zext_32(rd, ctx);
821 		break;
822 	case BPF_ALU | BPF_LSH | BPF_K:
823 	case BPF_ALU64 | BPF_LSH | BPF_K:
824 		if (imm != 0) {
825 			emit_hppa64_shld(rd, imm, rd, ctx);
826 		}
827 
828 		if (!is64 && !aux->verifier_zext)
829 			emit_zext_32(rd, ctx);
830 		break;
831 	case BPF_ALU | BPF_RSH | BPF_K:
832 	case BPF_ALU64 | BPF_RSH | BPF_K:
833 		if (imm != 0) {
834 			if (is64)
835 				emit_hppa64_shrd(rd, imm, rd, false, ctx);
836 			else
837 				emit_hppa64_shrw(rd, imm, rd, false, ctx);
838 		}
839 
840 		if (!is64 && !aux->verifier_zext)
841 			emit_zext_32(rd, ctx);
842 		break;
843 	case BPF_ALU | BPF_ARSH | BPF_K:
844 	case BPF_ALU64 | BPF_ARSH | BPF_K:
845 		if (imm != 0) {
846 			if (is64)
847 				emit_hppa64_shrd(rd, imm, rd, true, ctx);
848 			else
849 				emit_hppa64_shrw(rd, imm, rd, true, ctx);
850 		}
851 
852 		if (!is64 && !aux->verifier_zext)
853 			emit_zext_32(rd, ctx);
854 		break;
855 
856 	/* JUMP off */
857 	case BPF_JMP | BPF_JA:
858 		paoff = hppa_offset(i, off, ctx);
859 		ret = emit_jump(paoff, false, ctx);
860 		if (ret)
861 			return ret;
862 		break;
863 
864 	/* IF (dst COND src) JUMP off */
865 	case BPF_JMP | BPF_JEQ | BPF_X:
866 	case BPF_JMP32 | BPF_JEQ | BPF_X:
867 	case BPF_JMP | BPF_JGT | BPF_X:
868 	case BPF_JMP32 | BPF_JGT | BPF_X:
869 	case BPF_JMP | BPF_JLT | BPF_X:
870 	case BPF_JMP32 | BPF_JLT | BPF_X:
871 	case BPF_JMP | BPF_JGE | BPF_X:
872 	case BPF_JMP32 | BPF_JGE | BPF_X:
873 	case BPF_JMP | BPF_JLE | BPF_X:
874 	case BPF_JMP32 | BPF_JLE | BPF_X:
875 	case BPF_JMP | BPF_JNE | BPF_X:
876 	case BPF_JMP32 | BPF_JNE | BPF_X:
877 	case BPF_JMP | BPF_JSGT | BPF_X:
878 	case BPF_JMP32 | BPF_JSGT | BPF_X:
879 	case BPF_JMP | BPF_JSLT | BPF_X:
880 	case BPF_JMP32 | BPF_JSLT | BPF_X:
881 	case BPF_JMP | BPF_JSGE | BPF_X:
882 	case BPF_JMP32 | BPF_JSGE | BPF_X:
883 	case BPF_JMP | BPF_JSLE | BPF_X:
884 	case BPF_JMP32 | BPF_JSLE | BPF_X:
885 	case BPF_JMP | BPF_JSET | BPF_X:
886 	case BPF_JMP32 | BPF_JSET | BPF_X:
887 		paoff = hppa_offset(i, off, ctx);
888 		if (!is64) {
889 			s = ctx->ninsns;
890 			if (is_signed_bpf_cond(BPF_OP(code)))
891 				emit_sext_32_rd_rs(&rd, &rs, ctx);
892 			else
893 				emit_zext_32_rd_rs(&rd, &rs, ctx);
894 			e = ctx->ninsns;
895 
896 			/* Adjust for extra insns */
897 			paoff -= (e - s);
898 		}
899 		if (BPF_OP(code) == BPF_JSET) {
900 			/* Adjust for and */
901 			paoff -= 1;
902 			emit(hppa_and(rs, rd, HPPA_REG_T1), ctx);
903 			emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff,
904 				    ctx);
905 		} else {
906 			emit_branch(BPF_OP(code), rd, rs, paoff, ctx);
907 		}
908 		break;
909 
910 	/* IF (dst COND imm) JUMP off */
911 	case BPF_JMP | BPF_JEQ | BPF_K:
912 	case BPF_JMP32 | BPF_JEQ | BPF_K:
913 	case BPF_JMP | BPF_JGT | BPF_K:
914 	case BPF_JMP32 | BPF_JGT | BPF_K:
915 	case BPF_JMP | BPF_JLT | BPF_K:
916 	case BPF_JMP32 | BPF_JLT | BPF_K:
917 	case BPF_JMP | BPF_JGE | BPF_K:
918 	case BPF_JMP32 | BPF_JGE | BPF_K:
919 	case BPF_JMP | BPF_JLE | BPF_K:
920 	case BPF_JMP32 | BPF_JLE | BPF_K:
921 	case BPF_JMP | BPF_JNE | BPF_K:
922 	case BPF_JMP32 | BPF_JNE | BPF_K:
923 	case BPF_JMP | BPF_JSGT | BPF_K:
924 	case BPF_JMP32 | BPF_JSGT | BPF_K:
925 	case BPF_JMP | BPF_JSLT | BPF_K:
926 	case BPF_JMP32 | BPF_JSLT | BPF_K:
927 	case BPF_JMP | BPF_JSGE | BPF_K:
928 	case BPF_JMP32 | BPF_JSGE | BPF_K:
929 	case BPF_JMP | BPF_JSLE | BPF_K:
930 	case BPF_JMP32 | BPF_JSLE | BPF_K:
931 		paoff = hppa_offset(i, off, ctx);
932 		s = ctx->ninsns;
933 		if (imm) {
934 			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
935 			rs = HPPA_REG_T1;
936 		} else {
937 			rs = HPPA_REG_ZERO;
938 		}
939 		if (!is64) {
940 			if (is_signed_bpf_cond(BPF_OP(code)))
941 				emit_sext_32_rd(&rd, ctx);
942 			else
943 				emit_zext_32_rd_t1(&rd, ctx);
944 		}
945 		e = ctx->ninsns;
946 
947 		/* Adjust for extra insns */
948 		paoff -= (e - s);
949 		emit_branch(BPF_OP(code), rd, rs, paoff, ctx);
950 		break;
951 	case BPF_JMP | BPF_JSET | BPF_K:
952 	case BPF_JMP32 | BPF_JSET | BPF_K:
953 		paoff = hppa_offset(i, off, ctx);
954 		s = ctx->ninsns;
955 		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
956 		emit(hppa_and(HPPA_REG_T1, rd, HPPA_REG_T1), ctx);
957 		/* For jset32, we should clear the upper 32 bits of t1, but
958 		 * sign-extension is sufficient here and saves one instruction,
959 		 * as t1 is used only in comparison against zero.
960 		 */
961 		if (!is64 && imm < 0)
962 			emit_hppa64_sext32(HPPA_REG_T1, HPPA_REG_T1, ctx);
963 		e = ctx->ninsns;
964 		paoff -= (e - s);
965 		emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff, ctx);
966 		break;
967 	/* function call */
968 	case BPF_JMP | BPF_CALL:
969 	{
970 		bool fixed_addr;
971 		u64 addr;
972 
973 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
974 					    &addr, &fixed_addr);
975 		if (ret < 0)
976 			return ret;
977 
978 		REG_SET_SEEN_ALL(ctx);
979 		emit_call(addr, fixed_addr, ctx);
980 		break;
981 	}
982 	/* tail call */
983 	case BPF_JMP | BPF_TAIL_CALL:
984 		emit_bpf_tail_call(i, ctx);
985 		break;
986 
987 	/* function return */
988 	case BPF_JMP | BPF_EXIT:
989 		if (i == ctx->prog->len - 1)
990 			break;
991 
992 		paoff = epilogue_offset(ctx);
993 		ret = emit_jump(paoff, false, ctx);
994 		if (ret)
995 			return ret;
996 		break;
997 
998 	/* dst = imm64 */
999 	case BPF_LD | BPF_IMM | BPF_DW:
1000 	{
1001 		struct bpf_insn insn1 = insn[1];
1002 		u64 imm64 = (u64)insn1.imm << 32 | (u32)imm;
1003 		if (bpf_pseudo_func(insn))
1004 			imm64 = (uintptr_t)dereference_function_descriptor((void*)imm64);
1005 		emit_imm(rd, imm64, HPPA_REG_T2, ctx);
1006 
1007 		return 1;
1008 	}
1009 
1010 	/* LDX: dst = *(size *)(src + off) */
1011 	case BPF_LDX | BPF_MEM | BPF_B:
1012 	case BPF_LDX | BPF_MEM | BPF_H:
1013 	case BPF_LDX | BPF_MEM | BPF_W:
1014 	case BPF_LDX | BPF_MEM | BPF_DW:
1015 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1016 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1017 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1018 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1019 	{
1020 		u8 srcreg;
1021 
1022 		/* need to calculate address since offset does not fit in 14 bits? */
1023 		if (relative_bits_ok(off, 14))
1024 			srcreg = rs;
1025 		else {
1026 			/* need to use R1 here, since addil puts result into R1 */
1027 			srcreg = HPPA_REG_R1;
1028 			BUG_ON(rs == HPPA_REG_R1);
1029 			BUG_ON(rd == HPPA_REG_R1);
1030 			emit(hppa_addil(off, rs), ctx);
1031 			off = im11(off);
1032 		}
1033 
1034 		switch (BPF_SIZE(code)) {
1035 		case BPF_B:
1036 			emit(hppa_ldb(off, srcreg, rd), ctx);
1037 			if (insn_is_zext(&insn[1]))
1038 				return 1;
1039 			break;
1040 		case BPF_H:
1041 			emit(hppa_ldh(off, srcreg, rd), ctx);
1042 			if (insn_is_zext(&insn[1]))
1043 				return 1;
1044 			break;
1045 		case BPF_W:
1046 			emit(hppa_ldw(off, srcreg, rd), ctx);
1047 			if (insn_is_zext(&insn[1]))
1048 				return 1;
1049 			break;
1050 		case BPF_DW:
1051 			if (off & 7) {
1052 				emit(hppa_ldo(off, srcreg, HPPA_REG_R1), ctx);
1053 				emit(hppa64_ldd_reg(HPPA_REG_ZERO, HPPA_REG_R1, rd), ctx);
1054 			} else if (off >= -16 && off <= 15)
1055 				emit(hppa64_ldd_im5(off, srcreg, rd), ctx);
1056 			else
1057 				emit(hppa64_ldd_im16(off, srcreg, rd), ctx);
1058 			break;
1059 		}
1060 		break;
1061 	}
1062 	/* speculation barrier */
1063 	case BPF_ST | BPF_NOSPEC:
1064 		break;
1065 
1066 	/* ST: *(size *)(dst + off) = imm */
1067 	/* STX: *(size *)(dst + off) = src */
1068 	case BPF_ST | BPF_MEM | BPF_B:
1069 	case BPF_ST | BPF_MEM | BPF_H:
1070 	case BPF_ST | BPF_MEM | BPF_W:
1071 	case BPF_ST | BPF_MEM | BPF_DW:
1072 
1073 	case BPF_STX | BPF_MEM | BPF_B:
1074 	case BPF_STX | BPF_MEM | BPF_H:
1075 	case BPF_STX | BPF_MEM | BPF_W:
1076 	case BPF_STX | BPF_MEM | BPF_DW:
1077 		if (BPF_CLASS(code) == BPF_ST) {
1078 			emit_imm(HPPA_REG_T2, imm, HPPA_REG_T1, ctx);
1079 			rs = HPPA_REG_T2;
1080 		}
1081 
1082 		emit_store(rd, rs, off, ctx, BPF_SIZE(code), BPF_MODE(code));
1083 		break;
1084 
1085 	case BPF_STX | BPF_ATOMIC | BPF_W:
1086 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1087 		pr_info_once(
1088 			"bpf-jit: not supported: atomic operation %02x ***\n",
1089 			insn->imm);
1090 		return -EFAULT;
1091 
1092 	default:
1093 		pr_err("bpf-jit: unknown opcode %02x\n", code);
1094 		return -EINVAL;
1095 	}
1096 
1097 	return 0;
1098 }
1099 
1100 void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
1101 {
1102 	int bpf_stack_adjust, stack_adjust, i;
1103 	unsigned long addr;
1104 	s8 reg;
1105 
1106 	/*
1107 	 * stack on hppa grows up, so if tail calls are used we need to
1108 	 * allocate the maximum stack size
1109 	 */
1110 	if (REG_ALL_SEEN(ctx))
1111 		bpf_stack_adjust = MAX_BPF_STACK;
1112 	else
1113 		bpf_stack_adjust = ctx->prog->aux->stack_depth;
1114 	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);
1115 
1116 	stack_adjust = FRAME_SIZE + bpf_stack_adjust;
1117 	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
1118 
1119 	/*
1120 	 * NOTE: We construct an Elf64_Fdesc descriptor here.
1121 	 * The first 4 words initialize the TCC and compares them.
1122 	 * Then follows the virtual address of the eBPF function,
1123 	 * and the gp for this function.
1124 	 *
1125 	 * The first instruction sets the tail-call-counter (TCC) register.
1126 	 * This instruction is skipped by tail calls.
1127 	 * Use a temporary register instead of a caller-saved register initially.
1128 	 */
1129 	REG_FORCE_SEEN(ctx, HPPA_REG_TCC_IN_INIT);
1130 	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);
1131 
1132 	/*
1133 	 * Skip all initializations when called as BPF TAIL call.
1134 	 */
1135 	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
1136 	emit(hppa_beq(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, 6 - HPPA_BRANCH_DISPLACEMENT), ctx);
1137 	emit(hppa64_bl_long(ctx->prologue_len - 3 - HPPA_BRANCH_DISPLACEMENT), ctx);
1138 
1139 	/* store entry address of this eBPF function */
1140 	addr = (uintptr_t) &ctx->insns[0];
1141 	emit(addr >> 32, ctx);
1142 	emit(addr & 0xffffffff, ctx);
1143 
1144 	/* store gp of this eBPF function */
1145 	asm("copy %%r27,%0" : "=r" (addr) );
1146 	emit(addr >> 32, ctx);
1147 	emit(addr & 0xffffffff, ctx);
1148 
1149 	/* Set up hppa stack frame. */
1150 	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);
1151 	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);
1152 	emit(hppa64_std_im5 (HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);
1153 	emit(hppa64_std_im16(HPPA_REG_RP, -2*REG_SIZE, HPPA_REG_SP), ctx);
1154 
1155 	/* Save callee-save registers. */
1156 	for (i = 3; i <= 15; i++) {
1157 		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
1158 			continue;
1159 		emit(hppa64_std_im16(HPPA_R(i), -REG_SIZE * i, HPPA_REG_SP), ctx);
1160 	}
1161 
1162 	/* load function parameters; load all if we use tail functions */
1163 	#define LOAD_PARAM(arg, dst) \
1164 		if (REG_WAS_SEEN(ctx, regmap[dst]) ||	\
1165 		    REG_WAS_SEEN(ctx, HPPA_REG_TCC))	\
1166 			emit_hppa_copy(arg, regmap[dst], ctx)
1167 	LOAD_PARAM(HPPA_REG_ARG0, BPF_REG_1);
1168 	LOAD_PARAM(HPPA_REG_ARG1, BPF_REG_2);
1169 	LOAD_PARAM(HPPA_REG_ARG2, BPF_REG_3);
1170 	LOAD_PARAM(HPPA_REG_ARG3, BPF_REG_4);
1171 	LOAD_PARAM(HPPA_REG_ARG4, BPF_REG_5);
1172 	#undef LOAD_PARAM
1173 
1174 	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
1175 	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
1176 	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
1177 
1178 	/*
1179 	 * Now really set the tail call counter (TCC) register.
1180 	 */
1181 	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
1182 		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);
1183 
1184 	/*
1185 	 * Save epilogue function pointer for outer TCC call chain.
1186 	 * The main TCC call stores the final RP on stack.
1187 	 */
1188 	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
1189 	/* skip first two instructions which jump to exit */
1190 	addr += 2 * HPPA_INSN_SIZE;
1191 	emit_imm(HPPA_REG_T2, addr, HPPA_REG_T1, ctx);
1192 	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);
1193 
1194 	/* Set up BPF frame pointer. */
1195 	reg = regmap[BPF_REG_FP];	/* -> HPPA_REG_FP */
1196 	if (REG_WAS_SEEN(ctx, reg)) {
1197 		emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, reg), ctx);
1198 	}
1199 }
1200 
/*
 * Emit the epilogue of the JITed eBPF program into @ctx.
 *
 * Thin wrapper which delegates to __build_epilogue() with its first
 * argument set to false.
 * NOTE(review): that argument presumably selects the tail-call variant of
 * the epilogue - confirm against __build_epilogue().
 */
1201 void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
1202 {
1203 	__build_epilogue(false, ctx);
1204 }
1205 
/*
 * Report whether this JIT supports kfunc calls.
 *
 * Always returns true, i.e. support is advertised unconditionally.
 */
1206 bool bpf_jit_supports_kfunc_call(void)
1207 {
1208 	return true;
1209 }
1210