xref: /openbmc/linux/arch/arm64/net/bpf_jit_comp.c (revision 029f7f3b8701cc7aca8bdb31f0c7edd6a479e357)
1 /*
2  * BPF JIT compiler for ARM64
3  *
4  * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define pr_fmt(fmt) "bpf_jit: " fmt
20 
21 #include <linux/filter.h>
22 #include <linux/printk.h>
23 #include <linux/skbuff.h>
24 #include <linux/slab.h>
25 
26 #include <asm/byteorder.h>
27 #include <asm/cacheflush.h>
28 #include <asm/debug-monitors.h>
29 
30 #include "bpf_jit.h"
31 
32 int bpf_jit_enable __read_mostly;
33 
34 #define TMP_REG_1 (MAX_BPF_REG + 0)
35 #define TMP_REG_2 (MAX_BPF_REG + 1)
36 
37 /* Map BPF registers to A64 registers */
38 static const int bpf2a64[] = {
39 	/* return value from in-kernel function, and exit value from eBPF */
40 	[BPF_REG_0] = A64_R(7),
41 	/* arguments from eBPF program to in-kernel function */
42 	[BPF_REG_1] = A64_R(0),
43 	[BPF_REG_2] = A64_R(1),
44 	[BPF_REG_3] = A64_R(2),
45 	[BPF_REG_4] = A64_R(3),
46 	[BPF_REG_5] = A64_R(4),
47 	/* callee saved registers that in-kernel function will preserve */
48 	[BPF_REG_6] = A64_R(19),
49 	[BPF_REG_7] = A64_R(20),
50 	[BPF_REG_8] = A64_R(21),
51 	[BPF_REG_9] = A64_R(22),
52 	/* read-only frame pointer to access stack */
53 	[BPF_REG_FP] = A64_R(25),
54 	/* temporary register for internal BPF JIT */
55 	[TMP_REG_1] = A64_R(23),
56 	[TMP_REG_2] = A64_R(24),
57 };
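/*
 * This mapping keeps the eBPF and AAPCS64 calling conventions lined up:
 * BPF_REG_1..BPF_REG_5 live in x0-x4, the first five AAPCS64 argument
 * registers, so helper arguments need no shuffling before a call.
 * BPF_REG_6..BPF_REG_9 use x19-x22 and the BPF frame pointer uses x25,
 * all callee-saved, so their contents survive calls into the kernel.
 * BPF_REG_0 is kept in x7 and only copied into x0 in the epilogue.
 * The JIT-internal temporaries x23/x24 are likewise callee-saved and are
 * only pushed/popped in the prologue/epilogue when ctx->tmp_used says
 * they were actually needed.
 */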
58 
59 struct jit_ctx {
60 	const struct bpf_prog *prog;
61 	int idx;
62 	int tmp_used;
63 	int epilogue_offset;
64 	int *offset;
65 	u32 *image;
66 };
67 
68 static inline void emit(const u32 insn, struct jit_ctx *ctx)
69 {
70 	if (ctx->image != NULL)
71 		ctx->image[ctx->idx] = cpu_to_le32(insn);
72 
73 	ctx->idx++;
74 }
75 
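/*
 * Build an arbitrary 64-bit immediate out of 16-bit chunks: MOVZ sets the
 * lowest chunk and zeroes the rest, then one MOVK per remaining non-zero
 * chunk patches it in.  For example (illustrative value only),
 * emit_a64_mov_i64(reg, 0x0000123400005678, ctx) emits:
 *
 *	movz	reg, #0x5678
 *	movk	reg, #0x1234, lsl #32
 *
 * so small constants cost a single instruction and all-zero chunks above
 * the first one are skipped.
 */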
76 static inline void emit_a64_mov_i64(const int reg, const u64 val,
77 				    struct jit_ctx *ctx)
78 {
79 	u64 tmp = val;
80 	int shift = 0;
81 
82 	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
83 	tmp >>= 16;
84 	shift += 16;
85 	while (tmp) {
86 		if (tmp & 0xffff)
87 			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
88 		tmp >>= 16;
89 		shift += 16;
90 	}
91 }
92 
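/*
 * 32-bit immediates (sign-extended to 64 bits when is64 is set) take at
 * most two instructions.  A value whose upper half has the sign bit set
 * is built with MOVN (move inverted); e.g. (illustrative values only)
 * emit_a64_mov_i(0, reg, -5, ctx) emits a single
 *
 *	movn	reg, #0x4		// reg = ~0x4 = 0xfffffffb
 *
 * while a positive constant such as 0x12345678 becomes MOVZ + MOVK:
 *
 *	movz	reg, #0x5678
 *	movk	reg, #0x1234, lsl #16
 */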
93 static inline void emit_a64_mov_i(const int is64, const int reg,
94 				  const s32 val, struct jit_ctx *ctx)
95 {
96 	u16 hi = val >> 16;
97 	u16 lo = val & 0xffff;
98 
99 	if (hi & 0x8000) {
100 		if (hi == 0xffff) {
101 			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
102 		} else {
103 			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
104 			emit(A64_MOVK(is64, reg, lo, 0), ctx);
105 		}
106 	} else {
107 		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
108 		if (hi)
109 			emit(A64_MOVK(is64, reg, hi, 16), ctx);
110 	}
111 }
112 
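/*
 * Branch offsets are expressed in A64 instructions, not bytes.
 * ctx->offset[i] holds the A64 index just past eBPF instruction i, i.e.
 * the start of instruction i + 1, which is exactly what eBPF jump
 * semantics need (offsets are relative to the next instruction).  The
 * branch itself is the last A64 instruction generated for the jump, so
 * its index is ctx->offset[bpf_from] - 1.
 */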
113 static inline int bpf2a64_offset(int bpf_to, int bpf_from,
114 				 const struct jit_ctx *ctx)
115 {
116 	int to = ctx->offset[bpf_to];
117 	/* -1 to account for the Branch instruction */
118 	int from = ctx->offset[bpf_from] - 1;
119 
120 	return to - from;
121 }
122 
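/*
 * Unused space in the JIT image is filled with AARCH64_BREAK_FAULT (a BRK
 * encoding), so anything that strays into the padding takes a fault
 * instead of executing leftover data.
 */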
123 static void jit_fill_hole(void *area, unsigned int size)
124 {
125 	u32 *ptr;
126 	/* We are guaranteed to have aligned memory. */
127 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
128 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
129 }
130 
131 static inline int epilogue_offset(const struct jit_ctx *ctx)
132 {
133 	int to = ctx->epilogue_offset;
134 	int from = ctx->idx;
135 
136 	return to - from;
137 }
138 
139 /* Stack size must be a multiple of 16 bytes */
140 #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
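/* e.g. with MAX_BPF_STACK at 512 bytes, the 512 + 4 bytes requested in
 * build_prologue()/build_epilogue() round up to a 528-byte stack area.
 */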
141 
142 static void build_prologue(struct jit_ctx *ctx)
143 {
144 	const u8 r6 = bpf2a64[BPF_REG_6];
145 	const u8 r7 = bpf2a64[BPF_REG_7];
146 	const u8 r8 = bpf2a64[BPF_REG_8];
147 	const u8 r9 = bpf2a64[BPF_REG_9];
148 	const u8 fp = bpf2a64[BPF_REG_FP];
149 	const u8 ra = bpf2a64[BPF_REG_A];
150 	const u8 rx = bpf2a64[BPF_REG_X];
151 	const u8 tmp1 = bpf2a64[TMP_REG_1];
152 	const u8 tmp2 = bpf2a64[TMP_REG_2];
153 	int stack_size = MAX_BPF_STACK;
154 
155 	stack_size += 4; /* extra for skb_copy_bits buffer */
156 	stack_size = STACK_ALIGN(stack_size);
157 
158 	/*
159 	 * BPF prog stack layout
160 	 *
161 	 *                         high
162 	 * original A64_SP =>   0:+-----+ BPF prologue
163 	 *                        |FP/LR|
164 	 * current A64_FP =>  -16:+-----+
165 	 *                        | ... | callee saved registers
166 	 *                        +-----+
167 	 *                        |     | x25/x26
168 	 * BPF fp register => -80:+-----+
169 	 *                        |     |
170 	 *                        | ... | BPF prog stack
171 	 *                        |     |
172 	 *                        |     |
173 	 * current A64_SP =>      +-----+
174 	 *                        |     |
175 	 *                        | ... | Function call stack
176 	 *                        |     |
177 	 *                        +-----+
178 	 *                          low
179 	 *
180 	 */
181 
182 	/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
183 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
184 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
185 
186 	/* Save callee-saved registers */
187 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
188 	emit(A64_PUSH(r8, r9, A64_SP), ctx);
189 	if (ctx->tmp_used)
190 		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
191 
192 	/* Save fp (x25) and x26. SP requires 16-byte alignment */
193 	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
194 
195 	/* Set up BPF prog stack base register (x25) */
196 	emit(A64_MOV(1, fp, A64_SP), ctx);
197 
198 	/* Set up function call stack */
199 	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
200 
201 	/* Clear registers A and X (converted classic BPF programs expect them to be zero) */
202 	emit_a64_mov_i64(ra, 0, ctx);
203 	emit_a64_mov_i64(rx, 0, ctx);
204 }
205 
206 static void build_epilogue(struct jit_ctx *ctx)
207 {
208 	const u8 r0 = bpf2a64[BPF_REG_0];
209 	const u8 r6 = bpf2a64[BPF_REG_6];
210 	const u8 r7 = bpf2a64[BPF_REG_7];
211 	const u8 r8 = bpf2a64[BPF_REG_8];
212 	const u8 r9 = bpf2a64[BPF_REG_9];
213 	const u8 fp = bpf2a64[BPF_REG_FP];
214 	const u8 tmp1 = bpf2a64[TMP_REG_1];
215 	const u8 tmp2 = bpf2a64[TMP_REG_2];
216 	int stack_size = MAX_BPF_STACK;
217 
218 	stack_size += 4; /* extra for skb_copy_bits buffer */
219 	stack_size = STACK_ALIGN(stack_size);
220 
221 	/* We're done with the BPF stack */
222 	emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
223 
224 	/* Restore fp (x25) and x26 */
225 	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
226 
227 	/* Restore callee-saved registers */
228 	if (ctx->tmp_used)
229 		emit(A64_POP(tmp1, tmp2, A64_SP), ctx);
230 	emit(A64_POP(r8, r9, A64_SP), ctx);
231 	emit(A64_POP(r6, r7, A64_SP), ctx);
232 
233 	/* Restore FP/LR registers */
234 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
235 
236 	/* Set return value */
237 	emit(A64_MOV(1, A64_R(0), r0), ctx);
238 
239 	emit(A64_RET(A64_LR), ctx);
240 }
241 
242 /* JITs an eBPF instruction.
243  * Returns:
244  * 0  - successfully JITed an 8-byte eBPF instruction.
245  * >0 - successfully JITed a 16-byte eBPF instruction.
246  * <0 - failed to JIT.
247  */
248 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
249 {
250 	const u8 code = insn->code;
251 	const u8 dst = bpf2a64[insn->dst_reg];
252 	const u8 src = bpf2a64[insn->src_reg];
253 	const u8 tmp = bpf2a64[TMP_REG_1];
254 	const u8 tmp2 = bpf2a64[TMP_REG_2];
255 	const s16 off = insn->off;
256 	const s32 imm = insn->imm;
257 	const int i = insn - ctx->prog->insnsi;
258 	const bool is64 = BPF_CLASS(code) == BPF_ALU64;
259 	u8 jmp_cond;
260 	s32 jmp_offset;
261 
262 #define check_imm(bits, imm) do {				\
263 	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
264 	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
265 		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
266 			i, imm, imm);				\
267 		return -EINVAL;					\
268 	}							\
269 } while (0)
270 #define check_imm19(imm) check_imm(19, imm)
271 #define check_imm26(imm) check_imm(26, imm)
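/*
 * check_imm19/check_imm26 work in units of A64 instructions: B.cond, CBZ
 * and CBNZ carry a signed 19-bit offset field (about +/-1 MiB of code),
 * while B carries a signed 26-bit field (about +/-128 MiB), so every
 * computed jump offset is checked against the relevant field width
 * before it is emitted.
 */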
272 
273 	switch (code) {
274 	/* dst = src */
275 	case BPF_ALU | BPF_MOV | BPF_X:
276 	case BPF_ALU64 | BPF_MOV | BPF_X:
277 		emit(A64_MOV(is64, dst, src), ctx);
278 		break;
279 	/* dst = dst OP src */
280 	case BPF_ALU | BPF_ADD | BPF_X:
281 	case BPF_ALU64 | BPF_ADD | BPF_X:
282 		emit(A64_ADD(is64, dst, dst, src), ctx);
283 		break;
284 	case BPF_ALU | BPF_SUB | BPF_X:
285 	case BPF_ALU64 | BPF_SUB | BPF_X:
286 		emit(A64_SUB(is64, dst, dst, src), ctx);
287 		break;
288 	case BPF_ALU | BPF_AND | BPF_X:
289 	case BPF_ALU64 | BPF_AND | BPF_X:
290 		emit(A64_AND(is64, dst, dst, src), ctx);
291 		break;
292 	case BPF_ALU | BPF_OR | BPF_X:
293 	case BPF_ALU64 | BPF_OR | BPF_X:
294 		emit(A64_ORR(is64, dst, dst, src), ctx);
295 		break;
296 	case BPF_ALU | BPF_XOR | BPF_X:
297 	case BPF_ALU64 | BPF_XOR | BPF_X:
298 		emit(A64_EOR(is64, dst, dst, src), ctx);
299 		break;
300 	case BPF_ALU | BPF_MUL | BPF_X:
301 	case BPF_ALU64 | BPF_MUL | BPF_X:
302 		emit(A64_MUL(is64, dst, dst, src), ctx);
303 		break;
304 	case BPF_ALU | BPF_DIV | BPF_X:
305 	case BPF_ALU64 | BPF_DIV | BPF_X:
306 	case BPF_ALU | BPF_MOD | BPF_X:
307 	case BPF_ALU64 | BPF_MOD | BPF_X:
308 	{
309 		const u8 r0 = bpf2a64[BPF_REG_0];
310 
311 		/* if (src == 0) return 0 */
312 		jmp_offset = 3; /* skip ahead to else path */
313 		check_imm19(jmp_offset);
314 		emit(A64_CBNZ(is64, src, jmp_offset), ctx);
315 		emit(A64_MOVZ(1, r0, 0, 0), ctx);
316 		jmp_offset = epilogue_offset(ctx);
317 		check_imm26(jmp_offset);
318 		emit(A64_B(jmp_offset), ctx);
319 		/* else */
320 		switch (BPF_OP(code)) {
321 		case BPF_DIV:
322 			emit(A64_UDIV(is64, dst, dst, src), ctx);
323 			break;
324 		case BPF_MOD:
325 			ctx->tmp_used = 1;
326 			emit(A64_UDIV(is64, tmp, dst, src), ctx);
327 			emit(A64_MUL(is64, tmp, tmp, src), ctx);
328 			emit(A64_SUB(is64, dst, dst, tmp), ctx);
329 			break;
330 		}
331 		break;
332 	}
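	/*
	 * Note on the BPF_DIV/BPF_MOD handling above: AArch64 UDIV by zero
	 * yields 0 without faulting, but a zero divisor is instead made to
	 * return 0 from the whole program: R0 is cleared and control
	 * branches straight to the epilogue, while the CBNZ's offset of 3
	 * instructions skips the MOVZ and the B to land on the else path.
	 */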
333 	case BPF_ALU | BPF_LSH | BPF_X:
334 	case BPF_ALU64 | BPF_LSH | BPF_X:
335 		emit(A64_LSLV(is64, dst, dst, src), ctx);
336 		break;
337 	case BPF_ALU | BPF_RSH | BPF_X:
338 	case BPF_ALU64 | BPF_RSH | BPF_X:
339 		emit(A64_LSRV(is64, dst, dst, src), ctx);
340 		break;
341 	case BPF_ALU | BPF_ARSH | BPF_X:
342 	case BPF_ALU64 | BPF_ARSH | BPF_X:
343 		emit(A64_ASRV(is64, dst, dst, src), ctx);
344 		break;
345 	/* dst = -dst */
346 	case BPF_ALU | BPF_NEG:
347 	case BPF_ALU64 | BPF_NEG:
348 		emit(A64_NEG(is64, dst, dst), ctx);
349 		break;
350 	/* dst = BSWAP##imm(dst) */
351 	case BPF_ALU | BPF_END | BPF_FROM_LE:
352 	case BPF_ALU | BPF_END | BPF_FROM_BE:
353 #ifdef CONFIG_CPU_BIG_ENDIAN
354 		if (BPF_SRC(code) == BPF_FROM_BE)
355 			goto emit_bswap_uxt;
356 #else /* !CONFIG_CPU_BIG_ENDIAN */
357 		if (BPF_SRC(code) == BPF_FROM_LE)
358 			goto emit_bswap_uxt;
359 #endif
360 		switch (imm) {
361 		case 16:
362 			emit(A64_REV16(is64, dst, dst), ctx);
363 			/* zero-extend 16 bits into 64 bits */
364 			emit(A64_UXTH(is64, dst, dst), ctx);
365 			break;
366 		case 32:
367 			emit(A64_REV32(is64, dst, dst), ctx);
368 			/* upper 32 bits already cleared */
369 			break;
370 		case 64:
371 			emit(A64_REV64(dst, dst), ctx);
372 			break;
373 		}
374 		break;
375 emit_bswap_uxt:
376 		switch (imm) {
377 		case 16:
378 			/* zero-extend 16 bits into 64 bits */
379 			emit(A64_UXTH(is64, dst, dst), ctx);
380 			break;
381 		case 32:
382 			/* zero-extend 32 bits into 64 bits */
383 			emit(A64_UXTW(is64, dst, dst), ctx);
384 			break;
385 		case 64:
386 			/* nop */
387 			break;
388 		}
389 		break;
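	/*
	 * Note on the BPF_END handling above: a "to big-endian" conversion
	 * needs a real byte reverse (REV16/REV32/REV64) on a little-endian
	 * kernel but is at most a zero-extension on a big-endian one, and
	 * vice versa for "to little-endian"; hence the
	 * CONFIG_CPU_BIG_ENDIAN-dependent jump to emit_bswap_uxt, which
	 * only zero-extends (or leaves alone) the destination.
	 */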
390 	/* dst = imm */
391 	case BPF_ALU | BPF_MOV | BPF_K:
392 	case BPF_ALU64 | BPF_MOV | BPF_K:
393 		emit_a64_mov_i(is64, dst, imm, ctx);
394 		break;
395 	/* dst = dst OP imm */
396 	case BPF_ALU | BPF_ADD | BPF_K:
397 	case BPF_ALU64 | BPF_ADD | BPF_K:
398 		ctx->tmp_used = 1;
399 		emit_a64_mov_i(is64, tmp, imm, ctx);
400 		emit(A64_ADD(is64, dst, dst, tmp), ctx);
401 		break;
402 	case BPF_ALU | BPF_SUB | BPF_K:
403 	case BPF_ALU64 | BPF_SUB | BPF_K:
404 		ctx->tmp_used = 1;
405 		emit_a64_mov_i(is64, tmp, imm, ctx);
406 		emit(A64_SUB(is64, dst, dst, tmp), ctx);
407 		break;
408 	case BPF_ALU | BPF_AND | BPF_K:
409 	case BPF_ALU64 | BPF_AND | BPF_K:
410 		ctx->tmp_used = 1;
411 		emit_a64_mov_i(is64, tmp, imm, ctx);
412 		emit(A64_AND(is64, dst, dst, tmp), ctx);
413 		break;
414 	case BPF_ALU | BPF_OR | BPF_K:
415 	case BPF_ALU64 | BPF_OR | BPF_K:
416 		ctx->tmp_used = 1;
417 		emit_a64_mov_i(is64, tmp, imm, ctx);
418 		emit(A64_ORR(is64, dst, dst, tmp), ctx);
419 		break;
420 	case BPF_ALU | BPF_XOR | BPF_K:
421 	case BPF_ALU64 | BPF_XOR | BPF_K:
422 		ctx->tmp_used = 1;
423 		emit_a64_mov_i(is64, tmp, imm, ctx);
424 		emit(A64_EOR(is64, dst, dst, tmp), ctx);
425 		break;
426 	case BPF_ALU | BPF_MUL | BPF_K:
427 	case BPF_ALU64 | BPF_MUL | BPF_K:
428 		ctx->tmp_used = 1;
429 		emit_a64_mov_i(is64, tmp, imm, ctx);
430 		emit(A64_MUL(is64, dst, dst, tmp), ctx);
431 		break;
432 	case BPF_ALU | BPF_DIV | BPF_K:
433 	case BPF_ALU64 | BPF_DIV | BPF_K:
434 		ctx->tmp_used = 1;
435 		emit_a64_mov_i(is64, tmp, imm, ctx);
436 		emit(A64_UDIV(is64, dst, dst, tmp), ctx);
437 		break;
438 	case BPF_ALU | BPF_MOD | BPF_K:
439 	case BPF_ALU64 | BPF_MOD | BPF_K:
440 		ctx->tmp_used = 1;
441 		emit_a64_mov_i(is64, tmp2, imm, ctx);
442 		emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
443 		emit(A64_MUL(is64, tmp, tmp, tmp2), ctx);
444 		emit(A64_SUB(is64, dst, dst, tmp), ctx);
445 		break;
446 	case BPF_ALU | BPF_LSH | BPF_K:
447 	case BPF_ALU64 | BPF_LSH | BPF_K:
448 		emit(A64_LSL(is64, dst, dst, imm), ctx);
449 		break;
450 	case BPF_ALU | BPF_RSH | BPF_K:
451 	case BPF_ALU64 | BPF_RSH | BPF_K:
452 		emit(A64_LSR(is64, dst, dst, imm), ctx);
453 		break;
454 	case BPF_ALU | BPF_ARSH | BPF_K:
455 	case BPF_ALU64 | BPF_ARSH | BPF_K:
456 		emit(A64_ASR(is64, dst, dst, imm), ctx);
457 		break;
458 
459 	/* JUMP off */
460 	case BPF_JMP | BPF_JA:
461 		jmp_offset = bpf2a64_offset(i + off, i, ctx);
462 		check_imm26(jmp_offset);
463 		emit(A64_B(jmp_offset), ctx);
464 		break;
465 	/* IF (dst COND src) JUMP off */
466 	case BPF_JMP | BPF_JEQ | BPF_X:
467 	case BPF_JMP | BPF_JGT | BPF_X:
468 	case BPF_JMP | BPF_JGE | BPF_X:
469 	case BPF_JMP | BPF_JNE | BPF_X:
470 	case BPF_JMP | BPF_JSGT | BPF_X:
471 	case BPF_JMP | BPF_JSGE | BPF_X:
472 		emit(A64_CMP(1, dst, src), ctx);
473 emit_cond_jmp:
474 		jmp_offset = bpf2a64_offset(i + off, i, ctx);
475 		check_imm19(jmp_offset);
476 		switch (BPF_OP(code)) {
477 		case BPF_JEQ:
478 			jmp_cond = A64_COND_EQ;
479 			break;
480 		case BPF_JGT:
481 			jmp_cond = A64_COND_HI;
482 			break;
483 		case BPF_JGE:
484 			jmp_cond = A64_COND_CS;
485 			break;
486 		case BPF_JNE:
487 			jmp_cond = A64_COND_NE;
488 			break;
489 		case BPF_JSGT:
490 			jmp_cond = A64_COND_GT;
491 			break;
492 		case BPF_JSGE:
493 			jmp_cond = A64_COND_GE;
494 			break;
495 		default:
496 			return -EFAULT;
497 		}
498 		emit(A64_B_(jmp_cond, jmp_offset), ctx);
499 		break;
500 	case BPF_JMP | BPF_JSET | BPF_X:
501 		emit(A64_TST(1, dst, src), ctx);
502 		goto emit_cond_jmp;
503 	/* IF (dst COND imm) JUMP off */
504 	case BPF_JMP | BPF_JEQ | BPF_K:
505 	case BPF_JMP | BPF_JGT | BPF_K:
506 	case BPF_JMP | BPF_JGE | BPF_K:
507 	case BPF_JMP | BPF_JNE | BPF_K:
508 	case BPF_JMP | BPF_JSGT | BPF_K:
509 	case BPF_JMP | BPF_JSGE | BPF_K:
510 		ctx->tmp_used = 1;
511 		emit_a64_mov_i(1, tmp, imm, ctx);
512 		emit(A64_CMP(1, dst, tmp), ctx);
513 		goto emit_cond_jmp;
514 	case BPF_JMP | BPF_JSET | BPF_K:
515 		ctx->tmp_used = 1;
516 		emit_a64_mov_i(1, tmp, imm, ctx);
517 		emit(A64_TST(1, dst, tmp), ctx);
518 		goto emit_cond_jmp;
519 	/* function call */
520 	case BPF_JMP | BPF_CALL:
521 	{
522 		const u8 r0 = bpf2a64[BPF_REG_0];
523 		const u64 func = (u64)__bpf_call_base + imm;
524 
525 		ctx->tmp_used = 1;
526 		emit_a64_mov_i64(tmp, func, ctx);
527 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
528 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
529 		emit(A64_BLR(tmp), ctx);
530 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
531 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
532 		break;
533 	}
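	/*
	 * Note on BPF_CALL above: the instruction's imm encodes the helper
	 * as an offset from __bpf_call_base, so the absolute address is
	 * rebuilt, loaded into a temporary via the MOVZ/MOVK sequence and
	 * called with BLR; the AAPCS64 return value in x0 is then copied
	 * into BPF_REG_0 (x7).  FP/LR are saved and a fresh frame record
	 * is set up around the call, since BLR clobbers LR.
	 */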
534 	/* function return */
535 	case BPF_JMP | BPF_EXIT:
536 		/* Optimization: when the last instruction is EXIT,
537 		 * simply fall through to the epilogue. */
538 		if (i == ctx->prog->len - 1)
539 			break;
540 		jmp_offset = epilogue_offset(ctx);
541 		check_imm26(jmp_offset);
542 		emit(A64_B(jmp_offset), ctx);
543 		break;
544 
545 	/* dst = imm64 */
546 	case BPF_LD | BPF_IMM | BPF_DW:
547 	{
548 		const struct bpf_insn insn1 = insn[1];
549 		u64 imm64;
550 
551 		if (insn1.code != 0 || insn1.src_reg != 0 ||
552 		    insn1.dst_reg != 0 || insn1.off != 0) {
553 			/* Note: verifier in BPF core must catch invalid
554 			 * instructions.
555 			 */
556 			pr_err_once("Invalid BPF_LD_IMM64 instruction\n");
557 			return -EINVAL;
558 		}
559 
560 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
561 		emit_a64_mov_i64(dst, imm64, ctx);
562 
563 		return 1;
564 	}
565 
566 	/* LDX: dst = *(size *)(src + off) */
567 	case BPF_LDX | BPF_MEM | BPF_W:
568 	case BPF_LDX | BPF_MEM | BPF_H:
569 	case BPF_LDX | BPF_MEM | BPF_B:
570 	case BPF_LDX | BPF_MEM | BPF_DW:
571 		ctx->tmp_used = 1;
572 		emit_a64_mov_i(1, tmp, off, ctx);
573 		switch (BPF_SIZE(code)) {
574 		case BPF_W:
575 			emit(A64_LDR32(dst, src, tmp), ctx);
576 			break;
577 		case BPF_H:
578 			emit(A64_LDRH(dst, src, tmp), ctx);
579 			break;
580 		case BPF_B:
581 			emit(A64_LDRB(dst, src, tmp), ctx);
582 			break;
583 		case BPF_DW:
584 			emit(A64_LDR64(dst, src, tmp), ctx);
585 			break;
586 		}
587 		break;
588 
589 	/* ST: *(size *)(dst + off) = imm */
590 	case BPF_ST | BPF_MEM | BPF_W:
591 	case BPF_ST | BPF_MEM | BPF_H:
592 	case BPF_ST | BPF_MEM | BPF_B:
593 	case BPF_ST | BPF_MEM | BPF_DW:
594 		goto notyet;
595 
596 	/* STX: *(size *)(dst + off) = src */
597 	case BPF_STX | BPF_MEM | BPF_W:
598 	case BPF_STX | BPF_MEM | BPF_H:
599 	case BPF_STX | BPF_MEM | BPF_B:
600 	case BPF_STX | BPF_MEM | BPF_DW:
601 		ctx->tmp_used = 1;
602 		emit_a64_mov_i(1, tmp, off, ctx);
603 		switch (BPF_SIZE(code)) {
604 		case BPF_W:
605 			emit(A64_STR32(src, dst, tmp), ctx);
606 			break;
607 		case BPF_H:
608 			emit(A64_STRH(src, dst, tmp), ctx);
609 			break;
610 		case BPF_B:
611 			emit(A64_STRB(src, dst, tmp), ctx);
612 			break;
613 		case BPF_DW:
614 			emit(A64_STR64(src, dst, tmp), ctx);
615 			break;
616 		}
617 		break;
618 	/* STX XADD: lock *(u32 *)(dst + off) += src */
619 	case BPF_STX | BPF_XADD | BPF_W:
620 	/* STX XADD: lock *(u64 *)(dst + off) += src */
621 	case BPF_STX | BPF_XADD | BPF_DW:
622 		goto notyet;
623 
624 	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
625 	case BPF_LD | BPF_ABS | BPF_W:
626 	case BPF_LD | BPF_ABS | BPF_H:
627 	case BPF_LD | BPF_ABS | BPF_B:
628 	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
629 	case BPF_LD | BPF_IND | BPF_W:
630 	case BPF_LD | BPF_IND | BPF_H:
631 	case BPF_LD | BPF_IND | BPF_B:
632 	{
633 		const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
634 		const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
635 		const u8 fp = bpf2a64[BPF_REG_FP];
636 		const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
637 		const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
638 		const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
639 		const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
640 		const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
641 		int size;
642 
643 		emit(A64_MOV(1, r1, r6), ctx);
644 		emit_a64_mov_i(0, r2, imm, ctx);
645 		if (BPF_MODE(code) == BPF_IND)
646 			emit(A64_ADD(0, r2, r2, src), ctx);
647 		switch (BPF_SIZE(code)) {
648 		case BPF_W:
649 			size = 4;
650 			break;
651 		case BPF_H:
652 			size = 2;
653 			break;
654 		case BPF_B:
655 			size = 1;
656 			break;
657 		default:
658 			return -EINVAL;
659 		}
660 		emit_a64_mov_i64(r3, size, ctx);
661 		emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx);
662 		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
663 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
664 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
665 		emit(A64_BLR(r5), ctx);
666 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
667 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
668 
669 		jmp_offset = epilogue_offset(ctx);
670 		check_imm19(jmp_offset);
671 		emit(A64_CBZ(1, r0, jmp_offset), ctx);
672 		emit(A64_MOV(1, r5, r0), ctx);
673 		switch (BPF_SIZE(code)) {
674 		case BPF_W:
675 			emit(A64_LDR32(r0, r5, A64_ZR), ctx);
676 #ifndef CONFIG_CPU_BIG_ENDIAN
677 			emit(A64_REV32(0, r0, r0), ctx);
678 #endif
679 			break;
680 		case BPF_H:
681 			emit(A64_LDRH(r0, r5, A64_ZR), ctx);
682 #ifndef CONFIG_CPU_BIG_ENDIAN
683 			emit(A64_REV16(0, r0, r0), ctx);
684 #endif
685 			break;
686 		case BPF_B:
687 			emit(A64_LDRB(r0, r5, A64_ZR), ctx);
688 			break;
689 		}
690 		break;
691 	}
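	/*
	 * Note on LD_ABS/LD_IND above: these classic BPF packet loads are
	 * implemented by calling bpf_load_pointer(skb, k, size, buffer)
	 * with the skb from R6 and a scratch buffer for skb_copy_bits().
	 * A NULL return means the load failed and the program returns 0
	 * (the CBZ branches to the epilogue and R0, holding the NULL
	 * result, becomes the 0 return value); otherwise the value is
	 * loaded from the returned pointer and, for 16/32-bit accesses,
	 * converted from network to host byte order with REV on
	 * little-endian kernels.
	 */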
692 notyet:
693 		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
694 		return -EFAULT;
695 
696 	default:
697 		pr_err_once("unknown opcode %02x\n", code);
698 		return -EINVAL;
699 	}
700 
701 	return 0;
702 }
703 
704 static int build_body(struct jit_ctx *ctx)
705 {
706 	const struct bpf_prog *prog = ctx->prog;
707 	int i;
708 
709 	for (i = 0; i < prog->len; i++) {
710 		const struct bpf_insn *insn = &prog->insnsi[i];
711 		int ret;
712 
713 		ret = build_insn(insn, ctx);
714 
715 		if (ctx->image == NULL)
716 			ctx->offset[i] = ctx->idx;
717 
718 		if (ret > 0) {
719 			i++;
720 			continue;
721 		}
722 		if (ret)
723 			return ret;
724 	}
725 
726 	return 0;
727 }
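/*
 * build_body() runs twice: first a "fake" pass with ctx->image == NULL,
 * in which emit() only advances ctx->idx, to size the image and record in
 * ctx->offset[] the A64 index just past each eBPF instruction; then the
 * real pass, which writes the instructions and uses those offsets to
 * resolve branch targets.
 */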
728 
729 static inline void bpf_flush_icache(void *start, void *end)
730 {
731 	flush_icache_range((unsigned long)start, (unsigned long)end);
732 }
733 
734 void bpf_jit_compile(struct bpf_prog *prog)
735 {
736 	/* Nothing to do here. We JIT internal BPF (eBPF) via bpf_int_jit_compile(). */
737 }
738 
739 void bpf_int_jit_compile(struct bpf_prog *prog)
740 {
741 	struct bpf_binary_header *header;
742 	struct jit_ctx ctx;
743 	int image_size;
744 	u8 *image_ptr;
745 
746 	if (!bpf_jit_enable)
747 		return;
748 
749 	if (!prog || !prog->len)
750 		return;
751 
752 	memset(&ctx, 0, sizeof(ctx));
753 	ctx.prog = prog;
754 
755 	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
756 	if (ctx.offset == NULL)
757 		return;
758 
759 	/* 1. Initial fake pass to compute ctx->idx (and thus the image
760 	 * size), and to fill in ctx->offset and ctx->tmp_used. */
761 
762 	if (build_body(&ctx))
763 		goto out;
764 
765 	build_prologue(&ctx);
766 
767 	ctx.epilogue_offset = ctx.idx;
768 	build_epilogue(&ctx);
769 
770 	/* Now we know the actual image size. */
771 	image_size = sizeof(u32) * ctx.idx;
772 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
773 				      sizeof(u32), jit_fill_hole);
774 	if (header == NULL)
775 		goto out;
776 
777 	/* 2. Now, the actual pass. */
778 
779 	ctx.image = (u32 *)image_ptr;
780 	ctx.idx = 0;
781 
782 	build_prologue(&ctx);
783 
784 	if (build_body(&ctx)) {
785 		bpf_jit_binary_free(header);
786 		goto out;
787 	}
788 
789 	build_epilogue(&ctx);
790 
791 	/* And we're done. */
792 	if (bpf_jit_enable > 1)
793 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
794 
795 	bpf_flush_icache(header, ctx.image + ctx.idx);
796 
797 	set_memory_ro((unsigned long)header, header->pages);
798 	prog->bpf_func = (void *)ctx.image;
799 	prog->jited = 1;
800 out:
801 	kfree(ctx.offset);
802 }
803 
804 void bpf_jit_free(struct bpf_prog *prog)
805 {
806 	unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK;
807 	struct bpf_binary_header *header = (void *)addr;
808 
809 	if (!prog->jited)
810 		goto free_filter;
811 
812 	set_memory_rw(addr, header->pages);
813 	bpf_jit_binary_free(header);
814 
815 free_filter:
816 	bpf_prog_unlock_free(prog);
817 }
818