1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * BPF JIT compiler for ARM64
4 *
5 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6 */
7
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9
10 #include <linux/bitfield.h>
11 #include <linux/bpf.h>
12 #include <linux/filter.h>
13 #include <linux/memory.h>
14 #include <linux/printk.h>
15 #include <linux/slab.h>
16
17 #include <asm/asm-extable.h>
18 #include <asm/byteorder.h>
19 #include <asm/cacheflush.h>
20 #include <asm/debug-monitors.h>
21 #include <asm/insn.h>
22 #include <asm/patching.h>
23 #include <asm/set_memory.h>
24
25 #include "bpf_jit.h"
26
/*
 * JIT-private pseudo register numbers. They start at MAX_BPF_JIT_REG and are
 * used as indices into bpf2a64[], next to the BPF_REG_* entries.
 * Note that the numeric order is TMP_REG_1, TMP_REG_2, TCALL_CNT, TMP_REG_3,
 * FP_BOTTOM.
 */
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)	/* tail call counter */
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)	/* fp - ctx->fpb_offset, see build_prologue() */
32
/*
 * check_imm() - bail out if a branch offset does not fit its immediate field.
 * @bits: width of the signed immediate field of the target instruction
 * @imm:  branch offset to validate (evaluated exactly once)
 *
 * A signed @bits-wide field can only hold values in
 * [-2^(bits - 1), 2^(bits - 1) - 1], i.e. it has (bits - 1) magnitude bits.
 * Anything outside that range is reported and rejected.
 *
 * The macro expects a local variable `i` (index of the BPF instruction being
 * translated) in the calling function, and makes that function return
 * -EINVAL when the check fails.
 */
#define check_imm(bits, imm) do {				\
	const s32 __imm = (imm);				\
	if (((__imm > 0) && (__imm >> ((bits) - 1))) ||		\
	    ((__imm < 0) && (~__imm >> ((bits) - 1)))) {	\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, __imm, __imm);			\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
43
/*
 * Map BPF registers to A64 registers.
 *
 * Indexed by BPF_REG_* and by the JIT-private TMP_REG_*, TCALL_CNT and
 * FP_BOTTOM numbers defined above.
 */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt */
	[TCALL_CNT] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	/* set to (BPF fp - ctx->fpb_offset) by build_prologue() */
	[FP_BOTTOM] = A64_R(27),
};
71
/* Per-program state carried through the code generation helpers. */
struct jit_ctx {
	const struct bpf_prog *prog;	/* program being translated */
	int idx;			/* index of the next A64 instruction slot; bumped by emit() */
	int epilogue_offset;		/* instruction index read by epilogue_offset() */
	int *offset;			/* per-BPF-insn A64 offsets, read by bpf2a64_offset() */
	int exentry_idx;		/* next free slot in prog->aux->extable */
	__le32 *image;			/* output buffer; emit() stores nothing while this is NULL */
	u32 stack_size;			/* prog->aux->stack_depth rounded up to 16 bytes */
	int fpb_offset;			/* subtracted from BPF fp to form FP_BOTTOM */
};
82
/*
 * In-image layout of the PLT written by build_plt(): two instructions
 * followed by the 64-bit address they load and branch to.
 */
struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br; /* branch to target */
	u64 target; /* target value */
};

/* Size and byte offset of the 'target' slot inside struct bpf_plt */
#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
91
emit(const u32 insn,struct jit_ctx * ctx)92 static inline void emit(const u32 insn, struct jit_ctx *ctx)
93 {
94 if (ctx->image != NULL)
95 ctx->image[ctx->idx] = cpu_to_le32(insn);
96
97 ctx->idx++;
98 }
99
emit_a64_mov_i(const int is64,const int reg,const s32 val,struct jit_ctx * ctx)100 static inline void emit_a64_mov_i(const int is64, const int reg,
101 const s32 val, struct jit_ctx *ctx)
102 {
103 u16 hi = val >> 16;
104 u16 lo = val & 0xffff;
105
106 if (hi & 0x8000) {
107 if (hi == 0xffff) {
108 emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
109 } else {
110 emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
111 if (lo != 0xffff)
112 emit(A64_MOVK(is64, reg, lo, 0), ctx);
113 }
114 } else {
115 emit(A64_MOVZ(is64, reg, lo, 0), ctx);
116 if (hi)
117 emit(A64_MOVK(is64, reg, hi, 16), ctx);
118 }
119 }
120
/*
 * Count the 16-bit chunks of @val that differ from the filler pattern:
 * 0xffff when @inverse is true, 0x0000 otherwise. The result is in 0..4.
 */
static int i64_i16_blocks(const u64 val, bool inverse)
{
	const u64 filler = inverse ? 0xffff : 0x0000;
	int count = 0;
	int shift;

	for (shift = 0; shift < 64; shift += 16) {
		if (((val >> shift) & 0xffff) != filler)
			count++;
	}

	return count;
}
128
emit_a64_mov_i64(const int reg,const u64 val,struct jit_ctx * ctx)129 static inline void emit_a64_mov_i64(const int reg, const u64 val,
130 struct jit_ctx *ctx)
131 {
132 u64 nrm_tmp = val, rev_tmp = ~val;
133 bool inverse;
134 int shift;
135
136 if (!(nrm_tmp >> 32))
137 return emit_a64_mov_i(0, reg, (u32)val, ctx);
138
139 inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
140 shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
141 (fls64(nrm_tmp) - 1)), 16), 0);
142 if (inverse)
143 emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
144 else
145 emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
146 shift -= 16;
147 while (shift >= 0) {
148 if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
149 emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
150 shift -= 16;
151 }
152 }
153
/* Emit the BTI instruction @insn, but only on CONFIG_ARM64_BTI_KERNEL builds. */
static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		return;

	emit(insn, ctx);
}
159
160 /*
161 * Kernel addresses in the vmalloc space use at most 48 bits, and the
162 * remaining bits are guaranteed to be 0x1. So we can compose the address
163 * with a fixed length movn/movk/movk sequence.
164 */
emit_addr_mov_i64(const int reg,const u64 val,struct jit_ctx * ctx)165 static inline void emit_addr_mov_i64(const int reg, const u64 val,
166 struct jit_ctx *ctx)
167 {
168 u64 tmp = val;
169 int shift = 0;
170
171 emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
172 while (shift < 32) {
173 tmp >>= 16;
174 shift += 16;
175 emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
176 }
177 }
178
/*
 * Emit an indirect call to @target: materialise the address in the first
 * JIT temporary register, then BLR through it.
 */
static inline void emit_call(u64 target, struct jit_ctx *ctx)
{
	const u8 scratch = bpf2a64[TMP_REG_1];

	emit_addr_mov_i64(scratch, target, ctx);
	emit(A64_BLR(scratch), ctx);
}
186
bpf2a64_offset(int bpf_insn,int off,const struct jit_ctx * ctx)187 static inline int bpf2a64_offset(int bpf_insn, int off,
188 const struct jit_ctx *ctx)
189 {
190 /* BPF JMP offset is relative to the next instruction */
191 bpf_insn++;
192 /*
193 * Whereas arm64 branch instructions encode the offset
194 * from the branch itself, so we must subtract 1 from the
195 * instruction offset.
196 */
197 return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
198 }
199
jit_fill_hole(void * area,unsigned int size)200 static void jit_fill_hole(void *area, unsigned int size)
201 {
202 __le32 *ptr;
203 /* We are guaranteed to have aligned memory. */
204 for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
205 *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
206 }
207
epilogue_offset(const struct jit_ctx * ctx)208 static inline int epilogue_offset(const struct jit_ctx *ctx)
209 {
210 int to = ctx->epilogue_offset;
211 int from = ctx->idx;
212
213 return to - from;
214 }
215
/*
 * Return true if @imm fits an ADD/SUB immediate: either a plain 12-bit
 * value, or a 12-bit value shifted left by 12.
 */
static bool is_addsub_imm(u32 imm)
{
	const u32 plain_imm12 = 0xfff;
	const u32 shifted_imm12 = 0xfff000;

	if ((imm & ~plain_imm12) == 0)
		return true;

	return (imm & ~shifted_imm12) == 0;
}
221
222 /*
223 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
224 * Post-index, Pre-index, Unsigned offset.
225 *
226 * For BPF ldr/str, the "unsigned offset" type is sufficient.
227 *
228 * "Unsigned offset" type LDR(immediate) format:
229 *
230 * 3 2 1 0
231 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
232 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
233 * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt |
234 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
235 * scale
236 *
237 * "Unsigned offset" type STR(immediate) format:
238 * 3 2 1 0
239 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
240 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
241 * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt |
242 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
243 * scale
244 *
245 * The offset is calculated from imm12 and scale in the following way:
246 *
247 * offset = (u64)imm12 << scale
248 */
/*
 * Return true if @offset can be encoded as the "unsigned offset" of an
 * LDR/STR (immediate) with the given @scale: it must be non-negative, no
 * larger than 0xFFF << scale, and a multiple of 1 << scale.
 */
static bool is_lsi_offset(int offset, int scale)
{
	return offset >= 0 &&
	       offset <= (0xFFF << scale) &&
	       !(offset & ((1 << scale) - 1));
}
262
/* generated prologue:
 *      bti jc // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x26, [sp, #-16]!
 *      stp x27, x28, [sp, #-16]!
 *      mov x25, sp
 *      mov tcc, #0
 *      // PROLOGUE_OFFSET
 */

/* Number of instructions (0 or 1) each optional feature adds to the prologue */
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/* Offset of nop instruction in bpf prog entry to be poked */
#define POKE_OFFSET (BTI_INSNS + 1)

/*
 * Tail call offset to jump into: the optional bti, "mov x9, lr" + nop (2),
 * the optional paciasp, and the 8 instructions from the first stp through
 * "mov tcc, #0". build_prologue() verifies this count at JIT time.
 */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
287
/*
 * Emit the program prologue.
 *
 * @ctx:            JIT context; ctx->stack_size is set here
 * @ebpf_from_cbpf: true if the program was converted from classic BPF
 *
 * Returns 0 on success, or -1 if the number of instructions emitted up to
 * the tail call entry point does not match PROLOGUE_OFFSET.
 */
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
	const struct bpf_prog *prog = ctx->prog;
	const int start_idx = ctx->idx;
	const u8 bpf_fp = bpf2a64[BPF_REG_FP];
	const u8 tcall_cnt = bpf2a64[TCALL_CNT];
	const u8 fp_bottom = bpf2a64[FP_BOTTOM];
	int emitted;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -80:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 */

	/*
	 * A bpf function may be entered by three kinds of instruction:
	 * 1. bl, attached via freplace to bpf prog via short jump
	 * 2. br, attached via freplace to bpf prog via long jump
	 * 3. blr, working as a function pointer, used by emit_call.
	 * BTI_JC is used so that both br and blr are accepted.
	 */
	emit_bti(A64_BTI_JC, ctx);

	/* Keep a copy of lr in x9, followed by the nop at POKE_OFFSET */
	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	emit(A64_NOP, ctx);

	/* Sign lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_PACIASP, ctx);

	/* Frame record: save FP and LR, as the ARM64 AAPCS expects */
	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
	emit(A64_MOV(1, A64_FP, A64_SP), ctx);

	/* Save the callee-saved registers the JIT uses */
	emit(A64_PUSH(bpf2a64[BPF_REG_6], bpf2a64[BPF_REG_7], A64_SP), ctx);
	emit(A64_PUSH(bpf2a64[BPF_REG_8], bpf2a64[BPF_REG_9], A64_SP), ctx);
	emit(A64_PUSH(bpf_fp, tcall_cnt, A64_SP), ctx);
	emit(A64_PUSH(fp_bottom, A64_R(28), A64_SP), ctx);

	/* BPF fp points at the top of the BPF program stack */
	emit(A64_MOV(1, bpf_fp, A64_SP), ctx);

	if (prog->aux->func_idx == 0 && !ebpf_from_cbpf) {
		/* Main program only: start with a zero tail call count */
		emit(A64_MOVZ(1, tcall_cnt, 0, 0), ctx);

		/* Tail calls jump to this point, so its offset must be exact */
		emitted = ctx->idx - start_idx;
		if (emitted != PROLOGUE_OFFSET) {
			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
				    emitted, PROLOGUE_OFFSET);
			return -1;
		}

		/* BTI landing pad for the tail call, done with a BR */
		emit_bti(A64_BTI_J, ctx);
	}

	emit(A64_SUB_I(1, fp_bottom, bpf_fp, ctx->fpb_offset), ctx);

	/* The stack must stay a multiple of 16 bytes */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	/* Reserve the BPF program stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	return 0;
}
377
378 static int out_offset = -1; /* initialized on the first pass of build_body() */
emit_bpf_tail_call(struct jit_ctx * ctx)379 static int emit_bpf_tail_call(struct jit_ctx *ctx)
380 {
381 /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
382 const u8 r2 = bpf2a64[BPF_REG_2];
383 const u8 r3 = bpf2a64[BPF_REG_3];
384
385 const u8 tmp = bpf2a64[TMP_REG_1];
386 const u8 prg = bpf2a64[TMP_REG_2];
387 const u8 tcc = bpf2a64[TCALL_CNT];
388 const int idx0 = ctx->idx;
389 #define cur_offset (ctx->idx - idx0)
390 #define jmp_offset (out_offset - (cur_offset))
391 size_t off;
392
393 /* if (index >= array->map.max_entries)
394 * goto out;
395 */
396 off = offsetof(struct bpf_array, map.max_entries);
397 emit_a64_mov_i64(tmp, off, ctx);
398 emit(A64_LDR32(tmp, r2, tmp), ctx);
399 emit(A64_MOV(0, r3, r3), ctx);
400 emit(A64_CMP(0, r3, tmp), ctx);
401 emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
402
403 /*
404 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
405 * goto out;
406 * tail_call_cnt++;
407 */
408 emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
409 emit(A64_CMP(1, tcc, tmp), ctx);
410 emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
411 emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
412
413 /* prog = array->ptrs[index];
414 * if (prog == NULL)
415 * goto out;
416 */
417 off = offsetof(struct bpf_array, ptrs);
418 emit_a64_mov_i64(tmp, off, ctx);
419 emit(A64_ADD(1, tmp, r2, tmp), ctx);
420 emit(A64_LSL(1, prg, r3, 3), ctx);
421 emit(A64_LDR64(prg, tmp, prg), ctx);
422 emit(A64_CBZ(1, prg, jmp_offset), ctx);
423
424 /* goto *(prog->bpf_func + prologue_offset); */
425 off = offsetof(struct bpf_prog, bpf_func);
426 emit_a64_mov_i64(tmp, off, ctx);
427 emit(A64_LDR64(tmp, prg, tmp), ctx);
428 emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
429 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
430 emit(A64_BR(tmp), ctx);
431
432 /* out: */
433 if (out_offset == -1)
434 out_offset = cur_offset;
435 if (cur_offset != out_offset) {
436 pr_err_once("tail_call out_offset = %d, expected %d!\n",
437 cur_offset, out_offset);
438 return -1;
439 }
440 return 0;
441 #undef cur_offset
442 #undef jmp_offset
443 }
444
445 #ifdef CONFIG_ARM64_LSE_ATOMICS
emit_lse_atomic(const struct bpf_insn * insn,struct jit_ctx * ctx)446 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
447 {
448 const u8 code = insn->code;
449 const u8 dst = bpf2a64[insn->dst_reg];
450 const u8 src = bpf2a64[insn->src_reg];
451 const u8 tmp = bpf2a64[TMP_REG_1];
452 const u8 tmp2 = bpf2a64[TMP_REG_2];
453 const bool isdw = BPF_SIZE(code) == BPF_DW;
454 const s16 off = insn->off;
455 u8 reg;
456
457 if (!off) {
458 reg = dst;
459 } else {
460 emit_a64_mov_i(1, tmp, off, ctx);
461 emit(A64_ADD(1, tmp, tmp, dst), ctx);
462 reg = tmp;
463 }
464
465 switch (insn->imm) {
466 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
467 case BPF_ADD:
468 emit(A64_STADD(isdw, reg, src), ctx);
469 break;
470 case BPF_AND:
471 emit(A64_MVN(isdw, tmp2, src), ctx);
472 emit(A64_STCLR(isdw, reg, tmp2), ctx);
473 break;
474 case BPF_OR:
475 emit(A64_STSET(isdw, reg, src), ctx);
476 break;
477 case BPF_XOR:
478 emit(A64_STEOR(isdw, reg, src), ctx);
479 break;
480 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
481 case BPF_ADD | BPF_FETCH:
482 emit(A64_LDADDAL(isdw, src, reg, src), ctx);
483 break;
484 case BPF_AND | BPF_FETCH:
485 emit(A64_MVN(isdw, tmp2, src), ctx);
486 emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
487 break;
488 case BPF_OR | BPF_FETCH:
489 emit(A64_LDSETAL(isdw, src, reg, src), ctx);
490 break;
491 case BPF_XOR | BPF_FETCH:
492 emit(A64_LDEORAL(isdw, src, reg, src), ctx);
493 break;
494 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
495 case BPF_XCHG:
496 emit(A64_SWPAL(isdw, src, reg, src), ctx);
497 break;
498 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
499 case BPF_CMPXCHG:
500 emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
501 break;
502 default:
503 pr_err_once("unknown atomic op code %02x\n", insn->imm);
504 return -EINVAL;
505 }
506
507 return 0;
508 }
509 #else
emit_lse_atomic(const struct bpf_insn * insn,struct jit_ctx * ctx)510 static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
511 {
512 return -EINVAL;
513 }
514 #endif
515
emit_ll_sc_atomic(const struct bpf_insn * insn,struct jit_ctx * ctx)516 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
517 {
518 const u8 code = insn->code;
519 const u8 dst = bpf2a64[insn->dst_reg];
520 const u8 src = bpf2a64[insn->src_reg];
521 const u8 tmp = bpf2a64[TMP_REG_1];
522 const u8 tmp2 = bpf2a64[TMP_REG_2];
523 const u8 tmp3 = bpf2a64[TMP_REG_3];
524 const int i = insn - ctx->prog->insnsi;
525 const s32 imm = insn->imm;
526 const s16 off = insn->off;
527 const bool isdw = BPF_SIZE(code) == BPF_DW;
528 u8 reg;
529 s32 jmp_offset;
530
531 if (!off) {
532 reg = dst;
533 } else {
534 emit_a64_mov_i(1, tmp, off, ctx);
535 emit(A64_ADD(1, tmp, tmp, dst), ctx);
536 reg = tmp;
537 }
538
539 if (imm == BPF_ADD || imm == BPF_AND ||
540 imm == BPF_OR || imm == BPF_XOR) {
541 /* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
542 emit(A64_LDXR(isdw, tmp2, reg), ctx);
543 if (imm == BPF_ADD)
544 emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
545 else if (imm == BPF_AND)
546 emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
547 else if (imm == BPF_OR)
548 emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
549 else
550 emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
551 emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
552 jmp_offset = -3;
553 check_imm19(jmp_offset);
554 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
555 } else if (imm == (BPF_ADD | BPF_FETCH) ||
556 imm == (BPF_AND | BPF_FETCH) ||
557 imm == (BPF_OR | BPF_FETCH) ||
558 imm == (BPF_XOR | BPF_FETCH)) {
559 /* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
560 const u8 ax = bpf2a64[BPF_REG_AX];
561
562 emit(A64_MOV(isdw, ax, src), ctx);
563 emit(A64_LDXR(isdw, src, reg), ctx);
564 if (imm == (BPF_ADD | BPF_FETCH))
565 emit(A64_ADD(isdw, tmp2, src, ax), ctx);
566 else if (imm == (BPF_AND | BPF_FETCH))
567 emit(A64_AND(isdw, tmp2, src, ax), ctx);
568 else if (imm == (BPF_OR | BPF_FETCH))
569 emit(A64_ORR(isdw, tmp2, src, ax), ctx);
570 else
571 emit(A64_EOR(isdw, tmp2, src, ax), ctx);
572 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
573 jmp_offset = -3;
574 check_imm19(jmp_offset);
575 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
576 emit(A64_DMB_ISH, ctx);
577 } else if (imm == BPF_XCHG) {
578 /* src_reg = atomic_xchg(dst_reg + off, src_reg); */
579 emit(A64_MOV(isdw, tmp2, src), ctx);
580 emit(A64_LDXR(isdw, src, reg), ctx);
581 emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
582 jmp_offset = -2;
583 check_imm19(jmp_offset);
584 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
585 emit(A64_DMB_ISH, ctx);
586 } else if (imm == BPF_CMPXCHG) {
587 /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
588 const u8 r0 = bpf2a64[BPF_REG_0];
589
590 emit(A64_MOV(isdw, tmp2, r0), ctx);
591 emit(A64_LDXR(isdw, r0, reg), ctx);
592 emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
593 jmp_offset = 4;
594 check_imm19(jmp_offset);
595 emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
596 emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
597 jmp_offset = -4;
598 check_imm19(jmp_offset);
599 emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
600 emit(A64_DMB_ISH, ctx);
601 } else {
602 pr_err_once("unknown atomic op code %02x\n", imm);
603 return -EINVAL;
604 }
605
606 return 0;
607 }
608
/*
 * dummy_tramp - initial branch target stored in a program's PLT by
 * build_plt().
 *
 * It copies x30 to x10, reloads x30 from x9 and returns to x10. x9 is
 * expected to hold the lr value that the generated prologue saved with
 * "mov x9, lr" just before the poke site.
 */
void dummy_tramp(void);

asm (
	" .pushsection .text, \"ax\", @progbits\n"
	" .global dummy_tramp\n"
	" .type dummy_tramp, %function\n"
	"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
	" bti j\n" /* dummy_tramp is called via "br x10" */
#endif
	" mov x10, x30\n"	/* x10 = address to return to */
	" mov x30, x9\n"	/* put the lr copy held in x9 back */
	" ret x10\n"
	" .size dummy_tramp, .-dummy_tramp\n"
	" .popsection\n"
);
625
626 /* build a plt initialized like this:
627 *
628 * plt:
629 * ldr tmp, target
630 * br tmp
631 * target:
632 * .quad dummy_tramp
633 *
634 * when a long jump trampoline is attached, target is filled with the
635 * trampoline address, and when the trampoline is removed, target is
636 * restored to dummy_tramp address.
637 */
build_plt(struct jit_ctx * ctx)638 static void build_plt(struct jit_ctx *ctx)
639 {
640 const u8 tmp = bpf2a64[TMP_REG_1];
641 struct bpf_plt *plt = NULL;
642
643 /* make sure target is 64-bit aligned */
644 if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
645 emit(A64_NOP, ctx);
646
647 plt = (struct bpf_plt *)(ctx->image + ctx->idx);
648 /* plt is called via bl, no BTI needed here */
649 emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
650 emit(A64_BR(tmp), ctx);
651
652 if (ctx->image)
653 plt->target = (u64)&dummy_tramp;
654 }
655
build_epilogue(struct jit_ctx * ctx)656 static void build_epilogue(struct jit_ctx *ctx)
657 {
658 const u8 r0 = bpf2a64[BPF_REG_0];
659 const u8 r6 = bpf2a64[BPF_REG_6];
660 const u8 r7 = bpf2a64[BPF_REG_7];
661 const u8 r8 = bpf2a64[BPF_REG_8];
662 const u8 r9 = bpf2a64[BPF_REG_9];
663 const u8 fp = bpf2a64[BPF_REG_FP];
664 const u8 fpb = bpf2a64[FP_BOTTOM];
665
666 /* We're done with BPF stack */
667 emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
668
669 /* Restore x27 and x28 */
670 emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
671 /* Restore fs (x25) and x26 */
672 emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
673
674 /* Restore callee-saved register */
675 emit(A64_POP(r8, r9, A64_SP), ctx);
676 emit(A64_POP(r6, r7, A64_SP), ctx);
677
678 /* Restore FP/LR registers */
679 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
680
681 /* Set return value */
682 emit(A64_MOV(1, A64_R(0), r0), ctx);
683
684 /* Authenticate lr */
685 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
686 emit(A64_AUTIASP, ctx);
687
688 emit(A64_RET(A64_LR), ctx);
689 }
690
691 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
692 #define BPF_FIXUP_REG_MASK GENMASK(31, 27)
693
ex_handler_bpf(const struct exception_table_entry * ex,struct pt_regs * regs)694 bool ex_handler_bpf(const struct exception_table_entry *ex,
695 struct pt_regs *regs)
696 {
697 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
698 int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
699
700 regs->regs[dst_reg] = 0;
701 regs->pc = (unsigned long)&ex->fixup - offset;
702 return true;
703 }
704
705 /* For accesses to BTF pointers, add an entry to the exception table */
add_exception_handler(const struct bpf_insn * insn,struct jit_ctx * ctx,int dst_reg)706 static int add_exception_handler(const struct bpf_insn *insn,
707 struct jit_ctx *ctx,
708 int dst_reg)
709 {
710 off_t offset;
711 unsigned long pc;
712 struct exception_table_entry *ex;
713
714 if (!ctx->image)
715 /* First pass */
716 return 0;
717
718 if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
719 BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
720 return 0;
721
722 if (!ctx->prog->aux->extable ||
723 WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
724 return -EINVAL;
725
726 ex = &ctx->prog->aux->extable[ctx->exentry_idx];
727 pc = (unsigned long)&ctx->image[ctx->idx - 1];
728
729 offset = pc - (long)&ex->insn;
730 if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
731 return -ERANGE;
732 ex->insn = offset;
733
734 /*
735 * Since the extable follows the program, the fixup offset is always
736 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
737 * to keep things simple, and put the destination register in the upper
738 * bits. We don't need to worry about buildtime or runtime sort
739 * modifying the upper bits because the table is already sorted, and
740 * isn't part of the main exception table.
741 */
742 offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
743 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
744 return -ERANGE;
745
746 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
747 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
748
749 ex->type = EX_TYPE_BPF;
750
751 ctx->exentry_idx++;
752 return 0;
753 }
754
755 /* JITs an eBPF instruction.
756 * Returns:
757 * 0 - successfully JITed an 8-byte eBPF instruction.
758 * >0 - successfully JITed a 16-byte eBPF instruction.
759 * <0 - failed to JIT.
760 */
build_insn(const struct bpf_insn * insn,struct jit_ctx * ctx,bool extra_pass)761 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
762 bool extra_pass)
763 {
764 const u8 code = insn->code;
765 const u8 dst = bpf2a64[insn->dst_reg];
766 const u8 src = bpf2a64[insn->src_reg];
767 const u8 tmp = bpf2a64[TMP_REG_1];
768 const u8 tmp2 = bpf2a64[TMP_REG_2];
769 const u8 fp = bpf2a64[BPF_REG_FP];
770 const u8 fpb = bpf2a64[FP_BOTTOM];
771 const s16 off = insn->off;
772 const s32 imm = insn->imm;
773 const int i = insn - ctx->prog->insnsi;
774 const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
775 BPF_CLASS(code) == BPF_JMP;
776 u8 jmp_cond;
777 s32 jmp_offset;
778 u32 a64_insn;
779 u8 src_adj;
780 u8 dst_adj;
781 int off_adj;
782 int ret;
783 bool sign_extend;
784
785 switch (code) {
786 /* dst = src */
787 case BPF_ALU | BPF_MOV | BPF_X:
788 case BPF_ALU64 | BPF_MOV | BPF_X:
789 switch (insn->off) {
790 case 0:
791 emit(A64_MOV(is64, dst, src), ctx);
792 break;
793 case 8:
794 emit(A64_SXTB(is64, dst, src), ctx);
795 break;
796 case 16:
797 emit(A64_SXTH(is64, dst, src), ctx);
798 break;
799 case 32:
800 emit(A64_SXTW(is64, dst, src), ctx);
801 break;
802 }
803 break;
804 /* dst = dst OP src */
805 case BPF_ALU | BPF_ADD | BPF_X:
806 case BPF_ALU64 | BPF_ADD | BPF_X:
807 emit(A64_ADD(is64, dst, dst, src), ctx);
808 break;
809 case BPF_ALU | BPF_SUB | BPF_X:
810 case BPF_ALU64 | BPF_SUB | BPF_X:
811 emit(A64_SUB(is64, dst, dst, src), ctx);
812 break;
813 case BPF_ALU | BPF_AND | BPF_X:
814 case BPF_ALU64 | BPF_AND | BPF_X:
815 emit(A64_AND(is64, dst, dst, src), ctx);
816 break;
817 case BPF_ALU | BPF_OR | BPF_X:
818 case BPF_ALU64 | BPF_OR | BPF_X:
819 emit(A64_ORR(is64, dst, dst, src), ctx);
820 break;
821 case BPF_ALU | BPF_XOR | BPF_X:
822 case BPF_ALU64 | BPF_XOR | BPF_X:
823 emit(A64_EOR(is64, dst, dst, src), ctx);
824 break;
825 case BPF_ALU | BPF_MUL | BPF_X:
826 case BPF_ALU64 | BPF_MUL | BPF_X:
827 emit(A64_MUL(is64, dst, dst, src), ctx);
828 break;
829 case BPF_ALU | BPF_DIV | BPF_X:
830 case BPF_ALU64 | BPF_DIV | BPF_X:
831 if (!off)
832 emit(A64_UDIV(is64, dst, dst, src), ctx);
833 else
834 emit(A64_SDIV(is64, dst, dst, src), ctx);
835 break;
836 case BPF_ALU | BPF_MOD | BPF_X:
837 case BPF_ALU64 | BPF_MOD | BPF_X:
838 if (!off)
839 emit(A64_UDIV(is64, tmp, dst, src), ctx);
840 else
841 emit(A64_SDIV(is64, tmp, dst, src), ctx);
842 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
843 break;
844 case BPF_ALU | BPF_LSH | BPF_X:
845 case BPF_ALU64 | BPF_LSH | BPF_X:
846 emit(A64_LSLV(is64, dst, dst, src), ctx);
847 break;
848 case BPF_ALU | BPF_RSH | BPF_X:
849 case BPF_ALU64 | BPF_RSH | BPF_X:
850 emit(A64_LSRV(is64, dst, dst, src), ctx);
851 break;
852 case BPF_ALU | BPF_ARSH | BPF_X:
853 case BPF_ALU64 | BPF_ARSH | BPF_X:
854 emit(A64_ASRV(is64, dst, dst, src), ctx);
855 break;
856 /* dst = -dst */
857 case BPF_ALU | BPF_NEG:
858 case BPF_ALU64 | BPF_NEG:
859 emit(A64_NEG(is64, dst, dst), ctx);
860 break;
861 /* dst = BSWAP##imm(dst) */
862 case BPF_ALU | BPF_END | BPF_FROM_LE:
863 case BPF_ALU | BPF_END | BPF_FROM_BE:
864 case BPF_ALU64 | BPF_END | BPF_FROM_LE:
865 #ifdef CONFIG_CPU_BIG_ENDIAN
866 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
867 goto emit_bswap_uxt;
868 #else /* !CONFIG_CPU_BIG_ENDIAN */
869 if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
870 goto emit_bswap_uxt;
871 #endif
872 switch (imm) {
873 case 16:
874 emit(A64_REV16(is64, dst, dst), ctx);
875 /* zero-extend 16 bits into 64 bits */
876 emit(A64_UXTH(is64, dst, dst), ctx);
877 break;
878 case 32:
879 emit(A64_REV32(0, dst, dst), ctx);
880 /* upper 32 bits already cleared */
881 break;
882 case 64:
883 emit(A64_REV64(dst, dst), ctx);
884 break;
885 }
886 break;
887 emit_bswap_uxt:
888 switch (imm) {
889 case 16:
890 /* zero-extend 16 bits into 64 bits */
891 emit(A64_UXTH(is64, dst, dst), ctx);
892 break;
893 case 32:
894 /* zero-extend 32 bits into 64 bits */
895 emit(A64_UXTW(is64, dst, dst), ctx);
896 break;
897 case 64:
898 /* nop */
899 break;
900 }
901 break;
902 /* dst = imm */
903 case BPF_ALU | BPF_MOV | BPF_K:
904 case BPF_ALU64 | BPF_MOV | BPF_K:
905 emit_a64_mov_i(is64, dst, imm, ctx);
906 break;
907 /* dst = dst OP imm */
908 case BPF_ALU | BPF_ADD | BPF_K:
909 case BPF_ALU64 | BPF_ADD | BPF_K:
910 if (is_addsub_imm(imm)) {
911 emit(A64_ADD_I(is64, dst, dst, imm), ctx);
912 } else if (is_addsub_imm(-imm)) {
913 emit(A64_SUB_I(is64, dst, dst, -imm), ctx);
914 } else {
915 emit_a64_mov_i(is64, tmp, imm, ctx);
916 emit(A64_ADD(is64, dst, dst, tmp), ctx);
917 }
918 break;
919 case BPF_ALU | BPF_SUB | BPF_K:
920 case BPF_ALU64 | BPF_SUB | BPF_K:
921 if (is_addsub_imm(imm)) {
922 emit(A64_SUB_I(is64, dst, dst, imm), ctx);
923 } else if (is_addsub_imm(-imm)) {
924 emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
925 } else {
926 emit_a64_mov_i(is64, tmp, imm, ctx);
927 emit(A64_SUB(is64, dst, dst, tmp), ctx);
928 }
929 break;
930 case BPF_ALU | BPF_AND | BPF_K:
931 case BPF_ALU64 | BPF_AND | BPF_K:
932 a64_insn = A64_AND_I(is64, dst, dst, imm);
933 if (a64_insn != AARCH64_BREAK_FAULT) {
934 emit(a64_insn, ctx);
935 } else {
936 emit_a64_mov_i(is64, tmp, imm, ctx);
937 emit(A64_AND(is64, dst, dst, tmp), ctx);
938 }
939 break;
940 case BPF_ALU | BPF_OR | BPF_K:
941 case BPF_ALU64 | BPF_OR | BPF_K:
942 a64_insn = A64_ORR_I(is64, dst, dst, imm);
943 if (a64_insn != AARCH64_BREAK_FAULT) {
944 emit(a64_insn, ctx);
945 } else {
946 emit_a64_mov_i(is64, tmp, imm, ctx);
947 emit(A64_ORR(is64, dst, dst, tmp), ctx);
948 }
949 break;
950 case BPF_ALU | BPF_XOR | BPF_K:
951 case BPF_ALU64 | BPF_XOR | BPF_K:
952 a64_insn = A64_EOR_I(is64, dst, dst, imm);
953 if (a64_insn != AARCH64_BREAK_FAULT) {
954 emit(a64_insn, ctx);
955 } else {
956 emit_a64_mov_i(is64, tmp, imm, ctx);
957 emit(A64_EOR(is64, dst, dst, tmp), ctx);
958 }
959 break;
960 case BPF_ALU | BPF_MUL | BPF_K:
961 case BPF_ALU64 | BPF_MUL | BPF_K:
962 emit_a64_mov_i(is64, tmp, imm, ctx);
963 emit(A64_MUL(is64, dst, dst, tmp), ctx);
964 break;
965 case BPF_ALU | BPF_DIV | BPF_K:
966 case BPF_ALU64 | BPF_DIV | BPF_K:
967 emit_a64_mov_i(is64, tmp, imm, ctx);
968 if (!off)
969 emit(A64_UDIV(is64, dst, dst, tmp), ctx);
970 else
971 emit(A64_SDIV(is64, dst, dst, tmp), ctx);
972 break;
973 case BPF_ALU | BPF_MOD | BPF_K:
974 case BPF_ALU64 | BPF_MOD | BPF_K:
975 emit_a64_mov_i(is64, tmp2, imm, ctx);
976 if (!off)
977 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
978 else
979 emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
980 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
981 break;
982 case BPF_ALU | BPF_LSH | BPF_K:
983 case BPF_ALU64 | BPF_LSH | BPF_K:
984 emit(A64_LSL(is64, dst, dst, imm), ctx);
985 break;
986 case BPF_ALU | BPF_RSH | BPF_K:
987 case BPF_ALU64 | BPF_RSH | BPF_K:
988 emit(A64_LSR(is64, dst, dst, imm), ctx);
989 break;
990 case BPF_ALU | BPF_ARSH | BPF_K:
991 case BPF_ALU64 | BPF_ARSH | BPF_K:
992 emit(A64_ASR(is64, dst, dst, imm), ctx);
993 break;
994
995 /* JUMP off */
996 case BPF_JMP | BPF_JA:
997 case BPF_JMP32 | BPF_JA:
998 if (BPF_CLASS(code) == BPF_JMP)
999 jmp_offset = bpf2a64_offset(i, off, ctx);
1000 else
1001 jmp_offset = bpf2a64_offset(i, imm, ctx);
1002 check_imm26(jmp_offset);
1003 emit(A64_B(jmp_offset), ctx);
1004 break;
1005 /* IF (dst COND src) JUMP off */
1006 case BPF_JMP | BPF_JEQ | BPF_X:
1007 case BPF_JMP | BPF_JGT | BPF_X:
1008 case BPF_JMP | BPF_JLT | BPF_X:
1009 case BPF_JMP | BPF_JGE | BPF_X:
1010 case BPF_JMP | BPF_JLE | BPF_X:
1011 case BPF_JMP | BPF_JNE | BPF_X:
1012 case BPF_JMP | BPF_JSGT | BPF_X:
1013 case BPF_JMP | BPF_JSLT | BPF_X:
1014 case BPF_JMP | BPF_JSGE | BPF_X:
1015 case BPF_JMP | BPF_JSLE | BPF_X:
1016 case BPF_JMP32 | BPF_JEQ | BPF_X:
1017 case BPF_JMP32 | BPF_JGT | BPF_X:
1018 case BPF_JMP32 | BPF_JLT | BPF_X:
1019 case BPF_JMP32 | BPF_JGE | BPF_X:
1020 case BPF_JMP32 | BPF_JLE | BPF_X:
1021 case BPF_JMP32 | BPF_JNE | BPF_X:
1022 case BPF_JMP32 | BPF_JSGT | BPF_X:
1023 case BPF_JMP32 | BPF_JSLT | BPF_X:
1024 case BPF_JMP32 | BPF_JSGE | BPF_X:
1025 case BPF_JMP32 | BPF_JSLE | BPF_X:
1026 emit(A64_CMP(is64, dst, src), ctx);
1027 emit_cond_jmp:
1028 jmp_offset = bpf2a64_offset(i, off, ctx);
1029 check_imm19(jmp_offset);
1030 switch (BPF_OP(code)) {
1031 case BPF_JEQ:
1032 jmp_cond = A64_COND_EQ;
1033 break;
1034 case BPF_JGT:
1035 jmp_cond = A64_COND_HI;
1036 break;
1037 case BPF_JLT:
1038 jmp_cond = A64_COND_CC;
1039 break;
1040 case BPF_JGE:
1041 jmp_cond = A64_COND_CS;
1042 break;
1043 case BPF_JLE:
1044 jmp_cond = A64_COND_LS;
1045 break;
1046 case BPF_JSET:
1047 case BPF_JNE:
1048 jmp_cond = A64_COND_NE;
1049 break;
1050 case BPF_JSGT:
1051 jmp_cond = A64_COND_GT;
1052 break;
1053 case BPF_JSLT:
1054 jmp_cond = A64_COND_LT;
1055 break;
1056 case BPF_JSGE:
1057 jmp_cond = A64_COND_GE;
1058 break;
1059 case BPF_JSLE:
1060 jmp_cond = A64_COND_LE;
1061 break;
1062 default:
1063 return -EFAULT;
1064 }
1065 emit(A64_B_(jmp_cond, jmp_offset), ctx);
1066 break;
1067 case BPF_JMP | BPF_JSET | BPF_X:
1068 case BPF_JMP32 | BPF_JSET | BPF_X:
1069 emit(A64_TST(is64, dst, src), ctx);
1070 goto emit_cond_jmp;
1071 /* IF (dst COND imm) JUMP off */
1072 case BPF_JMP | BPF_JEQ | BPF_K:
1073 case BPF_JMP | BPF_JGT | BPF_K:
1074 case BPF_JMP | BPF_JLT | BPF_K:
1075 case BPF_JMP | BPF_JGE | BPF_K:
1076 case BPF_JMP | BPF_JLE | BPF_K:
1077 case BPF_JMP | BPF_JNE | BPF_K:
1078 case BPF_JMP | BPF_JSGT | BPF_K:
1079 case BPF_JMP | BPF_JSLT | BPF_K:
1080 case BPF_JMP | BPF_JSGE | BPF_K:
1081 case BPF_JMP | BPF_JSLE | BPF_K:
1082 case BPF_JMP32 | BPF_JEQ | BPF_K:
1083 case BPF_JMP32 | BPF_JGT | BPF_K:
1084 case BPF_JMP32 | BPF_JLT | BPF_K:
1085 case BPF_JMP32 | BPF_JGE | BPF_K:
1086 case BPF_JMP32 | BPF_JLE | BPF_K:
1087 case BPF_JMP32 | BPF_JNE | BPF_K:
1088 case BPF_JMP32 | BPF_JSGT | BPF_K:
1089 case BPF_JMP32 | BPF_JSLT | BPF_K:
1090 case BPF_JMP32 | BPF_JSGE | BPF_K:
1091 case BPF_JMP32 | BPF_JSLE | BPF_K:
1092 if (is_addsub_imm(imm)) {
1093 emit(A64_CMP_I(is64, dst, imm), ctx);
1094 } else if (is_addsub_imm(-imm)) {
1095 emit(A64_CMN_I(is64, dst, -imm), ctx);
1096 } else {
1097 emit_a64_mov_i(is64, tmp, imm, ctx);
1098 emit(A64_CMP(is64, dst, tmp), ctx);
1099 }
1100 goto emit_cond_jmp;
1101 case BPF_JMP | BPF_JSET | BPF_K:
1102 case BPF_JMP32 | BPF_JSET | BPF_K:
1103 a64_insn = A64_TST_I(is64, dst, imm);
1104 if (a64_insn != AARCH64_BREAK_FAULT) {
1105 emit(a64_insn, ctx);
1106 } else {
1107 emit_a64_mov_i(is64, tmp, imm, ctx);
1108 emit(A64_TST(is64, dst, tmp), ctx);
1109 }
1110 goto emit_cond_jmp;
1111 /* function call */
1112 case BPF_JMP | BPF_CALL:
1113 {
1114 const u8 r0 = bpf2a64[BPF_REG_0];
1115 bool func_addr_fixed;
1116 u64 func_addr;
1117
1118 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1119 &func_addr, &func_addr_fixed);
1120 if (ret < 0)
1121 return ret;
1122 emit_call(func_addr, ctx);
1123 emit(A64_MOV(1, r0, A64_R(0)), ctx);
1124 break;
1125 }
1126 /* tail call */
1127 case BPF_JMP | BPF_TAIL_CALL:
1128 if (emit_bpf_tail_call(ctx))
1129 return -EFAULT;
1130 break;
1131 /* function return */
1132 case BPF_JMP | BPF_EXIT:
1133 /* Optimization: when last instruction is EXIT,
1134 simply fallthrough to epilogue. */
1135 if (i == ctx->prog->len - 1)
1136 break;
1137 jmp_offset = epilogue_offset(ctx);
1138 check_imm26(jmp_offset);
1139 emit(A64_B(jmp_offset), ctx);
1140 break;
1141
1142 /* dst = imm64 */
1143 case BPF_LD | BPF_IMM | BPF_DW:
1144 {
1145 const struct bpf_insn insn1 = insn[1];
1146 u64 imm64;
1147
1148 imm64 = (u64)insn1.imm << 32 | (u32)imm;
1149 if (bpf_pseudo_func(insn))
1150 emit_addr_mov_i64(dst, imm64, ctx);
1151 else
1152 emit_a64_mov_i64(dst, imm64, ctx);
1153
1154 return 1;
1155 }
1156
1157 /* LDX: dst = (u64)*(unsigned size *)(src + off) */
1158 case BPF_LDX | BPF_MEM | BPF_W:
1159 case BPF_LDX | BPF_MEM | BPF_H:
1160 case BPF_LDX | BPF_MEM | BPF_B:
1161 case BPF_LDX | BPF_MEM | BPF_DW:
1162 case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1163 case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1164 case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1165 case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1166 /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
1167 case BPF_LDX | BPF_MEMSX | BPF_B:
1168 case BPF_LDX | BPF_MEMSX | BPF_H:
1169 case BPF_LDX | BPF_MEMSX | BPF_W:
1170 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1171 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1172 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1173 if (ctx->fpb_offset > 0 && src == fp) {
1174 src_adj = fpb;
1175 off_adj = off + ctx->fpb_offset;
1176 } else {
1177 src_adj = src;
1178 off_adj = off;
1179 }
1180 sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
1181 BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
1182 switch (BPF_SIZE(code)) {
1183 case BPF_W:
1184 if (is_lsi_offset(off_adj, 2)) {
1185 if (sign_extend)
1186 emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
1187 else
1188 emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
1189 } else {
1190 emit_a64_mov_i(1, tmp, off, ctx);
1191 if (sign_extend)
1192 emit(A64_LDRSW(dst, src, tmp), ctx);
1193 else
1194 emit(A64_LDR32(dst, src, tmp), ctx);
1195 }
1196 break;
1197 case BPF_H:
1198 if (is_lsi_offset(off_adj, 1)) {
1199 if (sign_extend)
1200 emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
1201 else
1202 emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
1203 } else {
1204 emit_a64_mov_i(1, tmp, off, ctx);
1205 if (sign_extend)
1206 emit(A64_LDRSH(dst, src, tmp), ctx);
1207 else
1208 emit(A64_LDRH(dst, src, tmp), ctx);
1209 }
1210 break;
1211 case BPF_B:
1212 if (is_lsi_offset(off_adj, 0)) {
1213 if (sign_extend)
1214 emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
1215 else
1216 emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
1217 } else {
1218 emit_a64_mov_i(1, tmp, off, ctx);
1219 if (sign_extend)
1220 emit(A64_LDRSB(dst, src, tmp), ctx);
1221 else
1222 emit(A64_LDRB(dst, src, tmp), ctx);
1223 }
1224 break;
1225 case BPF_DW:
1226 if (is_lsi_offset(off_adj, 3)) {
1227 emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
1228 } else {
1229 emit_a64_mov_i(1, tmp, off, ctx);
1230 emit(A64_LDR64(dst, src, tmp), ctx);
1231 }
1232 break;
1233 }
1234
1235 ret = add_exception_handler(insn, ctx, dst);
1236 if (ret)
1237 return ret;
1238 break;
1239
1240 /* speculation barrier */
1241 case BPF_ST | BPF_NOSPEC:
1242 /*
1243 * Nothing required here.
1244 *
1245 * In case of arm64, we rely on the firmware mitigation of
1246 * Speculative Store Bypass as controlled via the ssbd kernel
1247 * parameter. Whenever the mitigation is enabled, it works
1248 * for all of the kernel code with no need to provide any
1249 * additional instructions.
1250 */
1251 break;
1252
1253 /* ST: *(size *)(dst + off) = imm */
1254 case BPF_ST | BPF_MEM | BPF_W:
1255 case BPF_ST | BPF_MEM | BPF_H:
1256 case BPF_ST | BPF_MEM | BPF_B:
1257 case BPF_ST | BPF_MEM | BPF_DW:
1258 if (ctx->fpb_offset > 0 && dst == fp) {
1259 dst_adj = fpb;
1260 off_adj = off + ctx->fpb_offset;
1261 } else {
1262 dst_adj = dst;
1263 off_adj = off;
1264 }
1265 /* Load imm to a register then store it */
1266 emit_a64_mov_i(1, tmp, imm, ctx);
1267 switch (BPF_SIZE(code)) {
1268 case BPF_W:
1269 if (is_lsi_offset(off_adj, 2)) {
1270 emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
1271 } else {
1272 emit_a64_mov_i(1, tmp2, off, ctx);
1273 emit(A64_STR32(tmp, dst, tmp2), ctx);
1274 }
1275 break;
1276 case BPF_H:
1277 if (is_lsi_offset(off_adj, 1)) {
1278 emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
1279 } else {
1280 emit_a64_mov_i(1, tmp2, off, ctx);
1281 emit(A64_STRH(tmp, dst, tmp2), ctx);
1282 }
1283 break;
1284 case BPF_B:
1285 if (is_lsi_offset(off_adj, 0)) {
1286 emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
1287 } else {
1288 emit_a64_mov_i(1, tmp2, off, ctx);
1289 emit(A64_STRB(tmp, dst, tmp2), ctx);
1290 }
1291 break;
1292 case BPF_DW:
1293 if (is_lsi_offset(off_adj, 3)) {
1294 emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
1295 } else {
1296 emit_a64_mov_i(1, tmp2, off, ctx);
1297 emit(A64_STR64(tmp, dst, tmp2), ctx);
1298 }
1299 break;
1300 }
1301 break;
1302
1303 /* STX: *(size *)(dst + off) = src */
1304 case BPF_STX | BPF_MEM | BPF_W:
1305 case BPF_STX | BPF_MEM | BPF_H:
1306 case BPF_STX | BPF_MEM | BPF_B:
1307 case BPF_STX | BPF_MEM | BPF_DW:
1308 if (ctx->fpb_offset > 0 && dst == fp) {
1309 dst_adj = fpb;
1310 off_adj = off + ctx->fpb_offset;
1311 } else {
1312 dst_adj = dst;
1313 off_adj = off;
1314 }
1315 switch (BPF_SIZE(code)) {
1316 case BPF_W:
1317 if (is_lsi_offset(off_adj, 2)) {
1318 emit(A64_STR32I(src, dst_adj, off_adj), ctx);
1319 } else {
1320 emit_a64_mov_i(1, tmp, off, ctx);
1321 emit(A64_STR32(src, dst, tmp), ctx);
1322 }
1323 break;
1324 case BPF_H:
1325 if (is_lsi_offset(off_adj, 1)) {
1326 emit(A64_STRHI(src, dst_adj, off_adj), ctx);
1327 } else {
1328 emit_a64_mov_i(1, tmp, off, ctx);
1329 emit(A64_STRH(src, dst, tmp), ctx);
1330 }
1331 break;
1332 case BPF_B:
1333 if (is_lsi_offset(off_adj, 0)) {
1334 emit(A64_STRBI(src, dst_adj, off_adj), ctx);
1335 } else {
1336 emit_a64_mov_i(1, tmp, off, ctx);
1337 emit(A64_STRB(src, dst, tmp), ctx);
1338 }
1339 break;
1340 case BPF_DW:
1341 if (is_lsi_offset(off_adj, 3)) {
1342 emit(A64_STR64I(src, dst_adj, off_adj), ctx);
1343 } else {
1344 emit_a64_mov_i(1, tmp, off, ctx);
1345 emit(A64_STR64(src, dst, tmp), ctx);
1346 }
1347 break;
1348 }
1349 break;
1350
1351 case BPF_STX | BPF_ATOMIC | BPF_W:
1352 case BPF_STX | BPF_ATOMIC | BPF_DW:
1353 if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
1354 ret = emit_lse_atomic(insn, ctx);
1355 else
1356 ret = emit_ll_sc_atomic(insn, ctx);
1357 if (ret)
1358 return ret;
1359 break;
1360
1361 default:
1362 pr_err_once("unknown opcode %02x\n", code);
1363 return -EINVAL;
1364 }
1365
1366 return 0;
1367 }
1368
1369 /*
1370 * Return 0 if FP may change at runtime, otherwise find the minimum negative
1371 * offset to FP, converts it to positive number, and align down to 8 bytes.
1372 */
find_fpb_offset(struct bpf_prog * prog)1373 static int find_fpb_offset(struct bpf_prog *prog)
1374 {
1375 int i;
1376 int offset = 0;
1377
1378 for (i = 0; i < prog->len; i++) {
1379 const struct bpf_insn *insn = &prog->insnsi[i];
1380 const u8 class = BPF_CLASS(insn->code);
1381 const u8 mode = BPF_MODE(insn->code);
1382 const u8 src = insn->src_reg;
1383 const u8 dst = insn->dst_reg;
1384 const s32 imm = insn->imm;
1385 const s16 off = insn->off;
1386
1387 switch (class) {
1388 case BPF_STX:
1389 case BPF_ST:
1390 /* fp holds atomic operation result */
1391 if (class == BPF_STX && mode == BPF_ATOMIC &&
1392 ((imm == BPF_XCHG ||
1393 imm == (BPF_FETCH | BPF_ADD) ||
1394 imm == (BPF_FETCH | BPF_AND) ||
1395 imm == (BPF_FETCH | BPF_XOR) ||
1396 imm == (BPF_FETCH | BPF_OR)) &&
1397 src == BPF_REG_FP))
1398 return 0;
1399
1400 if (mode == BPF_MEM && dst == BPF_REG_FP &&
1401 off < offset)
1402 offset = insn->off;
1403 break;
1404
1405 case BPF_JMP32:
1406 case BPF_JMP:
1407 break;
1408
1409 case BPF_LDX:
1410 case BPF_LD:
1411 /* fp holds load result */
1412 if (dst == BPF_REG_FP)
1413 return 0;
1414
1415 if (class == BPF_LDX && mode == BPF_MEM &&
1416 src == BPF_REG_FP && off < offset)
1417 offset = off;
1418 break;
1419
1420 case BPF_ALU:
1421 case BPF_ALU64:
1422 default:
1423 /* fp holds ALU result */
1424 if (dst == BPF_REG_FP)
1425 return 0;
1426 }
1427 }
1428
1429 if (offset < 0) {
1430 /*
1431 * safely be converted to a positive 'int', since insn->off
1432 * is 's16'
1433 */
1434 offset = -offset;
1435 /* align down to 8 bytes */
1436 offset = ALIGN_DOWN(offset, 8);
1437 }
1438
1439 return offset;
1440 }
1441
build_body(struct jit_ctx * ctx,bool extra_pass)1442 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1443 {
1444 const struct bpf_prog *prog = ctx->prog;
1445 int i;
1446
1447 /*
1448 * - offset[0] offset of the end of prologue,
1449 * start of the 1st instruction.
1450 * - offset[1] - offset of the end of 1st instruction,
1451 * start of the 2nd instruction
1452 * [....]
1453 * - offset[3] - offset of the end of 3rd instruction,
1454 * start of 4th instruction
1455 */
1456 for (i = 0; i < prog->len; i++) {
1457 const struct bpf_insn *insn = &prog->insnsi[i];
1458 int ret;
1459
1460 if (ctx->image == NULL)
1461 ctx->offset[i] = ctx->idx;
1462 ret = build_insn(insn, ctx, extra_pass);
1463 if (ret > 0) {
1464 i++;
1465 if (ctx->image == NULL)
1466 ctx->offset[i] = ctx->idx;
1467 continue;
1468 }
1469 if (ret)
1470 return ret;
1471 }
1472 /*
1473 * offset is allocated with prog->len + 1 so fill in
1474 * the last element with the offset after the last
1475 * instruction (end of program)
1476 */
1477 if (ctx->image == NULL)
1478 ctx->offset[i] = ctx->idx;
1479
1480 return 0;
1481 }
1482
validate_code(struct jit_ctx * ctx)1483 static int validate_code(struct jit_ctx *ctx)
1484 {
1485 int i;
1486
1487 for (i = 0; i < ctx->idx; i++) {
1488 u32 a64_insn = le32_to_cpu(ctx->image[i]);
1489
1490 if (a64_insn == AARCH64_BREAK_FAULT)
1491 return -1;
1492 }
1493 return 0;
1494 }
1495
validate_ctx(struct jit_ctx * ctx)1496 static int validate_ctx(struct jit_ctx *ctx)
1497 {
1498 if (validate_code(ctx))
1499 return -1;
1500
1501 if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1502 return -1;
1503
1504 return 0;
1505 }
1506
/* Pointer-typed wrapper around flush_icache_range() for [start, end). */
static inline void bpf_flush_icache(void *start, void *end)
{
	const unsigned long first = (unsigned long)start;
	const unsigned long last = (unsigned long)end;

	flush_icache_range(first, last);
}
1511
/*
 * JIT state kept in prog->aux->jit_data between two calls of
 * bpf_int_jit_compile() for the same program: the first call stores it for
 * a prog->is_func program, the following call ("extra pass") restores it.
 */
struct arm64_jit_data {
	/* binary header returned by bpf_jit_binary_alloc() */
	struct bpf_binary_header *header;
	/* image pointer handed back by bpf_jit_binary_alloc() */
	u8 *image;
	/* snapshot of the JIT context, including the offset table */
	struct jit_ctx ctx;
};
1517
/*
 * JIT-compile a BPF program into arm64 instructions.
 *
 * The work is done in passes:
 *   1. a sizing pass with ctx.image == NULL that computes ctx.idx and
 *      ctx.offset[];
 *   2. an emitting pass into the image from bpf_jit_binary_alloc();
 *   3. a validation pass over the emitted code (validate_ctx()).
 *
 * For a prog->is_func program the first call stops after step 3 and saves
 * its state in prog->aux->jit_data. The next call finds that state, skips
 * step 1 and the allocation, and re-runs steps 2 and 3 as the "extra pass"
 * before finalizing.
 *
 * Returns the program the caller should use: @prog or its constant-blinded
 * clone with bpf_func/jited/jited_len set on success, or the original
 * program when JIT was not requested or a step failed. If the extra pass
 * produces a different instruction count, the image is freed and the
 * program is returned with bpf_func cleared and jited == 0.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	int image_size, prog_size, extable_size, extable_align, extable_offset;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	struct arm64_jit_data *jit_data;
	bool was_classic = bpf_prog_was_classic(prog);
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct jit_ctx ctx;
	u8 *image_ptr;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	/* A different pointer means we now work on the blinded clone. */
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	/* Reuse state left by an earlier call, or allocate it on first entry. */
	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	/*
	 * A saved offset table means an earlier call already sized and
	 * allocated the image: restore its context and only re-emit.
	 */
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}
	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	/* One entry per BPF instruction plus one for the end of the program. */
	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.fpb_offset = find_fpb_offset(prog);

	/*
	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
	 *
	 * BPF line info needs ctx->offset[i] to be the offset of
	 * instruction[i] in jited image, so build prologue first.
	 */
	if (build_prologue(&ctx, was_classic)) {
		prog = orig_prog;
		goto out_off;
	}

	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);
	build_plt(&ctx);

	extable_align = __alignof__(struct exception_table_entry);
	extable_size = prog->aux->num_exentries *
		sizeof(struct exception_table_entry);

	/* Now we know the actual image size. */
	prog_size = sizeof(u32) * ctx.idx;
	/* also allocate space for plt target */
	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
	image_size = extable_offset + extable_size;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	/* 2. Now, the actual pass. */

	ctx.image = (__le32 *)image_ptr;
	/* The exception table is placed after the code and the plt target. */
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + extable_offset;
skip_init_ctx:
	/* Both the regular path and the extra pass emit from index 0. */
	ctx.idx = 0;
	ctx.exentry_idx = 0;

	build_prologue(&ctx, was_classic);

	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	build_epilogue(&ctx);
	build_plt(&ctx);

	/* 3. Extra pass to validate JITed code. */
	if (validate_ctx(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_off;
	}

	/* And we're done. */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	bpf_flush_icache(header, ctx.image + ctx.idx);

	if (!prog->is_func || extra_pass) {
		/* The extra pass must emit exactly as many instructions. */
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_off;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		/* Save the state so the next call can run the extra pass. */
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->bpf_func = (void *)ctx.image;
	prog->jited = 1;
	prog->jited_len = prog_size;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= AARCH64_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
		/*
		 * out_off lives inside this block so that the failure paths
		 * above share the cleanup, while the "state saved for the
		 * extra pass" case skips it and keeps jit_data alive.
		 */
out_off:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	/* Release whichever of the original/blinded pair is not returned. */
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}
1679
/* Capability query: this JIT always answers true for kfunc call support. */
bool bpf_jit_supports_kfunc_call(void)
{
	const bool supported = true;

	return supported;
}
1684
bpf_jit_alloc_exec_limit(void)1685 u64 bpf_jit_alloc_exec_limit(void)
1686 {
1687 return VMALLOC_END - VMALLOC_START;
1688 }
1689
/*
 * Allocate @size bytes for JITed code with vmalloc().
 *
 * The memory is intended to be executable, so the pointer tag is reset
 * before the address is returned.
 */
void *bpf_jit_alloc_exec(unsigned long size)
{
	void *mem = vmalloc(size);

	return kasan_reset_tag(mem);
}
1695
/*
 * Release memory obtained from bpf_jit_alloc_exec().
 *
 * @addr: address to hand to vfree().
 */
void bpf_jit_free_exec(void *addr)
{
	/*
	 * Plain call: a return statement with an expression is not allowed
	 * in a function returning void.
	 */
	vfree(addr);
}
1700
/* Capability query: this JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	const bool supported = true;

	return supported;
}
1706
invoke_bpf_prog(struct jit_ctx * ctx,struct bpf_tramp_link * l,int args_off,int retval_off,int run_ctx_off,bool save_ret)1707 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
1708 int args_off, int retval_off, int run_ctx_off,
1709 bool save_ret)
1710 {
1711 __le32 *branch;
1712 u64 enter_prog;
1713 u64 exit_prog;
1714 struct bpf_prog *p = l->link.prog;
1715 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
1716
1717 enter_prog = (u64)bpf_trampoline_enter(p);
1718 exit_prog = (u64)bpf_trampoline_exit(p);
1719
1720 if (l->cookie == 0) {
1721 /* if cookie is zero, one instruction is enough to store it */
1722 emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
1723 } else {
1724 emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
1725 emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
1726 ctx);
1727 }
1728
1729 /* save p to callee saved register x19 to avoid loading p with mov_i64
1730 * each time.
1731 */
1732 emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
1733
1734 /* arg1: prog */
1735 emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
1736 /* arg2: &run_ctx */
1737 emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
1738
1739 emit_call(enter_prog, ctx);
1740
1741 /* save return value to callee saved register x20 */
1742 emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
1743
1744 /* if (__bpf_prog_enter(prog) == 0)
1745 * goto skip_exec_of_prog;
1746 */
1747 branch = ctx->image + ctx->idx;
1748 emit(A64_NOP, ctx);
1749
1750 emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
1751 if (!p->jited)
1752 emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
1753
1754 emit_call((const u64)p->bpf_func, ctx);
1755
1756 if (save_ret)
1757 emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
1758
1759 if (ctx->image) {
1760 int offset = &ctx->image[ctx->idx] - branch;
1761 *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
1762 }
1763
1764 /* arg1: prog */
1765 emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
1766 /* arg2: start time */
1767 emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
1768 /* arg3: &run_ctx */
1769 emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
1770
1771 emit_call(exit_prog, ctx);
1772 }
1773
/*
 * Emit the calls for all fmod_ret programs in @tl.
 *
 * After each program its stored return value is loaded into x10 and a nop
 * is emitted; the nop's location is recorded in @branches so the caller can
 * later replace it with a cbnz to the fexit part.
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int args_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int n;

	/*
	 * Zero the return value slot first, otherwise the first fmod_ret
	 * program would see a garbage return value.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);

	for (n = 0; n < tl->nr_links; n++) {
		struct bpf_tramp_link *link = tl->links[n];

		invoke_bpf_prog(ctx, link, args_off, retval_off, run_ctx_off,
				true);

		/*
		 * if (*(u64 *)(sp + retval_off) != 0)
		 *         goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);

		/* remember where the branch goes; the nop becomes a cbnz later */
		branches[n] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}
1798
save_args(struct jit_ctx * ctx,int args_off,int nregs)1799 static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
1800 {
1801 int i;
1802
1803 for (i = 0; i < nregs; i++) {
1804 emit(A64_STR64I(i, A64_SP, args_off), ctx);
1805 args_off += 8;
1806 }
1807 }
1808
restore_args(struct jit_ctx * ctx,int args_off,int nregs)1809 static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
1810 {
1811 int i;
1812
1813 for (i = 0; i < nregs; i++) {
1814 emit(A64_LDR64I(i, A64_SP, args_off), ctx);
1815 args_off += 8;
1816 }
1817 }
1818
is_struct_ops_tramp(const struct bpf_tramp_links * fentry_links)1819 static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
1820 {
1821 return fentry_links->nr_links == 1 &&
1822 fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
1823 }
1824
1825 /* Based on the x86's implementation of arch_prepare_bpf_trampoline().
1826 *
1827 * bpf prog and function entry before bpf trampoline hooked:
1828 * mov x9, lr
1829 * nop
1830 *
1831 * bpf prog and function entry after bpf trampoline hooked:
1832 * mov x9, lr
1833 * bl <bpf_trampoline or plt>
1834 *
1835 */
prepare_trampoline(struct jit_ctx * ctx,struct bpf_tramp_image * im,struct bpf_tramp_links * tlinks,void * orig_call,int nregs,u32 flags)1836 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
1837 struct bpf_tramp_links *tlinks, void *orig_call,
1838 int nregs, u32 flags)
1839 {
1840 int i;
1841 int stack_size;
1842 int retaddr_off;
1843 int regs_off;
1844 int retval_off;
1845 int args_off;
1846 int nregs_off;
1847 int ip_off;
1848 int run_ctx_off;
1849 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
1850 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
1851 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
1852 bool save_ret;
1853 __le32 **branches = NULL;
1854 bool is_struct_ops = is_struct_ops_tramp(fentry);
1855
1856 /* trampoline stack layout:
1857 * [ parent ip ]
1858 * [ FP ]
1859 * SP + retaddr_off [ self ip ]
1860 * [ FP ]
1861 *
1862 * [ padding ] align SP to multiples of 16
1863 *
1864 * [ x20 ] callee saved reg x20
1865 * SP + regs_off [ x19 ] callee saved reg x19
1866 *
1867 * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
1868 * BPF_TRAMP_F_RET_FENTRY_RET
1869 *
1870 * [ arg reg N ]
1871 * [ ... ]
1872 * SP + args_off [ arg reg 1 ]
1873 *
1874 * SP + nregs_off [ arg regs count ]
1875 *
1876 * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
1877 *
1878 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
1879 */
1880
1881 stack_size = 0;
1882 run_ctx_off = stack_size;
1883 /* room for bpf_tramp_run_ctx */
1884 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
1885
1886 ip_off = stack_size;
1887 /* room for IP address argument */
1888 if (flags & BPF_TRAMP_F_IP_ARG)
1889 stack_size += 8;
1890
1891 nregs_off = stack_size;
1892 /* room for args count */
1893 stack_size += 8;
1894
1895 args_off = stack_size;
1896 /* room for args */
1897 stack_size += nregs * 8;
1898
1899 /* room for return value */
1900 retval_off = stack_size;
1901 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
1902 if (save_ret)
1903 stack_size += 8;
1904
1905 /* room for callee saved registers, currently x19 and x20 are used */
1906 regs_off = stack_size;
1907 stack_size += 16;
1908
1909 /* round up to multiples of 16 to avoid SPAlignmentFault */
1910 stack_size = round_up(stack_size, 16);
1911
1912 /* return address locates above FP */
1913 retaddr_off = stack_size + 8;
1914
1915 /* bpf trampoline may be invoked by 3 instruction types:
1916 * 1. bl, attached to bpf prog or kernel function via short jump
1917 * 2. br, attached to bpf prog or kernel function via long jump
1918 * 3. blr, working as a function pointer, used by struct_ops.
1919 * So BTI_JC should used here to support both br and blr.
1920 */
1921 emit_bti(A64_BTI_JC, ctx);
1922
1923 /* x9 is not set for struct_ops */
1924 if (!is_struct_ops) {
1925 /* frame for parent function */
1926 emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
1927 emit(A64_MOV(1, A64_FP, A64_SP), ctx);
1928 }
1929
1930 /* frame for patched function for tracing, or caller for struct_ops */
1931 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
1932 emit(A64_MOV(1, A64_FP, A64_SP), ctx);
1933
1934 /* allocate stack space */
1935 emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
1936
1937 if (flags & BPF_TRAMP_F_IP_ARG) {
1938 /* save ip address of the traced function */
1939 emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
1940 emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
1941 }
1942
1943 /* save arg regs count*/
1944 emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
1945 emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
1946
1947 /* save arg regs */
1948 save_args(ctx, args_off, nregs);
1949
1950 /* save callee saved registers */
1951 emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
1952 emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
1953
1954 if (flags & BPF_TRAMP_F_CALL_ORIG) {
1955 emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
1956 emit_call((const u64)__bpf_tramp_enter, ctx);
1957 }
1958
1959 for (i = 0; i < fentry->nr_links; i++)
1960 invoke_bpf_prog(ctx, fentry->links[i], args_off,
1961 retval_off, run_ctx_off,
1962 flags & BPF_TRAMP_F_RET_FENTRY_RET);
1963
1964 if (fmod_ret->nr_links) {
1965 branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
1966 GFP_KERNEL);
1967 if (!branches)
1968 return -ENOMEM;
1969
1970 invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
1971 run_ctx_off, branches);
1972 }
1973
1974 if (flags & BPF_TRAMP_F_CALL_ORIG) {
1975 restore_args(ctx, args_off, nregs);
1976 /* call original func */
1977 emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
1978 emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
1979 emit(A64_RET(A64_R(10)), ctx);
1980 /* store return value */
1981 emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
1982 /* reserve a nop for bpf_tramp_image_put */
1983 im->ip_after_call = ctx->image + ctx->idx;
1984 emit(A64_NOP, ctx);
1985 }
1986
1987 /* update the branches saved in invoke_bpf_mod_ret with cbnz */
1988 for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
1989 int offset = &ctx->image[ctx->idx] - branches[i];
1990 *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
1991 }
1992
1993 for (i = 0; i < fexit->nr_links; i++)
1994 invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
1995 run_ctx_off, false);
1996
1997 if (flags & BPF_TRAMP_F_CALL_ORIG) {
1998 im->ip_epilogue = ctx->image + ctx->idx;
1999 emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
2000 emit_call((const u64)__bpf_tramp_exit, ctx);
2001 }
2002
2003 if (flags & BPF_TRAMP_F_RESTORE_REGS)
2004 restore_args(ctx, args_off, nregs);
2005
2006 /* restore callee saved register x19 and x20 */
2007 emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
2008 emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2009
2010 if (save_ret)
2011 emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);
2012
2013 /* reset SP */
2014 emit(A64_MOV(1, A64_SP, A64_FP), ctx);
2015
2016 if (is_struct_ops) {
2017 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2018 emit(A64_RET(A64_LR), ctx);
2019 } else {
2020 /* pop frames */
2021 emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2022 emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);
2023
2024 if (flags & BPF_TRAMP_F_SKIP_FRAME) {
2025 /* skip patched function, return to parent */
2026 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2027 emit(A64_RET(A64_R(9)), ctx);
2028 } else {
2029 /* return to patched function */
2030 emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
2031 emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2032 emit(A64_RET(A64_R(10)), ctx);
2033 }
2034 }
2035
2036 if (ctx->image)
2037 bpf_flush_icache(ctx->image, ctx->image + ctx->idx);
2038
2039 kfree(branches);
2040
2041 return ctx->idx;
2042 }
2043
/*
 * Build a BPF trampoline into the buffer [image, image_end).
 *
 * prepare_trampoline() is run twice: first with a NULL image to learn the
 * instruction count, then, after filling the buffer with jit_fill_hole(),
 * with the real image.
 *
 * Returns the trampoline size in bytes on success, -ENOTSUPP when more
 * than 8 argument registers would be needed, -EFBIG when the trampoline
 * does not fit into the buffer, -EINVAL when validate_code() rejects the
 * result, or the error returned by prepare_trampoline().
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *orig_call)
{
	struct jit_ctx ctx = {
		.image = NULL,
		.idx = 0,
	};
	const int capacity = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
	int nregs = m->nr_args;
	int ninsns;
	int arg;

	/* a struct argument may span more than one register */
	for (arg = 0; arg < MAX_BPF_FUNC_ARGS; arg++) {
		if (!(m->arg_flags[arg] & BTF_FMODEL_STRUCT_ARG))
			continue;

		/* The arg_size is at most 16 bytes, enforced by the verifier. */
		nregs += (m->arg_size[arg] + 7) / 8 - 1;
	}

	/* only the first 8 registers carry arguments */
	if (nregs > 8)
		return -ENOTSUPP;

	/* sizing run: no image yet */
	ninsns = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
	if (ninsns < 0)
		return ninsns;
	if (ninsns > capacity)
		return -EFBIG;

	/* emitting run into the caller's buffer */
	ctx.image = image;
	ctx.idx = 0;

	jit_fill_hole(image, (unsigned int)(image_end - image));
	ninsns = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
	if (ninsns <= 0)
		return ninsns;

	if (validate_code(&ctx) < 0)
		return -EINVAL;

	return ninsns * AARCH64_INSN_SIZE;
}
2089
is_long_jump(void * ip,void * target)2090 static bool is_long_jump(void *ip, void *target)
2091 {
2092 long offset;
2093
2094 /* NULL target means this is a NOP */
2095 if (!target)
2096 return false;
2097
2098 offset = (long)target - (long)ip;
2099 return offset < -SZ_128M || offset >= SZ_128M;
2100 }
2101
/*
 * Produce the instruction to place at @ip: a NOP when @addr is NULL,
 * otherwise a branch of kind @type to @addr, or to @plt when @addr is a
 * long jump away from @ip.
 *
 * The instruction is written to *@insn. Returns 0 on success, -EFAULT when
 * the generated branch is AARCH64_BREAK_FAULT.
 */
static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
			     void *addr, void *plt, u32 *insn)
{
	unsigned long dest;

	if (addr == NULL) {
		*insn = aarch64_insn_gen_nop();
		return 0;
	}

	dest = (unsigned long)(is_long_jump(ip, addr) ? plt : addr);
	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip, dest, type);

	if (*insn == AARCH64_BREAK_FAULT)
		return -EFAULT;

	return 0;
}
2123
2124 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
2125 * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
2126 * or @new_addr is NULL, the old or new instruction is NOP.
2127 *
2128 * When @ip is the bpf prog entry, a bpf trampoline is being attached or
2129 * detached. Since bpf trampoline and bpf prog are allocated separately with
2130 * vmalloc, the address distance may exceed 128MB, the maximum branch range.
2131 * So long jump should be handled.
2132 *
2133 * When a bpf prog is constructed, a plt pointing to empty trampoline
2134 * dummy_tramp is placed at the end:
2135 *
2136 * bpf_prog:
2137 * mov x9, lr
2138 * nop // patchsite
2139 * ...
2140 * ret
2141 *
2142 * plt:
2143 * ldr x10, target
2144 * br x10
2145 * target:
2146 * .quad dummy_tramp // plt target
2147 *
2148 * This is also the state when no trampoline is attached.
2149 *
2150 * When a short-jump bpf trampoline is attached, the patchsite is patched
2151 * to a bl instruction to the trampoline directly:
2152 *
2153 * bpf_prog:
2154 * mov x9, lr
2155 * bl <short-jump bpf trampoline address> // patchsite
2156 * ...
2157 * ret
2158 *
2159 * plt:
2160 * ldr x10, target
2161 * br x10
2162 * target:
2163 * .quad dummy_tramp // plt target
2164 *
2165 * When a long-jump bpf trampoline is attached, the plt target is filled with
2166 * the trampoline address and the patchsite is patched to a bl instruction to
2167 * the plt:
2168 *
2169 * bpf_prog:
2170 * mov x9, lr
2171 * bl plt // patchsite
2172 * ...
2173 * ret
2174 *
2175 * plt:
2176 * ldr x10, target
2177 * br x10
2178 * target:
2179 * .quad <long-jump bpf trampoline address> // plt target
2180 *
2181 * The dummy_tramp is used to prevent another CPU from jumping to unknown
2182 * locations during the patching process, making the patching process easier.
2183 */
bpf_arch_text_poke(void * ip,enum bpf_text_poke_type poke_type,void * old_addr,void * new_addr)2184 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
2185 void *old_addr, void *new_addr)
2186 {
2187 int ret;
2188 u32 old_insn;
2189 u32 new_insn;
2190 u32 replaced;
2191 struct bpf_plt *plt = NULL;
2192 unsigned long size = 0UL;
2193 unsigned long offset = ~0UL;
2194 enum aarch64_insn_branch_type branch_type;
2195 char namebuf[KSYM_NAME_LEN];
2196 void *image = NULL;
2197 u64 plt_target = 0ULL;
2198 bool poking_bpf_entry;
2199
2200 if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
2201 /* Only poking bpf text is supported. Since kernel function
2202 * entry is set up by ftrace, we reply on ftrace to poke kernel
2203 * functions.
2204 */
2205 return -ENOTSUPP;
2206
2207 image = ip - offset;
2208 /* zero offset means we're poking bpf prog entry */
2209 poking_bpf_entry = (offset == 0UL);
2210
2211 /* bpf prog entry, find plt and the real patchsite */
2212 if (poking_bpf_entry) {
2213 /* plt locates at the end of bpf prog */
2214 plt = image + size - PLT_TARGET_OFFSET;
2215
2216 /* skip to the nop instruction in bpf prog entry:
2217 * bti c // if BTI enabled
2218 * mov x9, x30
2219 * nop
2220 */
2221 ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2222 }
2223
2224 /* long jump is only possible at bpf prog entry */
2225 if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2226 !poking_bpf_entry))
2227 return -EINVAL;
2228
2229 if (poke_type == BPF_MOD_CALL)
2230 branch_type = AARCH64_INSN_BRANCH_LINK;
2231 else
2232 branch_type = AARCH64_INSN_BRANCH_NOLINK;
2233
2234 if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2235 return -EFAULT;
2236
2237 if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2238 return -EFAULT;
2239
2240 if (is_long_jump(ip, new_addr))
2241 plt_target = (u64)new_addr;
2242 else if (is_long_jump(ip, old_addr))
2243 /* if the old target is a long jump and the new target is not,
2244 * restore the plt target to dummy_tramp, so there is always a
2245 * legal and harmless address stored in plt target, and we'll
2246 * never jump from plt to an unknown place.
2247 */
2248 plt_target = (u64)&dummy_tramp;
2249
2250 if (plt_target) {
2251 /* non-zero plt_target indicates we're patching a bpf prog,
2252 * which is read only.
2253 */
2254 if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
2255 return -EFAULT;
2256 WRITE_ONCE(plt->target, plt_target);
2257 set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
2258 /* since plt target points to either the new trampoline
2259 * or dummy_tramp, even if another CPU reads the old plt
2260 * target value before fetching the bl instruction to plt,
2261 * it will be brought back by dummy_tramp, so no barrier is
2262 * required here.
2263 */
2264 }
2265
2266 /* if the old target and the new target are both long jumps, no
2267 * patching is required
2268 */
2269 if (old_insn == new_insn)
2270 return 0;
2271
2272 mutex_lock(&text_mutex);
2273 if (aarch64_insn_read(ip, &replaced)) {
2274 ret = -EFAULT;
2275 goto out;
2276 }
2277
2278 if (replaced != old_insn) {
2279 ret = -EFAULT;
2280 goto out;
2281 }
2282
2283 /* We call aarch64_insn_patch_text_nosync() to replace instruction
2284 * atomically, so no other CPUs will fetch a half-new and half-old
2285 * instruction. But there is chance that another CPU executes the
2286 * old instruction after the patching operation finishes (e.g.,
2287 * pipeline not flushed, or icache not synchronized yet).
2288 *
2289 * 1. when a new trampoline is attached, it is not a problem for
2290 * different CPUs to jump to different trampolines temporarily.
2291 *
2292 * 2. when an old trampoline is freed, we should wait for all other
2293 * CPUs to exit the trampoline and make sure the trampoline is no
2294 * longer reachable, since bpf_tramp_image_put() function already
2295 * uses percpu_ref and task-based rcu to do the sync, no need to call
2296 * the sync version here, see bpf_tramp_image_put() for details.
2297 */
2298 ret = aarch64_insn_patch_text_nosync(ip, new_insn);
2299 out:
2300 mutex_unlock(&text_mutex);
2301
2302 return ret;
2303 }
2304