1 // SPDX-License-Identifier: GPL-2.0
2 /* BPF JIT compiler for RV64G
3 *
4 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
5 *
6 */
7
8 #include <linux/bitfield.h>
9 #include <linux/bpf.h>
10 #include <linux/filter.h>
11 #include <linux/memory.h>
12 #include <linux/stop_machine.h>
13 #include <asm/patch.h>
14 #include "bpf_jit.h"
15
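/* Each JITed program starts with RV_FENTRY_NINSNS patchable instructions
 * (nops, or an auipc+jalr pair written by bpf_arch_text_poke()), used to
 * attach trampolines; RV_FENTRY_NBYTES is their total size in bytes.
 */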
16 #define RV_FENTRY_NINSNS 2
17 #define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4)
18
19 #define RV_REG_TCC RV_REG_A6
20 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if the program makes calls */
21
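/* BPF to RISC-V register mapping: BPF argument registers R1-R5 live in the
 * RISC-V argument registers a0-a4, the return value R0 is staged in a5 and
 * moved to a0 in the epilogue, the callee-saved R6-R9 and the frame pointer
 * map to s1-s5, and the verifier's scratch register AX uses t0.
 */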
22 static const int regmap[] = {
23 [BPF_REG_0] = RV_REG_A5,
24 [BPF_REG_1] = RV_REG_A0,
25 [BPF_REG_2] = RV_REG_A1,
26 [BPF_REG_3] = RV_REG_A2,
27 [BPF_REG_4] = RV_REG_A3,
28 [BPF_REG_5] = RV_REG_A4,
29 [BPF_REG_6] = RV_REG_S1,
30 [BPF_REG_7] = RV_REG_S2,
31 [BPF_REG_8] = RV_REG_S3,
32 [BPF_REG_9] = RV_REG_S4,
33 [BPF_REG_FP] = RV_REG_S5,
34 [BPF_REG_AX] = RV_REG_T0,
35 };
36
37 static const int pt_regmap[] = {
38 [RV_REG_A0] = offsetof(struct pt_regs, a0),
39 [RV_REG_A1] = offsetof(struct pt_regs, a1),
40 [RV_REG_A2] = offsetof(struct pt_regs, a2),
41 [RV_REG_A3] = offsetof(struct pt_regs, a3),
42 [RV_REG_A4] = offsetof(struct pt_regs, a4),
43 [RV_REG_A5] = offsetof(struct pt_regs, a5),
44 [RV_REG_S1] = offsetof(struct pt_regs, s1),
45 [RV_REG_S2] = offsetof(struct pt_regs, s2),
46 [RV_REG_S3] = offsetof(struct pt_regs, s3),
47 [RV_REG_S4] = offsetof(struct pt_regs, s4),
48 [RV_REG_S5] = offsetof(struct pt_regs, s5),
49 [RV_REG_T0] = offsetof(struct pt_regs, t0),
50 };
51
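/* The context flags below reuse the RISC-V register numbers as bit
 * positions, so marking or testing a used callee-saved register is a
 * direct __set_bit()/test_bit() on ctx->flags.
 */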
52 enum {
53 RV_CTX_F_SEEN_TAIL_CALL = 0,
54 RV_CTX_F_SEEN_CALL = RV_REG_RA,
55 RV_CTX_F_SEEN_S1 = RV_REG_S1,
56 RV_CTX_F_SEEN_S2 = RV_REG_S2,
57 RV_CTX_F_SEEN_S3 = RV_REG_S3,
58 RV_CTX_F_SEEN_S4 = RV_REG_S4,
59 RV_CTX_F_SEEN_S5 = RV_REG_S5,
60 RV_CTX_F_SEEN_S6 = RV_REG_S6,
61 };
62
63 static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
64 {
65 u8 reg = regmap[bpf_reg];
66
67 switch (reg) {
68 case RV_CTX_F_SEEN_S1:
69 case RV_CTX_F_SEEN_S2:
70 case RV_CTX_F_SEEN_S3:
71 case RV_CTX_F_SEEN_S4:
72 case RV_CTX_F_SEEN_S5:
73 case RV_CTX_F_SEEN_S6:
74 __set_bit(reg, &ctx->flags);
75 }
76 return reg;
77 };
78
79 static bool seen_reg(int reg, struct rv_jit_context *ctx)
80 {
81 switch (reg) {
82 case RV_CTX_F_SEEN_CALL:
83 case RV_CTX_F_SEEN_S1:
84 case RV_CTX_F_SEEN_S2:
85 case RV_CTX_F_SEEN_S3:
86 case RV_CTX_F_SEEN_S4:
87 case RV_CTX_F_SEEN_S5:
88 case RV_CTX_F_SEEN_S6:
89 return test_bit(reg, &ctx->flags);
90 }
91 return false;
92 }
93
94 static void mark_fp(struct rv_jit_context *ctx)
95 {
96 __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
97 }
98
99 static void mark_call(struct rv_jit_context *ctx)
100 {
101 __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
102 }
103
104 static bool seen_call(struct rv_jit_context *ctx)
105 {
106 return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
107 }
108
109 static void mark_tail_call(struct rv_jit_context *ctx)
110 {
111 __set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
112 }
113
114 static bool seen_tail_call(struct rv_jit_context *ctx)
115 {
116 return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
117 }
118
119 static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
120 {
121 mark_tail_call(ctx);
122
123 if (seen_call(ctx)) {
124 __set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
125 return RV_REG_S6;
126 }
127 return RV_REG_A6;
128 }
129
130 static bool is_32b_int(s64 val)
131 {
132 return -(1L << 31) <= val && val < (1L << 31);
133 }
134
135 static bool in_auipc_jalr_range(s64 val)
136 {
137 /*
138 * auipc+jalr can reach any signed PC-relative offset in the range
139 * [-2^31 - 2^11, 2^31 - 2^11).
140 */
141 return (-(1L << 31) - (1L << 11)) <= val &&
142 val < ((1L << 31) - (1L << 11));
143 }
144
145 /* Emit fixed-length instructions for address */
146 static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
147 {
148 /*
149 * Use ro_insns (the RX region) to calculate the offset, since that is
150 * the memory region the BPF program will ultimately run from.
151 */
152 u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
153 s64 off = addr - ip;
154 s64 upper = (off + (1 << 11)) >> 12;
155 s64 lower = off & 0xfff;
156
157 if (extra_pass && !in_auipc_jalr_range(off)) {
158 pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
159 return -ERANGE;
160 }
161
162 emit(rv_auipc(rd, upper), ctx);
163 emit(rv_addi(rd, rd, lower), ctx);
164 return 0;
165 }
166
167 /* Emit variable-length instructions for 32-bit and 64-bit imm */
168 static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
169 {
170 /* Note that the immediate from the add is sign-extended,
171 * which means that we need to compensate this by adding 2^12,
172 * when the 12th bit is set. A simpler way of doing this, and
173 * getting rid of the check, is to just add 2**11 before the
174 * shift. The "Loading a 32-Bit constant" example from the
175 * "Computer Organization and Design, RISC-V edition" book by
176 * Patterson/Hennessy highlights this fact.
177 *
178 * This also means that we need to process LSB to MSB.
179 */
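/* Worked example: val = 0x12345fff gives upper = (val + 0x800) >> 12 =
 * 0x12346 and lower = sign-extended 0xfff = -1, so "lui rd, 0x12346"
 * followed by "addiw rd, rd, -1" materializes 0x12345fff.
 */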
180 s64 upper = (val + (1 << 11)) >> 12;
181 /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
182 * and addi are signed and RVC checks will perform signed comparisons.
183 */
184 s64 lower = ((val & 0xfff) << 52) >> 52;
185 int shift;
186
187 if (is_32b_int(val)) {
188 if (upper)
189 emit_lui(rd, upper, ctx);
190
191 if (!upper) {
192 emit_li(rd, lower, ctx);
193 return;
194 }
195
196 emit_addiw(rd, rd, lower, ctx);
197 return;
198 }
199
200 shift = __ffs(upper);
201 upper >>= shift;
202 shift += 12;
203
204 emit_imm(rd, upper, ctx);
205
206 emit_slli(rd, rd, shift, ctx);
207 if (lower)
208 emit_addi(rd, rd, lower, ctx);
209 }
210
211 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
212 {
213 int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
214
215 if (seen_reg(RV_REG_RA, ctx)) {
216 emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
217 store_offset -= 8;
218 }
219 emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
220 store_offset -= 8;
221 if (seen_reg(RV_REG_S1, ctx)) {
222 emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
223 store_offset -= 8;
224 }
225 if (seen_reg(RV_REG_S2, ctx)) {
226 emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
227 store_offset -= 8;
228 }
229 if (seen_reg(RV_REG_S3, ctx)) {
230 emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
231 store_offset -= 8;
232 }
233 if (seen_reg(RV_REG_S4, ctx)) {
234 emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
235 store_offset -= 8;
236 }
237 if (seen_reg(RV_REG_S5, ctx)) {
238 emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
239 store_offset -= 8;
240 }
241 if (seen_reg(RV_REG_S6, ctx)) {
242 emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
243 store_offset -= 8;
244 }
245
246 emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
247 /* Set return value. */
248 if (!is_tail_call)
249 emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx);
250 emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
251 is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
252 ctx);
253 }
254
255 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
256 struct rv_jit_context *ctx)
257 {
258 switch (cond) {
259 case BPF_JEQ:
260 emit(rv_beq(rd, rs, rvoff >> 1), ctx);
261 return;
262 case BPF_JGT:
263 emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
264 return;
265 case BPF_JLT:
266 emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
267 return;
268 case BPF_JGE:
269 emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
270 return;
271 case BPF_JLE:
272 emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
273 return;
274 case BPF_JNE:
275 emit(rv_bne(rd, rs, rvoff >> 1), ctx);
276 return;
277 case BPF_JSGT:
278 emit(rv_blt(rs, rd, rvoff >> 1), ctx);
279 return;
280 case BPF_JSLT:
281 emit(rv_blt(rd, rs, rvoff >> 1), ctx);
282 return;
283 case BPF_JSGE:
284 emit(rv_bge(rd, rs, rvoff >> 1), ctx);
285 return;
286 case BPF_JSLE:
287 emit(rv_bge(rs, rd, rvoff >> 1), ctx);
288 }
289 }
290
291 static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
292 struct rv_jit_context *ctx)
293 {
294 s64 upper, lower;
295
296 if (is_13b_int(rvoff)) {
297 emit_bcc(cond, rd, rs, rvoff, ctx);
298 return;
299 }
300
301 /* Adjust for jal */
302 rvoff -= 4;
303
304 /* Transform, e.g.:
305 * bne rd,rs,foo
306 * to
307 * beq rd,rs,<.L1>
308 * (auipc foo)
309 * jal(r) foo
310 * .L1
311 */
312 cond = invert_bpf_cond(cond);
313 if (is_21b_int(rvoff)) {
314 emit_bcc(cond, rd, rs, 8, ctx);
315 emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
316 return;
317 }
318
319 /* 32b case: no need for an additional rvoff adjustment, since we
320 * get that from the auipc at PC', where PC = PC' + 4.
321 */
322 upper = (rvoff + (1 << 11)) >> 12;
323 lower = rvoff & 0xfff;
324
325 emit_bcc(cond, rd, rs, 12, ctx);
326 emit(rv_auipc(RV_REG_T1, upper), ctx);
327 emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
328 }
329
330 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
331 {
332 emit_slli(reg, reg, 32, ctx);
333 emit_srli(reg, reg, 32, ctx);
334 }
335
336 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
337 {
338 int tc_ninsn, off, start_insn = ctx->ninsns;
339 u8 tcc = rv_tail_call_reg(ctx);
340
341 /* a0: &ctx
342 * a1: &array
343 * a2: index
344 *
345 * if (index >= array->map.max_entries)
346 * goto out;
347 */
348 tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
349 ctx->offset[0];
350 emit_zext_32(RV_REG_A2, ctx);
351
352 off = offsetof(struct bpf_array, map.max_entries);
353 if (is_12b_check(off, insn))
354 return -1;
355 emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
356 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
357 emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
358
359 /* if (--TCC < 0)
360 * goto out;
361 */
362 emit_addi(RV_REG_TCC, tcc, -1, ctx);
363 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
364 emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
365
366 /* prog = array->ptrs[index];
367 * if (!prog)
368 * goto out;
369 */
370 emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
371 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
372 off = offsetof(struct bpf_array, ptrs);
373 if (is_12b_check(off, insn))
374 return -1;
375 emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
376 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
377 emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
378
379 /* goto *(prog->bpf_func + (RV_FENTRY_NINSNS + 1) * 4); */
380 off = offsetof(struct bpf_prog, bpf_func);
381 if (is_12b_check(off, insn))
382 return -1;
383 emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
384 __build_epilogue(true, ctx);
385 return 0;
386 }
387
388 static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
389 struct rv_jit_context *ctx)
390 {
391 u8 code = insn->code;
392
393 switch (code) {
394 case BPF_JMP | BPF_JA:
395 case BPF_JMP | BPF_CALL:
396 case BPF_JMP | BPF_EXIT:
397 case BPF_JMP | BPF_TAIL_CALL:
398 break;
399 default:
400 *rd = bpf_to_rv_reg(insn->dst_reg, ctx);
401 }
402
403 if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
404 code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
405 code & BPF_LDX || code & BPF_STX)
406 *rs = bpf_to_rv_reg(insn->src_reg, ctx);
407 }
408
409 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
410 {
411 emit_mv(RV_REG_T2, *rd, ctx);
412 emit_zext_32(RV_REG_T2, ctx);
413 emit_mv(RV_REG_T1, *rs, ctx);
414 emit_zext_32(RV_REG_T1, ctx);
415 *rd = RV_REG_T2;
416 *rs = RV_REG_T1;
417 }
418
419 static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
420 {
421 emit_addiw(RV_REG_T2, *rd, 0, ctx);
422 emit_addiw(RV_REG_T1, *rs, 0, ctx);
423 *rd = RV_REG_T2;
424 *rs = RV_REG_T1;
425 }
426
427 static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
428 {
429 emit_mv(RV_REG_T2, *rd, ctx);
430 emit_zext_32(RV_REG_T2, ctx);
431 emit_zext_32(RV_REG_T1, ctx);
432 *rd = RV_REG_T2;
433 }
434
435 static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
436 {
437 emit_addiw(RV_REG_T2, *rd, 0, ctx);
438 *rd = RV_REG_T2;
439 }
440
441 static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
442 struct rv_jit_context *ctx)
443 {
444 s64 upper, lower;
445
446 if (rvoff && fixed_addr && is_21b_int(rvoff)) {
447 emit(rv_jal(rd, rvoff >> 1), ctx);
448 return 0;
449 } else if (in_auipc_jalr_range(rvoff)) {
450 upper = (rvoff + (1 << 11)) >> 12;
451 lower = rvoff & 0xfff;
452 emit(rv_auipc(RV_REG_T1, upper), ctx);
453 emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
454 return 0;
455 }
456
457 pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff);
458 return -ERANGE;
459 }
460
461 static bool is_signed_bpf_cond(u8 cond)
462 {
463 return cond == BPF_JSGT || cond == BPF_JSLT ||
464 cond == BPF_JSGE || cond == BPF_JSLE;
465 }
466
467 static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
468 {
469 s64 off = 0;
470 u64 ip;
471
472 if (addr && ctx->insns && ctx->ro_insns) {
473 /*
474 * Use ro_insns (the RX region) to calculate the offset, since that
475 * is the memory region the BPF program will ultimately run from.
476 */
477 ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
478 off = addr - ip;
479 }
480
481 return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
482 }
483
484 static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
485 struct rv_jit_context *ctx)
486 {
487 u8 r0;
488 int jmp_offset;
489
490 if (off) {
491 if (is_12b_int(off)) {
492 emit_addi(RV_REG_T1, rd, off, ctx);
493 } else {
494 emit_imm(RV_REG_T1, off, ctx);
495 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
496 }
497 rd = RV_REG_T1;
498 }
499
500 switch (imm) {
501 /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
502 case BPF_ADD:
503 emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
504 rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
505 break;
506 case BPF_AND:
507 emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
508 rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
509 break;
510 case BPF_OR:
511 emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
512 rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
513 break;
514 case BPF_XOR:
515 emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
516 rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
517 break;
518 /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
519 case BPF_ADD | BPF_FETCH:
520 emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
521 rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
522 if (!is64)
523 emit_zext_32(rs, ctx);
524 break;
525 case BPF_AND | BPF_FETCH:
526 emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
527 rv_amoand_w(rs, rs, rd, 1, 1), ctx);
528 if (!is64)
529 emit_zext_32(rs, ctx);
530 break;
531 case BPF_OR | BPF_FETCH:
532 emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
533 rv_amoor_w(rs, rs, rd, 1, 1), ctx);
534 if (!is64)
535 emit_zext_32(rs, ctx);
536 break;
537 case BPF_XOR | BPF_FETCH:
538 emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
539 rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
540 if (!is64)
541 emit_zext_32(rs, ctx);
542 break;
543 /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
544 case BPF_XCHG:
545 emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
546 rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
547 if (!is64)
548 emit_zext_32(rs, ctx);
549 break;
550 /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
551 case BPF_CMPXCHG:
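/* Open-coded compare-and-swap using an LR/SC loop: keep the expected
 * value (old r0) in T2, load-reserve the memory operand into r0, and
 * store-conditionally write rs only if it still matches, looping back
 * to the LR when the SC fails.
 */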
552 r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
553 emit(is64 ? rv_addi(RV_REG_T2, r0, 0) :
554 rv_addiw(RV_REG_T2, r0, 0), ctx);
555 emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
556 rv_lr_w(r0, 0, rd, 0, 0), ctx);
557 jmp_offset = ninsns_rvoff(8);
558 emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
559 emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
560 rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
561 jmp_offset = ninsns_rvoff(-6);
562 emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
563 emit(rv_fence(0x3, 0x3), ctx);
564 break;
565 }
566 }
567
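/* Layout of the ex->fixup word: bits 26:0 hold the (positive) offset from
 * the instruction following the faulting load to the extable fixup entry,
 * and bits 31:27 hold the destination register that is zeroed when the
 * fault is handled in ex_handler_bpf().
 */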
568 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
569 #define BPF_FIXUP_REG_MASK GENMASK(31, 27)
570
571 bool ex_handler_bpf(const struct exception_table_entry *ex,
572 struct pt_regs *regs)
573 {
574 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
575 int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
576
577 *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
578 regs->epc = (unsigned long)&ex->fixup - offset;
579
580 return true;
581 }
582
583 /* For accesses to BTF pointers, add an entry to the exception table */
584 static int add_exception_handler(const struct bpf_insn *insn,
585 struct rv_jit_context *ctx,
586 int dst_reg, int insn_len)
587 {
588 struct exception_table_entry *ex;
589 unsigned long pc;
590 off_t ins_offset;
591 off_t fixup_offset;
592
593 if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
594 (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
595 return 0;
596
597 if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
598 return -EINVAL;
599
600 if (WARN_ON_ONCE(insn_len > ctx->ninsns))
601 return -EINVAL;
602
603 if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
604 return -EINVAL;
605
606 ex = &ctx->prog->aux->extable[ctx->nexentries];
607 pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
608
609 /*
610 * This is the relative offset of the instruction that may fault from
611 * the exception table itself. This will be written to the exception
612 * table and if this instruction faults, the destination register will
613 * be set to '0' and the execution will jump to the next instruction.
614 */
615 ins_offset = pc - (long)&ex->insn;
616 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
617 return -ERANGE;
618
619 /*
620 * Since the extable follows the program, the fixup offset is always
621 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
622 * to keep things simple, and put the destination register in the upper
623 * bits. We don't need to worry about buildtime or runtime sort
624 * modifying the upper bits because the table is already sorted, and
625 * isn't part of the main exception table.
626 *
627 * The fixup_offset is set to the next instruction from the instruction
628 * that may fault. The execution will jump to this after handling the
629 * fault.
630 */
631 fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
632 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
633 return -ERANGE;
634
635 /*
636 * The offsets above have been calculated using the RO buffer, but the
637 * writes below must go through the R/W buffer, so switch ex to point
638 * into the R/W buffer.
639 */
640 ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);
641
642 ex->insn = ins_offset;
643
644 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
645 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
646 ex->type = EX_TYPE_BPF;
647
648 ctx->nexentries++;
649 return 0;
650 }
651
652 static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
653 {
654 s64 rvoff;
655 struct rv_jit_context ctx;
656
657 ctx.ninsns = 0;
658 ctx.insns = (u16 *)insns;
659
660 if (!target) {
661 emit(rv_nop(), &ctx);
662 emit(rv_nop(), &ctx);
663 return 0;
664 }
665
666 rvoff = (s64)(target - ip);
667 return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
668 }
669
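/* Patch the RV_FENTRY_NINSNS-long call site at @ip: verify that the current
 * text matches what @old_addr implies (nops or a jump/call), then write the
 * sequence for @new_addr under text_mutex.
 */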
670 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
671 void *old_addr, void *new_addr)
672 {
673 u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
674 bool is_call = poke_type == BPF_MOD_CALL;
675 int ret;
676
677 if (!is_kernel_text((unsigned long)ip) &&
678 !is_bpf_text_address((unsigned long)ip))
679 return -ENOTSUPP;
680
681 ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
682 if (ret)
683 return ret;
684
685 if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
686 return -EFAULT;
687
688 ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
689 if (ret)
690 return ret;
691
692 cpus_read_lock();
693 mutex_lock(&text_mutex);
694 if (memcmp(ip, new_insns, RV_FENTRY_NBYTES))
695 ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES);
696 mutex_unlock(&text_mutex);
697 cpus_read_unlock();
698
699 return ret;
700 }
701
702 static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
703 {
704 int i;
705
706 for (i = 0; i < nregs; i++) {
707 emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
708 args_off -= 8;
709 }
710 }
711
712 static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
713 {
714 int i;
715
716 for (i = 0; i < nregs; i++) {
717 emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
718 args_off -= 8;
719 }
720 }
721
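/* Emit the per-program sequence used by the trampoline: stash the bpf_cookie
 * in the run_ctx, call the enter handler, skip the program body if the enter
 * handler returns 0, invoke the BPF program (optionally saving its return
 * value on the stack), and finally call the exit handler.
 */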
722 static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
723 int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
724 {
725 int ret, branch_off;
726 struct bpf_prog *p = l->link.prog;
727 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
728
729 if (l->cookie) {
730 emit_imm(RV_REG_T1, l->cookie, ctx);
731 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx);
732 } else {
733 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
734 }
735
736 /* arg1: prog */
737 emit_imm(RV_REG_A0, (const s64)p, ctx);
738 /* arg2: &run_ctx */
739 emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
740 ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
741 if (ret)
742 return ret;
743
744 /* store prog start time */
745 emit_mv(RV_REG_S1, RV_REG_A0, ctx);
746
747 /* if (__bpf_prog_enter(prog) == 0)
748 * goto skip_exec_of_prog;
749 */
750 branch_off = ctx->ninsns;
751 /* nop reserved for conditional jump */
752 emit(rv_nop(), ctx);
753
754 /* arg1: &args_off */
755 emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
756 if (!p->jited)
757 /* arg2: progs[i]->insnsi for interpreter */
758 emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
759 ret = emit_call((const u64)p->bpf_func, true, ctx);
760 if (ret)
761 return ret;
762
763 if (save_ret) {
764 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
765 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
766 }
767
768 /* update branch with beqz */
769 if (ctx->insns) {
770 int offset = ninsns_rvoff(ctx->ninsns - branch_off);
771 u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
772 *(u32 *)(ctx->insns + branch_off) = insn;
773 }
774
775 /* arg1: prog */
776 emit_imm(RV_REG_A0, (const s64)p, ctx);
777 /* arg2: prog start time */
778 emit_mv(RV_REG_A1, RV_REG_S1, ctx);
779 /* arg3: &run_ctx */
780 emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
781 ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);
782
783 return ret;
784 }
785
786 static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
787 const struct btf_func_model *m,
788 struct bpf_tramp_links *tlinks,
789 void *func_addr, u32 flags,
790 struct rv_jit_context *ctx)
791 {
792 int i, ret, offset;
793 int *branches_off = NULL;
794 int stack_size = 0, nregs = m->nr_args;
795 int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off;
796 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
797 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
798 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
799 void *orig_call = func_addr;
800 bool save_ret;
801 u32 insn;
802
803 /* Two types of generated trampoline stack layout:
804 *
805 * 1. trampoline called from function entry
806 * --------------------------------------
807 * FP + 8 [ RA to parent func ] return address to parent
808 * function
809 * FP + 0 [ FP of parent func ] frame pointer of parent
810 * function
811 * FP - 8 [ T0 to traced func ] return address of traced
812 * function
813 * FP - 16 [ FP of traced func ] frame pointer of traced
814 * function
815 * --------------------------------------
816 *
817 * 2. trampoline called directly
818 * --------------------------------------
819 * FP - 8 [ RA to caller func ] return address to caller
820 * function
821 * FP - 16 [ FP of caller func ] frame pointer of caller
822 * function
823 * --------------------------------------
824 *
825 * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
826 * BPF_TRAMP_F_RET_FENTRY_RET
827 * [ argN ]
828 * [ ... ]
829 * FP - args_off [ arg1 ]
830 *
831 * FP - nregs_off [ regs count ]
832 *
833 * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG
834 *
835 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
836 *
837 * FP - sreg_off [ callee saved reg ]
838 *
839 * [ pads ] pads for 16 bytes alignment
840 */
841
842 if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
843 return -ENOTSUPP;
844
845 /* extra registers for struct arguments */
846 for (i = 0; i < m->nr_args; i++)
847 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
848 nregs += round_up(m->arg_size[i], 8) / 8 - 1;
849
850 /* 8 arguments passed by registers */
851 if (nregs > 8)
852 return -ENOTSUPP;
853
854 /* room in the trampoline frame to store the return address and frame pointer */
855 stack_size += 16;
856
857 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
858 if (save_ret) {
859 stack_size += 16; /* Save both A5 (BPF R0) and A0 */
860 retval_off = stack_size;
861 }
862
863 stack_size += nregs * 8;
864 args_off = stack_size;
865
866 stack_size += 8;
867 nregs_off = stack_size;
868
869 if (flags & BPF_TRAMP_F_IP_ARG) {
870 stack_size += 8;
871 ip_off = stack_size;
872 }
873
874 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
875 run_ctx_off = stack_size;
876
877 stack_size += 8;
878 sreg_off = stack_size;
879
880 stack_size = round_up(stack_size, 16);
881
882 if (func_addr) {
883 /* For the trampoline called from function entry,
884 * the frames of both the traced function and the
885 * trampoline need to be set up.
886 */
887 emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx);
888 emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx);
889 emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx);
890 emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx);
891
892 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
893 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx);
894 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
895 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
896 } else {
897 /* For the trampoline called directly, just handle
898 * the frame of trampoline.
899 */
900 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
901 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx);
902 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
903 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
904 }
905
906 /* callee saved register S1 to pass start time */
907 emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
908
909 /* store ip address of the traced function */
910 if (flags & BPF_TRAMP_F_IP_ARG) {
911 emit_imm(RV_REG_T1, (const s64)func_addr, ctx);
912 emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
913 }
914
915 emit_li(RV_REG_T1, nregs, ctx);
916 emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
917
918 store_args(nregs, args_off, ctx);
919
920 /* skip to actual body of traced function */
921 if (flags & BPF_TRAMP_F_SKIP_FRAME)
922 orig_call += RV_FENTRY_NINSNS * 4;
923
924 if (flags & BPF_TRAMP_F_CALL_ORIG) {
925 emit_imm(RV_REG_A0, (const s64)im, ctx);
926 ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
927 if (ret)
928 return ret;
929 }
930
931 for (i = 0; i < fentry->nr_links; i++) {
932 ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off,
933 flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx);
934 if (ret)
935 return ret;
936 }
937
938 if (fmod_ret->nr_links) {
939 branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
940 if (!branches_off)
941 return -ENOMEM;
942
943 /* clear the return value slot to avoid confusion from a stale value */
944 emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
945 for (i = 0; i < fmod_ret->nr_links; i++) {
946 ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
947 run_ctx_off, true, ctx);
948 if (ret)
949 goto out;
950 emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
951 branches_off[i] = ctx->ninsns;
952 /* nop reserved for conditional jump */
953 emit(rv_nop(), ctx);
954 }
955 }
956
957 if (flags & BPF_TRAMP_F_CALL_ORIG) {
958 restore_args(nregs, args_off, ctx);
959 ret = emit_call((const u64)orig_call, true, ctx);
960 if (ret)
961 goto out;
962 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
963 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
964 im->ip_after_call = ctx->insns + ctx->ninsns;
965 /* 2 nops reserved for auipc+jalr pair */
966 emit(rv_nop(), ctx);
967 emit(rv_nop(), ctx);
968 }
969
970 /* update branches saved in invoke_bpf_mod_ret with bnez */
971 for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
972 offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
973 insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
974 *(u32 *)(ctx->insns + branches_off[i]) = insn;
975 }
976
977 for (i = 0; i < fexit->nr_links; i++) {
978 ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off,
979 run_ctx_off, false, ctx);
980 if (ret)
981 goto out;
982 }
983
984 if (flags & BPF_TRAMP_F_CALL_ORIG) {
985 im->ip_epilogue = ctx->insns + ctx->ninsns;
986 emit_imm(RV_REG_A0, (const s64)im, ctx);
987 ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
988 if (ret)
989 goto out;
990 }
991
992 if (flags & BPF_TRAMP_F_RESTORE_REGS)
993 restore_args(nregs, args_off, ctx);
994
995 if (save_ret) {
996 emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
997 emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
998 }
999
1000 emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
1001
1002 if (func_addr) {
1003 /* trampoline called from function entry */
1004 emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
1005 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
1006 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
1007
1008 emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx);
1009 emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx);
1010 emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx);
1011
1012 if (flags & BPF_TRAMP_F_SKIP_FRAME)
1013 /* return to parent function */
1014 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
1015 else
1016 /* return to traced function */
1017 emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx);
1018 } else {
1019 /* trampoline called directly */
1020 emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
1021 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
1022 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
1023
1024 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
1025 }
1026
1027 ret = ctx->ninsns;
1028 out:
1029 kfree(branches_off);
1030 return ret;
1031 }
1032
1033 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
1034 void *image_end, const struct btf_func_model *m,
1035 u32 flags, struct bpf_tramp_links *tlinks,
1036 void *func_addr)
1037 {
1038 int ret;
1039 struct rv_jit_context ctx;
1040
1041 ctx.ninsns = 0;
1042 ctx.insns = NULL;
1043 ctx.ro_insns = NULL;
1044 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
1045 if (ret < 0)
1046 return ret;
1047
1048 if (ninsns_rvoff(ret) > (long)image_end - (long)image)
1049 return -EFBIG;
1050
1051 ctx.ninsns = 0;
1052 /*
1053 * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the
1054 * JITed instructions and later copies it to a RX region (ctx.ro_insns).
1055 * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the
1056 * trampoline image uses the same memory area for writing and execution,
1057 * both ctx.insns and ctx.ro_insns can be set to image.
1058 */
1059 ctx.insns = image;
1060 ctx.ro_insns = image;
1061 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
1062 if (ret < 0)
1063 return ret;
1064
1065 bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns);
1066
1067 return ninsns_rvoff(ret);
1068 }
1069
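/* JIT a single BPF instruction. Returns 0 on success, 1 when the following
 * instruction was consumed as well (64-bit immediate loads, and loads whose
 * zero-extension pseudo insn is redundant), or a negative error code.
 */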
1070 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
1071 bool extra_pass)
1072 {
1073 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
1074 BPF_CLASS(insn->code) == BPF_JMP;
1075 int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
1076 struct bpf_prog_aux *aux = ctx->prog->aux;
1077 u8 rd = -1, rs = -1, code = insn->code;
1078 s16 off = insn->off;
1079 s32 imm = insn->imm;
1080
1081 init_regs(&rd, &rs, insn, ctx);
1082
1083 switch (code) {
1084 /* dst = src */
1085 case BPF_ALU | BPF_MOV | BPF_X:
1086 case BPF_ALU64 | BPF_MOV | BPF_X:
1087 if (imm == 1) {
1088 /* Special mov32 for zext */
1089 emit_zext_32(rd, ctx);
1090 break;
1091 }
1092 switch (insn->off) {
1093 case 0:
1094 emit_mv(rd, rs, ctx);
1095 break;
1096 case 8:
1097 case 16:
1098 emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx);
1099 emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx);
1100 break;
1101 case 32:
1102 emit_addiw(rd, rs, 0, ctx);
1103 break;
1104 }
1105 if (!is64 && !aux->verifier_zext)
1106 emit_zext_32(rd, ctx);
1107 break;
1108
1109 /* dst = dst OP src */
1110 case BPF_ALU | BPF_ADD | BPF_X:
1111 case BPF_ALU64 | BPF_ADD | BPF_X:
1112 emit_add(rd, rd, rs, ctx);
1113 if (!is64 && !aux->verifier_zext)
1114 emit_zext_32(rd, ctx);
1115 break;
1116 case BPF_ALU | BPF_SUB | BPF_X:
1117 case BPF_ALU64 | BPF_SUB | BPF_X:
1118 if (is64)
1119 emit_sub(rd, rd, rs, ctx);
1120 else
1121 emit_subw(rd, rd, rs, ctx);
1122
1123 if (!is64 && !aux->verifier_zext)
1124 emit_zext_32(rd, ctx);
1125 break;
1126 case BPF_ALU | BPF_AND | BPF_X:
1127 case BPF_ALU64 | BPF_AND | BPF_X:
1128 emit_and(rd, rd, rs, ctx);
1129 if (!is64 && !aux->verifier_zext)
1130 emit_zext_32(rd, ctx);
1131 break;
1132 case BPF_ALU | BPF_OR | BPF_X:
1133 case BPF_ALU64 | BPF_OR | BPF_X:
1134 emit_or(rd, rd, rs, ctx);
1135 if (!is64 && !aux->verifier_zext)
1136 emit_zext_32(rd, ctx);
1137 break;
1138 case BPF_ALU | BPF_XOR | BPF_X:
1139 case BPF_ALU64 | BPF_XOR | BPF_X:
1140 emit_xor(rd, rd, rs, ctx);
1141 if (!is64 && !aux->verifier_zext)
1142 emit_zext_32(rd, ctx);
1143 break;
1144 case BPF_ALU | BPF_MUL | BPF_X:
1145 case BPF_ALU64 | BPF_MUL | BPF_X:
1146 emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
1147 if (!is64 && !aux->verifier_zext)
1148 emit_zext_32(rd, ctx);
1149 break;
1150 case BPF_ALU | BPF_DIV | BPF_X:
1151 case BPF_ALU64 | BPF_DIV | BPF_X:
1152 if (off)
1153 emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx);
1154 else
1155 emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
1156 if (!is64 && !aux->verifier_zext)
1157 emit_zext_32(rd, ctx);
1158 break;
1159 case BPF_ALU | BPF_MOD | BPF_X:
1160 case BPF_ALU64 | BPF_MOD | BPF_X:
1161 if (off)
1162 emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx);
1163 else
1164 emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
1165 if (!is64 && !aux->verifier_zext)
1166 emit_zext_32(rd, ctx);
1167 break;
1168 case BPF_ALU | BPF_LSH | BPF_X:
1169 case BPF_ALU64 | BPF_LSH | BPF_X:
1170 emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
1171 if (!is64 && !aux->verifier_zext)
1172 emit_zext_32(rd, ctx);
1173 break;
1174 case BPF_ALU | BPF_RSH | BPF_X:
1175 case BPF_ALU64 | BPF_RSH | BPF_X:
1176 emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
1177 if (!is64 && !aux->verifier_zext)
1178 emit_zext_32(rd, ctx);
1179 break;
1180 case BPF_ALU | BPF_ARSH | BPF_X:
1181 case BPF_ALU64 | BPF_ARSH | BPF_X:
1182 emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
1183 if (!is64 && !aux->verifier_zext)
1184 emit_zext_32(rd, ctx);
1185 break;
1186
1187 /* dst = -dst */
1188 case BPF_ALU | BPF_NEG:
1189 case BPF_ALU64 | BPF_NEG:
1190 emit_sub(rd, RV_REG_ZERO, rd, ctx);
1191 if (!is64 && !aux->verifier_zext)
1192 emit_zext_32(rd, ctx);
1193 break;
1194
1195 /* dst = BSWAP##imm(dst) */
1196 case BPF_ALU | BPF_END | BPF_FROM_LE:
1197 switch (imm) {
1198 case 16:
1199 emit_slli(rd, rd, 48, ctx);
1200 emit_srli(rd, rd, 48, ctx);
1201 break;
1202 case 32:
1203 if (!aux->verifier_zext)
1204 emit_zext_32(rd, ctx);
1205 break;
1206 case 64:
1207 /* Do nothing */
1208 break;
1209 }
1210 break;
1211
1212 case BPF_ALU | BPF_END | BPF_FROM_BE:
1213 case BPF_ALU64 | BPF_END | BPF_FROM_LE:
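/* Byte-swap the low imm bits of rd: each round moves the current lowest
 * byte of rd into T2 and shifts T2 left by 8 while shifting rd right by 8,
 * so the original low byte ends up most significant; the swapped value is
 * then moved back into rd.
 */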
1214 emit_li(RV_REG_T2, 0, ctx);
1215
1216 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1217 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1218 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1219 emit_srli(rd, rd, 8, ctx);
1220 if (imm == 16)
1221 goto out_be;
1222
1223 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1224 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1225 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1226 emit_srli(rd, rd, 8, ctx);
1227
1228 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1229 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1230 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1231 emit_srli(rd, rd, 8, ctx);
1232 if (imm == 32)
1233 goto out_be;
1234
1235 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1236 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1237 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1238 emit_srli(rd, rd, 8, ctx);
1239
1240 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1241 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1242 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1243 emit_srli(rd, rd, 8, ctx);
1244
1245 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1246 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1247 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1248 emit_srli(rd, rd, 8, ctx);
1249
1250 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1251 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1252 emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
1253 emit_srli(rd, rd, 8, ctx);
1254 out_be:
1255 emit_andi(RV_REG_T1, rd, 0xff, ctx);
1256 emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
1257
1258 emit_mv(rd, RV_REG_T2, ctx);
1259 break;
1260
1261 /* dst = imm */
1262 case BPF_ALU | BPF_MOV | BPF_K:
1263 case BPF_ALU64 | BPF_MOV | BPF_K:
1264 emit_imm(rd, imm, ctx);
1265 if (!is64 && !aux->verifier_zext)
1266 emit_zext_32(rd, ctx);
1267 break;
1268
1269 /* dst = dst OP imm */
1270 case BPF_ALU | BPF_ADD | BPF_K:
1271 case BPF_ALU64 | BPF_ADD | BPF_K:
1272 if (is_12b_int(imm)) {
1273 emit_addi(rd, rd, imm, ctx);
1274 } else {
1275 emit_imm(RV_REG_T1, imm, ctx);
1276 emit_add(rd, rd, RV_REG_T1, ctx);
1277 }
1278 if (!is64 && !aux->verifier_zext)
1279 emit_zext_32(rd, ctx);
1280 break;
1281 case BPF_ALU | BPF_SUB | BPF_K:
1282 case BPF_ALU64 | BPF_SUB | BPF_K:
1283 if (is_12b_int(-imm)) {
1284 emit_addi(rd, rd, -imm, ctx);
1285 } else {
1286 emit_imm(RV_REG_T1, imm, ctx);
1287 emit_sub(rd, rd, RV_REG_T1, ctx);
1288 }
1289 if (!is64 && !aux->verifier_zext)
1290 emit_zext_32(rd, ctx);
1291 break;
1292 case BPF_ALU | BPF_AND | BPF_K:
1293 case BPF_ALU64 | BPF_AND | BPF_K:
1294 if (is_12b_int(imm)) {
1295 emit_andi(rd, rd, imm, ctx);
1296 } else {
1297 emit_imm(RV_REG_T1, imm, ctx);
1298 emit_and(rd, rd, RV_REG_T1, ctx);
1299 }
1300 if (!is64 && !aux->verifier_zext)
1301 emit_zext_32(rd, ctx);
1302 break;
1303 case BPF_ALU | BPF_OR | BPF_K:
1304 case BPF_ALU64 | BPF_OR | BPF_K:
1305 if (is_12b_int(imm)) {
1306 emit(rv_ori(rd, rd, imm), ctx);
1307 } else {
1308 emit_imm(RV_REG_T1, imm, ctx);
1309 emit_or(rd, rd, RV_REG_T1, ctx);
1310 }
1311 if (!is64 && !aux->verifier_zext)
1312 emit_zext_32(rd, ctx);
1313 break;
1314 case BPF_ALU | BPF_XOR | BPF_K:
1315 case BPF_ALU64 | BPF_XOR | BPF_K:
1316 if (is_12b_int(imm)) {
1317 emit(rv_xori(rd, rd, imm), ctx);
1318 } else {
1319 emit_imm(RV_REG_T1, imm, ctx);
1320 emit_xor(rd, rd, RV_REG_T1, ctx);
1321 }
1322 if (!is64 && !aux->verifier_zext)
1323 emit_zext_32(rd, ctx);
1324 break;
1325 case BPF_ALU | BPF_MUL | BPF_K:
1326 case BPF_ALU64 | BPF_MUL | BPF_K:
1327 emit_imm(RV_REG_T1, imm, ctx);
1328 emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
1329 rv_mulw(rd, rd, RV_REG_T1), ctx);
1330 if (!is64 && !aux->verifier_zext)
1331 emit_zext_32(rd, ctx);
1332 break;
1333 case BPF_ALU | BPF_DIV | BPF_K:
1334 case BPF_ALU64 | BPF_DIV | BPF_K:
1335 emit_imm(RV_REG_T1, imm, ctx);
1336 if (off)
1337 emit(is64 ? rv_div(rd, rd, RV_REG_T1) :
1338 rv_divw(rd, rd, RV_REG_T1), ctx);
1339 else
1340 emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
1341 rv_divuw(rd, rd, RV_REG_T1), ctx);
1342 if (!is64 && !aux->verifier_zext)
1343 emit_zext_32(rd, ctx);
1344 break;
1345 case BPF_ALU | BPF_MOD | BPF_K:
1346 case BPF_ALU64 | BPF_MOD | BPF_K:
1347 emit_imm(RV_REG_T1, imm, ctx);
1348 if (off)
1349 emit(is64 ? rv_rem(rd, rd, RV_REG_T1) :
1350 rv_remw(rd, rd, RV_REG_T1), ctx);
1351 else
1352 emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
1353 rv_remuw(rd, rd, RV_REG_T1), ctx);
1354 if (!is64 && !aux->verifier_zext)
1355 emit_zext_32(rd, ctx);
1356 break;
1357 case BPF_ALU | BPF_LSH | BPF_K:
1358 case BPF_ALU64 | BPF_LSH | BPF_K:
1359 emit_slli(rd, rd, imm, ctx);
1360
1361 if (!is64 && !aux->verifier_zext)
1362 emit_zext_32(rd, ctx);
1363 break;
1364 case BPF_ALU | BPF_RSH | BPF_K:
1365 case BPF_ALU64 | BPF_RSH | BPF_K:
1366 if (is64)
1367 emit_srli(rd, rd, imm, ctx);
1368 else
1369 emit(rv_srliw(rd, rd, imm), ctx);
1370
1371 if (!is64 && !aux->verifier_zext)
1372 emit_zext_32(rd, ctx);
1373 break;
1374 case BPF_ALU | BPF_ARSH | BPF_K:
1375 case BPF_ALU64 | BPF_ARSH | BPF_K:
1376 if (is64)
1377 emit_srai(rd, rd, imm, ctx);
1378 else
1379 emit(rv_sraiw(rd, rd, imm), ctx);
1380
1381 if (!is64 && !aux->verifier_zext)
1382 emit_zext_32(rd, ctx);
1383 break;
1384
1385 /* JUMP off */
1386 case BPF_JMP | BPF_JA:
1387 case BPF_JMP32 | BPF_JA:
1388 if (BPF_CLASS(code) == BPF_JMP)
1389 rvoff = rv_offset(i, off, ctx);
1390 else
1391 rvoff = rv_offset(i, imm, ctx);
1392 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
1393 if (ret)
1394 return ret;
1395 break;
1396
1397 /* IF (dst COND src) JUMP off */
1398 case BPF_JMP | BPF_JEQ | BPF_X:
1399 case BPF_JMP32 | BPF_JEQ | BPF_X:
1400 case BPF_JMP | BPF_JGT | BPF_X:
1401 case BPF_JMP32 | BPF_JGT | BPF_X:
1402 case BPF_JMP | BPF_JLT | BPF_X:
1403 case BPF_JMP32 | BPF_JLT | BPF_X:
1404 case BPF_JMP | BPF_JGE | BPF_X:
1405 case BPF_JMP32 | BPF_JGE | BPF_X:
1406 case BPF_JMP | BPF_JLE | BPF_X:
1407 case BPF_JMP32 | BPF_JLE | BPF_X:
1408 case BPF_JMP | BPF_JNE | BPF_X:
1409 case BPF_JMP32 | BPF_JNE | BPF_X:
1410 case BPF_JMP | BPF_JSGT | BPF_X:
1411 case BPF_JMP32 | BPF_JSGT | BPF_X:
1412 case BPF_JMP | BPF_JSLT | BPF_X:
1413 case BPF_JMP32 | BPF_JSLT | BPF_X:
1414 case BPF_JMP | BPF_JSGE | BPF_X:
1415 case BPF_JMP32 | BPF_JSGE | BPF_X:
1416 case BPF_JMP | BPF_JSLE | BPF_X:
1417 case BPF_JMP32 | BPF_JSLE | BPF_X:
1418 case BPF_JMP | BPF_JSET | BPF_X:
1419 case BPF_JMP32 | BPF_JSET | BPF_X:
1420 rvoff = rv_offset(i, off, ctx);
1421 if (!is64) {
1422 s = ctx->ninsns;
1423 if (is_signed_bpf_cond(BPF_OP(code)))
1424 emit_sext_32_rd_rs(&rd, &rs, ctx);
1425 else
1426 emit_zext_32_rd_rs(&rd, &rs, ctx);
1427 e = ctx->ninsns;
1428
1429 /* Adjust for extra insns */
1430 rvoff -= ninsns_rvoff(e - s);
1431 }
1432
1433 if (BPF_OP(code) == BPF_JSET) {
1434 /* Adjust for and */
1435 rvoff -= 4;
1436 emit_and(RV_REG_T1, rd, rs, ctx);
1437 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
1438 ctx);
1439 } else {
1440 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
1441 }
1442 break;
1443
1444 /* IF (dst COND imm) JUMP off */
1445 case BPF_JMP | BPF_JEQ | BPF_K:
1446 case BPF_JMP32 | BPF_JEQ | BPF_K:
1447 case BPF_JMP | BPF_JGT | BPF_K:
1448 case BPF_JMP32 | BPF_JGT | BPF_K:
1449 case BPF_JMP | BPF_JLT | BPF_K:
1450 case BPF_JMP32 | BPF_JLT | BPF_K:
1451 case BPF_JMP | BPF_JGE | BPF_K:
1452 case BPF_JMP32 | BPF_JGE | BPF_K:
1453 case BPF_JMP | BPF_JLE | BPF_K:
1454 case BPF_JMP32 | BPF_JLE | BPF_K:
1455 case BPF_JMP | BPF_JNE | BPF_K:
1456 case BPF_JMP32 | BPF_JNE | BPF_K:
1457 case BPF_JMP | BPF_JSGT | BPF_K:
1458 case BPF_JMP32 | BPF_JSGT | BPF_K:
1459 case BPF_JMP | BPF_JSLT | BPF_K:
1460 case BPF_JMP32 | BPF_JSLT | BPF_K:
1461 case BPF_JMP | BPF_JSGE | BPF_K:
1462 case BPF_JMP32 | BPF_JSGE | BPF_K:
1463 case BPF_JMP | BPF_JSLE | BPF_K:
1464 case BPF_JMP32 | BPF_JSLE | BPF_K:
1465 rvoff = rv_offset(i, off, ctx);
1466 s = ctx->ninsns;
1467 if (imm) {
1468 emit_imm(RV_REG_T1, imm, ctx);
1469 rs = RV_REG_T1;
1470 } else {
1471 /* If imm is 0, simply use zero register. */
1472 rs = RV_REG_ZERO;
1473 }
1474 if (!is64) {
1475 if (is_signed_bpf_cond(BPF_OP(code)))
1476 emit_sext_32_rd(&rd, ctx);
1477 else
1478 emit_zext_32_rd_t1(&rd, ctx);
1479 }
1480 e = ctx->ninsns;
1481
1482 /* Adjust for extra insns */
1483 rvoff -= ninsns_rvoff(e - s);
1484 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
1485 break;
1486
1487 case BPF_JMP | BPF_JSET | BPF_K:
1488 case BPF_JMP32 | BPF_JSET | BPF_K:
1489 rvoff = rv_offset(i, off, ctx);
1490 s = ctx->ninsns;
1491 if (is_12b_int(imm)) {
1492 emit_andi(RV_REG_T1, rd, imm, ctx);
1493 } else {
1494 emit_imm(RV_REG_T1, imm, ctx);
1495 emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
1496 }
1497 /* For jset32, we should clear the upper 32 bits of t1, but
1498 * sign-extension is sufficient here and saves one instruction,
1499 * as t1 is used only in comparison against zero.
1500 */
1501 if (!is64 && imm < 0)
1502 emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
1503 e = ctx->ninsns;
1504 rvoff -= ninsns_rvoff(e - s);
1505 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
1506 break;
1507
1508 /* function call */
1509 case BPF_JMP | BPF_CALL:
1510 {
1511 bool fixed_addr;
1512 u64 addr;
1513
1514 mark_call(ctx);
1515 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1516 &addr, &fixed_addr);
1517 if (ret < 0)
1518 return ret;
1519
1520 ret = emit_call(addr, fixed_addr, ctx);
1521 if (ret)
1522 return ret;
1523
1524 if (insn->src_reg != BPF_PSEUDO_CALL)
1525 emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
1526 break;
1527 }
1528 /* tail call */
1529 case BPF_JMP | BPF_TAIL_CALL:
1530 if (emit_bpf_tail_call(i, ctx))
1531 return -1;
1532 break;
1533
1534 /* function return */
1535 case BPF_JMP | BPF_EXIT:
1536 if (i == ctx->prog->len - 1)
1537 break;
1538
1539 rvoff = epilogue_offset(ctx);
1540 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
1541 if (ret)
1542 return ret;
1543 break;
1544
1545 /* dst = imm64 */
1546 case BPF_LD | BPF_IMM | BPF_DW:
1547 {
1548 struct bpf_insn insn1 = insn[1];
1549 u64 imm64;
1550
1551 imm64 = (u64)insn1.imm << 32 | (u32)imm;
1552 if (bpf_pseudo_func(insn)) {
1553 /* fixed-length insns for extra jit pass */
1554 ret = emit_addr(rd, imm64, extra_pass, ctx);
1555 if (ret)
1556 return ret;
1557 } else {
1558 emit_imm(rd, imm64, ctx);
1559 }
1560
1561 return 1;
1562 }
1563
1564 /* LDX: dst = *(unsigned size *)(src + off) */
1565 case BPF_LDX | BPF_MEM | BPF_B:
1566 case BPF_LDX | BPF_MEM | BPF_H:
1567 case BPF_LDX | BPF_MEM | BPF_W:
1568 case BPF_LDX | BPF_MEM | BPF_DW:
1569 case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1570 case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1571 case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1572 case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1573 /* LDSX: dst = *(signed size *)(src + off) */
1574 case BPF_LDX | BPF_MEMSX | BPF_B:
1575 case BPF_LDX | BPF_MEMSX | BPF_H:
1576 case BPF_LDX | BPF_MEMSX | BPF_W:
1577 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1578 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1579 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1580 {
1581 int insn_len, insns_start;
1582 bool sign_ext;
1583
1584 sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
1585 BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
1586
1587 switch (BPF_SIZE(code)) {
1588 case BPF_B:
1589 if (is_12b_int(off)) {
1590 insns_start = ctx->ninsns;
1591 if (sign_ext)
1592 emit(rv_lb(rd, off, rs), ctx);
1593 else
1594 emit(rv_lbu(rd, off, rs), ctx);
1595 insn_len = ctx->ninsns - insns_start;
1596 break;
1597 }
1598
1599 emit_imm(RV_REG_T1, off, ctx);
1600 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1601 insns_start = ctx->ninsns;
1602 if (sign_ext)
1603 emit(rv_lb(rd, 0, RV_REG_T1), ctx);
1604 else
1605 emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
1606 insn_len = ctx->ninsns - insns_start;
1607 break;
1608 case BPF_H:
1609 if (is_12b_int(off)) {
1610 insns_start = ctx->ninsns;
1611 if (sign_ext)
1612 emit(rv_lh(rd, off, rs), ctx);
1613 else
1614 emit(rv_lhu(rd, off, rs), ctx);
1615 insn_len = ctx->ninsns - insns_start;
1616 break;
1617 }
1618
1619 emit_imm(RV_REG_T1, off, ctx);
1620 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1621 insns_start = ctx->ninsns;
1622 if (sign_ext)
1623 emit(rv_lh(rd, 0, RV_REG_T1), ctx);
1624 else
1625 emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
1626 insn_len = ctx->ninsns - insns_start;
1627 break;
1628 case BPF_W:
1629 if (is_12b_int(off)) {
1630 insns_start = ctx->ninsns;
1631 if (sign_ext)
1632 emit(rv_lw(rd, off, rs), ctx);
1633 else
1634 emit(rv_lwu(rd, off, rs), ctx);
1635 insn_len = ctx->ninsns - insns_start;
1636 break;
1637 }
1638
1639 emit_imm(RV_REG_T1, off, ctx);
1640 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1641 insns_start = ctx->ninsns;
1642 if (sign_ext)
1643 emit(rv_lw(rd, 0, RV_REG_T1), ctx);
1644 else
1645 emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
1646 insn_len = ctx->ninsns - insns_start;
1647 break;
1648 case BPF_DW:
1649 if (is_12b_int(off)) {
1650 insns_start = ctx->ninsns;
1651 emit_ld(rd, off, rs, ctx);
1652 insn_len = ctx->ninsns - insns_start;
1653 break;
1654 }
1655
1656 emit_imm(RV_REG_T1, off, ctx);
1657 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1658 insns_start = ctx->ninsns;
1659 emit_ld(rd, 0, RV_REG_T1, ctx);
1660 insn_len = ctx->ninsns - insns_start;
1661 break;
1662 }
1663
1664 ret = add_exception_handler(insn, ctx, rd, insn_len);
1665 if (ret)
1666 return ret;
1667
1668 if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
1669 return 1;
1670 break;
1671 }
1672 /* speculation barrier */
1673 case BPF_ST | BPF_NOSPEC:
1674 break;
1675
1676 /* ST: *(size *)(dst + off) = imm */
1677 case BPF_ST | BPF_MEM | BPF_B:
1678 emit_imm(RV_REG_T1, imm, ctx);
1679 if (is_12b_int(off)) {
1680 emit(rv_sb(rd, off, RV_REG_T1), ctx);
1681 break;
1682 }
1683
1684 emit_imm(RV_REG_T2, off, ctx);
1685 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1686 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
1687 break;
1688
1689 case BPF_ST | BPF_MEM | BPF_H:
1690 emit_imm(RV_REG_T1, imm, ctx);
1691 if (is_12b_int(off)) {
1692 emit(rv_sh(rd, off, RV_REG_T1), ctx);
1693 break;
1694 }
1695
1696 emit_imm(RV_REG_T2, off, ctx);
1697 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1698 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
1699 break;
1700 case BPF_ST | BPF_MEM | BPF_W:
1701 emit_imm(RV_REG_T1, imm, ctx);
1702 if (is_12b_int(off)) {
1703 emit_sw(rd, off, RV_REG_T1, ctx);
1704 break;
1705 }
1706
1707 emit_imm(RV_REG_T2, off, ctx);
1708 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1709 emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
1710 break;
1711 case BPF_ST | BPF_MEM | BPF_DW:
1712 emit_imm(RV_REG_T1, imm, ctx);
1713 if (is_12b_int(off)) {
1714 emit_sd(rd, off, RV_REG_T1, ctx);
1715 break;
1716 }
1717
1718 emit_imm(RV_REG_T2, off, ctx);
1719 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1720 emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
1721 break;
1722
1723 /* STX: *(size *)(dst + off) = src */
1724 case BPF_STX | BPF_MEM | BPF_B:
1725 if (is_12b_int(off)) {
1726 emit(rv_sb(rd, off, rs), ctx);
1727 break;
1728 }
1729
1730 emit_imm(RV_REG_T1, off, ctx);
1731 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1732 emit(rv_sb(RV_REG_T1, 0, rs), ctx);
1733 break;
1734 case BPF_STX | BPF_MEM | BPF_H:
1735 if (is_12b_int(off)) {
1736 emit(rv_sh(rd, off, rs), ctx);
1737 break;
1738 }
1739
1740 emit_imm(RV_REG_T1, off, ctx);
1741 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1742 emit(rv_sh(RV_REG_T1, 0, rs), ctx);
1743 break;
1744 case BPF_STX | BPF_MEM | BPF_W:
1745 if (is_12b_int(off)) {
1746 emit_sw(rd, off, rs, ctx);
1747 break;
1748 }
1749
1750 emit_imm(RV_REG_T1, off, ctx);
1751 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1752 emit_sw(RV_REG_T1, 0, rs, ctx);
1753 break;
1754 case BPF_STX | BPF_MEM | BPF_DW:
1755 if (is_12b_int(off)) {
1756 emit_sd(rd, off, rs, ctx);
1757 break;
1758 }
1759
1760 emit_imm(RV_REG_T1, off, ctx);
1761 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1762 emit_sd(RV_REG_T1, 0, rs, ctx);
1763 break;
1764 case BPF_STX | BPF_ATOMIC | BPF_W:
1765 case BPF_STX | BPF_ATOMIC | BPF_DW:
1766 emit_atomic(rd, rs, off, imm,
1767 BPF_SIZE(code) == BPF_DW, ctx);
1768 break;
1769 default:
1770 pr_err("bpf-jit: unknown opcode %02x\n", code);
1771 return -EINVAL;
1772 }
1773
1774 return 0;
1775 }
1776
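/* Build the program prologue: emit the patchable fentry nops, initialize the
 * tail-call counter, allocate the stack frame, save the callee-saved
 * registers the program actually uses, and set up the BPF frame pointer.
 */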
1777 void bpf_jit_build_prologue(struct rv_jit_context *ctx)
1778 {
1779 int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
1780
1781 bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
1782 if (bpf_stack_adjust)
1783 mark_fp(ctx);
1784
1785 if (seen_reg(RV_REG_RA, ctx))
1786 stack_adjust += 8;
1787 stack_adjust += 8; /* RV_REG_FP */
1788 if (seen_reg(RV_REG_S1, ctx))
1789 stack_adjust += 8;
1790 if (seen_reg(RV_REG_S2, ctx))
1791 stack_adjust += 8;
1792 if (seen_reg(RV_REG_S3, ctx))
1793 stack_adjust += 8;
1794 if (seen_reg(RV_REG_S4, ctx))
1795 stack_adjust += 8;
1796 if (seen_reg(RV_REG_S5, ctx))
1797 stack_adjust += 8;
1798 if (seen_reg(RV_REG_S6, ctx))
1799 stack_adjust += 8;
1800
1801 stack_adjust = round_up(stack_adjust, 16);
1802 stack_adjust += bpf_stack_adjust;
1803
1804 store_offset = stack_adjust - 8;
1805
1806 /* nops reserved for auipc+jalr pair */
1807 for (i = 0; i < RV_FENTRY_NINSNS; i++)
1808 emit(rv_nop(), ctx);
1809
1810 /* First instruction is always setting the tail-call-counter
1811 * (TCC) register. This instruction is skipped for tail calls.
1812 * Force using a 4-byte (non-compressed) instruction.
1813 */
1814 emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
1815
1816 emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
1817
1818 if (seen_reg(RV_REG_RA, ctx)) {
1819 emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
1820 store_offset -= 8;
1821 }
1822 emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
1823 store_offset -= 8;
1824 if (seen_reg(RV_REG_S1, ctx)) {
1825 emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
1826 store_offset -= 8;
1827 }
1828 if (seen_reg(RV_REG_S2, ctx)) {
1829 emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
1830 store_offset -= 8;
1831 }
1832 if (seen_reg(RV_REG_S3, ctx)) {
1833 emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
1834 store_offset -= 8;
1835 }
1836 if (seen_reg(RV_REG_S4, ctx)) {
1837 emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
1838 store_offset -= 8;
1839 }
1840 if (seen_reg(RV_REG_S5, ctx)) {
1841 emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
1842 store_offset -= 8;
1843 }
1844 if (seen_reg(RV_REG_S6, ctx)) {
1845 emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
1846 store_offset -= 8;
1847 }
1848
1849 emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
1850
1851 if (bpf_stack_adjust)
1852 emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
1853
1854 /* Program contains calls and tail calls, so RV_REG_TCC needs
1855 * to be saved across calls.
1856 */
1857 if (seen_tail_call(ctx) && seen_call(ctx))
1858 emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
1859
1860 ctx->stack_size = stack_adjust;
1861 }
1862
1863 void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
1864 {
1865 __build_epilogue(false, ctx);
1866 }
1867
1868 bool bpf_jit_supports_kfunc_call(void)
1869 {
1870 return true;
1871 }
1872