// SPDX-License-Identifier: GPL-2.0
/* BPF JIT compiler for RV64G
 *
 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
 *
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"

/* Register holding the tail-call counter (TCC). */
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if the program does calls */

/* Maps each BPF register to the RISC-V register used for it in JITed code. */
static const int regmap[] = {
	[BPF_REG_0] =	RV_REG_A5,
	[BPF_REG_1] =	RV_REG_A0,
	[BPF_REG_2] =	RV_REG_A1,
	[BPF_REG_3] =	RV_REG_A2,
	[BPF_REG_4] =	RV_REG_A3,
	[BPF_REG_5] =	RV_REG_A4,
	[BPF_REG_6] =	RV_REG_S1,
	[BPF_REG_7] =	RV_REG_S2,
	[BPF_REG_8] =	RV_REG_S3,
	[BPF_REG_9] =	RV_REG_S4,
	[BPF_REG_FP] =	RV_REG_S5,
	[BPF_REG_AX] =	RV_REG_T0,
};

/* Bit indices into ctx->flags.
 *
 * Bit 0 records that a tail call was seen. Every other flag reuses the
 * RISC-V register number as its bit index, which is why the helpers below
 * can pass a register value straight to __set_bit()/test_bit().
 */
enum {
	RV_CTX_F_SEEN_TAIL_CALL =	0,
	RV_CTX_F_SEEN_CALL =		RV_REG_RA,
	RV_CTX_F_SEEN_S1 =		RV_REG_S1,
	RV_CTX_F_SEEN_S2 =		RV_REG_S2,
	RV_CTX_F_SEEN_S3 =		RV_REG_S3,
	RV_CTX_F_SEEN_S4 =		RV_REG_S4,
	RV_CTX_F_SEEN_S5 =		RV_REG_S5,
	RV_CTX_F_SEEN_S6 =		RV_REG_S6,
};

/* Translate a BPF register number into its RISC-V register via regmap[].
 *
 * As a side effect, if the result is one of S1..S6 the matching "seen" bit
 * is set in ctx->flags; the prologue and epilogue consult those bits to
 * decide which registers to save and restore.
 *
 * NOTE(review): the ';' after the closing brace is an empty file-scope
 * declaration; harmless, but pedantic compilers warn about it.
 */
static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
{
	u8 reg = regmap[bpf_reg];

	switch (reg) {
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		__set_bit(reg, &ctx->flags);
	}
	return reg;
};

/* Return true if @reg (RA or S1..S6) has its "seen" bit set in ctx->flags.
 * Any other register value yields false.
 */
static bool seen_reg(int reg, struct rv_jit_context *ctx)
{
	switch (reg) {
	case RV_CTX_F_SEEN_CALL:
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		return test_bit(reg, &ctx->flags);
	}
	return false;
}

/* Record that S5 (the register backing BPF_REG_FP) is in use. */
static void mark_fp(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
}

/* Record that the program contains a call (shares its bit with RA). */
static void mark_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

/* Return true if mark_call() has been invoked on this context. */
static bool seen_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

/* Record that the program contains a tail call. */
static void mark_tail_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

/* Return true if mark_tail_call() has been invoked on this context. */
static bool seen_tail_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

/* Pick the register the tail-call counter should be read from.
 *
 * Always marks the context as containing a tail call. If a call has been
 * seen, S6 is marked as used and returned; otherwise A6 is returned.
 */
static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
{
	mark_tail_call(ctx);

	if (seen_call(ctx)) {
		__set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
		return RV_REG_S6;
	}
	return RV_REG_A6;
}

/* Return true if @val lies in [-2^31, 2^31). */
static bool is_32b_int(s64 val)
{
	return -(1L << 31) <= val && val < (1L << 31);
}

/* Return true if @val is an offset that an auipc+jalr pair can encode. */
static bool in_auipc_jalr_range(s64 val)
{
	/*
	 * auipc+jalr can reach any signed PC-relative offset in the range
	 * [-2^31 - 2^11, 2^31 - 2^11).
	 */
	return (-(1L << 31) - (1L << 11)) <= val &&
		val < ((1L << 31) - (1L << 11));
}

/* Emit code that loads the constant @val into @rd.
 *
 * Values that fit in 32 bits are built with either a single li (when the
 * rounded upper part is zero) or lui followed by addiw. Wider values are
 * built recursively: the upper part, with its trailing zero bits shifted
 * out, is loaded first, then shifted left into place, and the low 12 bits
 * are added if they are non-zero.
 */
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
	/* Note that the immediate from the add is sign-extended,
	 * which means that we need to compensate this by adding 2^12,
	 * when the 12th bit is set. A simpler way of doing this, and
	 * getting rid of the check, is to just add 2**11 before the
	 * shift. The "Loading a 32-Bit constant" example from the
	 * "Computer Organization and Design, RISC-V edition" book by
	 * Patterson/Hennessy highlights this fact.
	 *
	 * This also means that we need to process LSB to MSB.
	 */
	s64 upper = (val + (1 << 11)) >> 12;
	/* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
	 * and addi are signed and RVC checks will perform signed comparisons.
	 */
	s64 lower = ((val & 0xfff) << 52) >> 52;
	int shift;

	if (is_32b_int(val)) {
		if (upper)
			emit_lui(rd, upper, ctx);

		if (!upper) {
			emit_li(rd, lower, ctx);
			return;
		}

		emit_addiw(rd, rd, lower, ctx);
		return;
	}

	/* Drop the trailing zero bits of upper so the recursive call has
	 * fewer bits to build; the slli below shifts them back in together
	 * with the 12 bits reserved for lower.
	 */
	shift = __ffs(upper);
	upper >>= shift;
	shift += 12;

	emit_imm(rd, upper, ctx);

	emit_slli(rd, rd, shift, ctx);
	if (lower)
		emit_addi(rd, rd, lower, ctx);
}

/* Emit the function epilogue.
 *
 * Reloads RA (if seen), FP and each seen S1..S6 from the stack, walking
 * downwards from ctx->stack_size - 8 in the same order the prologue stored
 * them, then releases the stack frame.
 *
 * @is_tail_call: when false, A5 is copied to A0 as the return value and
 *                control returns through RA. When true, no return value is
 *                set and control jumps to T3 + 4, skipping the TCC
 *                initialisation at the start of the target program.
 */
static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}

	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
	/* Set return value. */
	if (!is_tail_call)
		emit_mv(RV_REG_A0, RV_REG_A5, ctx);
	emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
		  is_tail_call ? 4 : 0, /* skip TCC init */
		  ctx);
}

/* Emit a single conditional branch for BPF condition @cond.
 *
 * The "greater than" / "less or equal" conditions are expressed by
 * swapping the operands of the less-than / greater-or-equal encodings.
 * @rvoff is passed to the encoders shifted right by one. Conditions that
 * are not listed here emit nothing.
 */
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
		     struct rv_jit_context *ctx)
{
	switch (cond) {
	case BPF_JEQ:
		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGT:
		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JLT:
		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGE:
		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JLE:
		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JNE:
		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGT:
		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JSLT:
		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGE:
		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSLE:
		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
	}
}

/* Emit a conditional branch to @rvoff, choosing the shortest form.
 *
 * Three shapes are produced depending on the size of the offset:
 *   - fits is_13b_int(): a single conditional branch;
 *   - fits is_21b_int(): inverted branch over a jal;
 *   - otherwise:         inverted branch over an auipc/jalr pair (T1 is
 *                        used as the scratch register).
 */
static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
			struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (is_13b_int(rvoff)) {
		emit_bcc(cond, rd, rs, rvoff, ctx);
		return;
	}

	/* Adjust for jal */
	rvoff -= 4;

	/* Transform, e.g.:
	 *   bne rd,rs,foo
	 * to
	 *   beq rd,rs,<.L1>
	 *   (auipc foo)
	 *   jal(r) foo
	 * .L1
	 */
	cond = invert_bpf_cond(cond);
	if (is_21b_int(rvoff)) {
		emit_bcc(cond, rd, rs, 8, ctx);
		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
		return;
	}

	/* 32b No need for an additional rvoff adjustment, since we
	 * get that from the auipc at PC', where PC = PC' + 4.
	 */
	upper = (rvoff + (1 << 11)) >> 12;
	lower = rvoff & 0xfff;

	emit_bcc(cond, rd, rs, 12, ctx);
	emit(rv_auipc(RV_REG_T1, upper), ctx);
	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}

/* Clear the upper 32 bits of @reg by shifting left then right by 32. */
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
	emit_slli(reg, reg, 32, ctx);
	emit_srli(reg, reg, 32, ctx);
}

/* Emit the code for a BPF tail call.
 *
 * @insn: index of the tail-call instruction in the BPF program.
 *
 * Every "goto out" branch targets the end of this BPF instruction: the
 * offset is tc_ninsn (the span between this instruction's and the previous
 * instruction's ctx->offset entries) minus what has been emitted so far.
 * T1 carries the decremented counter until it is copied into RV_REG_TCC
 * just before the jump.
 *
 * Returns 0 on success, or -1 if is_12b_check() rejects one of the
 * structure offsets.
 */
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
	int tc_ninsn, off, start_insn = ctx->ninsns;
	u8 tcc = rv_tail_call_reg(ctx);

	/* a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
		   ctx->offset[0];
	emit_zext_32(RV_REG_A2, ctx);

	off = offsetof(struct bpf_array, map.max_entries);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);

	/* if (TCC-- < 0)
	 *     goto out;
	 */
	emit_addi(RV_REG_T1, tcc, -1, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);

	/* prog = array->ptrs[index];
	 * if (!prog)
	 *     goto out;
	 */
	emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
	emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
	off = offsetof(struct bpf_array, ptrs);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
	emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
	__build_epilogue(true, ctx);
	return 0;
}

/* Resolve the destination and source registers of @insn.
 *
 * *rd is set for every opcode except JA, CALL, EXIT and TAIL_CALL. *rs is
 * set when the opcode matches one of the tests below. Outputs that are not
 * set are left untouched, so callers must pre-initialise them.
 *
 * NOTE(review): the source-register tests are "any bit set" checks
 * (code & (A | B)), not equality checks, so they can be true for more
 * opcodes than those that really use a source register. The visible effect
 * is an extra bpf_to_rv_reg() lookup, which may mark a callee-saved
 * register as seen - confirm this is intended.
 */
static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
		      struct rv_jit_context *ctx)
{
	u8 code = insn->code;

	switch (code) {
	case BPF_JMP | BPF_JA:
	case BPF_JMP | BPF_CALL:
	case BPF_JMP | BPF_EXIT:
	case BPF_JMP | BPF_TAIL_CALL:
		break;
	default:
		*rd = bpf_to_rv_reg(insn->dst_reg, ctx);
	}

	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
	    code & BPF_LDX || code & BPF_STX)
		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}

/* Zero-extend copies of *rd and *rs into T2 and T1, then redirect *rd and
 * *rs to those scratch registers. The original registers are not modified.
 */
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit_mv(RV_REG_T2, *rd, ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit_mv(RV_REG_T1, *rs, ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

/* Sign-extend copies of *rd and *rs into T2 and T1 (addiw with 0), then
 * redirect *rd and *rs to those scratch registers.
 */
static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit_addiw(RV_REG_T2, *rd, 0, ctx);
	emit_addiw(RV_REG_T1, *rs, 0, ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

/* Zero-extend a copy of *rd into T2 and zero-extend T1 in place, then
 * redirect *rd to T2. Used when the other operand already lives in T1.
 */
static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
	emit_mv(RV_REG_T2, *rd, ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
}

/* Sign-extend a copy of *rd into T2 and redirect *rd to T2. */
static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
	emit_addiw(RV_REG_T2, *rd, 0, ctx);
	*rd = RV_REG_T2;
}

/* Emit a jump to PC-relative offset @rvoff, writing the link to @rd.
 *
 * A single jal is used when @rvoff is non-zero, fits is_21b_int() and
 * @force_jalr is false. Otherwise an auipc/jalr pair through T1 is used,
 * provided the offset passes in_auipc_jalr_range(). Note that a zero
 * offset always takes the auipc/jalr path.
 *
 * Returns 0 on success, or -ERANGE (after logging) if the offset cannot
 * be encoded.
 */
static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
			      struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
		emit(rv_jal(rd, rvoff >> 1), ctx);
		return 0;
	} else if (in_auipc_jalr_range(rvoff)) {
		upper = (rvoff + (1 << 11)) >> 12;
		lower = rvoff & 0xfff;
		emit(rv_auipc(RV_REG_T1, upper), ctx);
		emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
		return 0;
	}

	pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff);
	return -ERANGE;
}

/* Return true if @cond is one of the signed BPF comparison conditions. */
static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
		cond == BPF_JSGE || cond == BPF_JSLE;
}

/* Emit a call to @addr and move the result into BPF_REG_0's register.
 *
 * The offset is computed relative to the address of the next instruction
 * slot (ctx->insns + ctx->ninsns). When @addr is zero or ctx->insns is not
 * set, an offset of 0 is used instead. The auipc/jalr form is forced
 * whenever @fixed is false.
 *
 * Returns 0 on success or the error from emit_jump_and_link().
 */
static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
{
	s64 off = 0;
	u64 ip;
	u8 rd;
	int ret;

	if (addr && ctx->insns) {
		ip = (u64)(long)(ctx->insns + ctx->ninsns);
		off = addr - ip;
	}

	ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
	if (ret)
		return ret;
	rd = bpf_to_rv_reg(BPF_REG_0, ctx);
	emit_mv(rd, RV_REG_A0, ctx);
	return 0;
}

/* Emit RISC-V code for one BPF instruction.
 *
 * @insn:       the BPF instruction to translate.
 * @ctx:        JIT context; receives the emitted instructions and flags.
 * @extra_pass: forwarded to bpf_jit_get_func_addr() for calls.
 *
 * 32-bit ALU results are zero-extended here unless aux->verifier_zext is
 * set. T1 and T2 are used as scratch registers throughout.
 *
 * Returns:
 *   0   on success;
 *   1   when the following instruction slot has been handled as well (the
 *       second half of a 64-bit immediate load, or a zero-extension that
 *       follows a long-offset load) - presumably the caller skips it;
 *   < 0 on failure (-EINVAL for an unknown opcode, -1 from the tail-call
 *       path, or the error returned by a helper).
 */
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
		      bool extra_pass)
{
	/* JMP is treated as 64-bit here; JMP32 and ALU are the 32-bit classes. */
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		    BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
	struct bpf_prog_aux *aux = ctx->prog->aux;
	u8 rd = -1, rs = -1, code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	init_regs(&rd, &rs, insn, ctx);

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext */
			emit_zext_32(rd, ctx);
			break;
		}
		emit_mv(rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_add(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		if (is64)
			emit_sub(rd, rd, rs, ctx);
		else
			emit_subw(rd, rd, rs, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_and(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_or(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_xor(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit_sub(rd, RV_REG_ZERO, rd, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		/* No byte swap is emitted here; dst is only truncated to
		 * the requested width.
		 */
		switch (imm) {
		case 16:
			emit_slli(rd, rd, 48, ctx);
			emit_srli(rd, rd, 48, ctx);
			break;
		case 32:
			if (!aux->verifier_zext)
				emit_zext_32(rd, ctx);
			break;
		case 64:
			/* Do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
		/* Byte-reverse dst into T2, one byte per step: take the low
		 * byte of rd, add it to the accumulator, shift the
		 * accumulator left by 8 and rd right by 8. The last byte is
		 * added at out_be without a further shift.
		 */
		emit_li(RV_REG_T2, 0, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
		if (imm == 16)
			goto out_be;

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
		if (imm == 32)
			goto out_be;

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
out_be:
		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);

		emit_mv(rd, RV_REG_T2, ctx);
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_imm(rd, imm, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = dst OP imm
	 *
	 * Where an immediate instruction form is used below, it is chosen
	 * when imm passes is_12b_int(); otherwise imm is first loaded into
	 * T1 and the register form is used.
	 */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_12b_int(imm)) {
			emit_addi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_add(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		/* Subtraction of a small immediate is an addi of -imm. */
		if (is_12b_int(-imm)) {
			emit_addi(rd, rd, -imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_sub(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_12b_int(imm)) {
			emit_andi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_ori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_or(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_xori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_xor(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
		     rv_mulw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
		     rv_divuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
		     rv_remuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_slli(rd, rd, imm, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (is64)
			emit_srli(rd, rd, imm, ctx);
		else
			emit(rv_srliw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (is64)
			emit_srai(rd, rd, imm, ctx);
		else
			emit(rv_sraiw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		rvoff = rv_offset(i, off, ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		rvoff = rv_offset(i, off, ctx);
		if (!is64) {
			/* 32-bit compare: work on extended copies in T2/T1
			 * so that the BPF registers themselves are left
			 * unchanged.
			 */
			s = ctx->ninsns;
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd_rs(&rd, &rs, ctx);
			else
				emit_zext_32_rd_rs(&rd, &rs, ctx);
			e = ctx->ninsns;

			/* Adjust for extra insns */
			rvoff -= ninsns_rvoff(e - s);
		}

		if (BPF_OP(code) == BPF_JSET) {
			/* Adjust for and */
			rvoff -= 4;
			emit_and(RV_REG_T1, rd, rs, ctx);
			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
				    ctx);
		} else {
			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		}
		break;

	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (imm) {
			emit_imm(RV_REG_T1, imm, ctx);
			rs = RV_REG_T1;
		} else {
			/* If imm is 0, simply use zero register. */
			rs = RV_REG_ZERO;
		}
		if (!is64) {
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd(&rd, ctx);
			else
				emit_zext_32_rd_t1(&rd, ctx);
		}
		e = ctx->ninsns;

		/* Adjust for extra insns */
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		break;

	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (is_12b_int(imm)) {
			emit_andi(RV_REG_T1, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
		}
		/* For jset32, we should clear the upper 32 bits of t1, but
		 * sign-extension is sufficient here and saves one instruction,
		 * as t1 is used only in comparison against zero.
		 */
		if (!is64 && imm < 0)
			emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
		e = ctx->ninsns;
		/* Adjust for the instructions emitted before the branch. */
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		u64 addr;

		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		ret = emit_call(fixed, addr, ctx);
		if (ret)
			return ret;
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Nothing is emitted for an exit that is the last
		 * instruction of the program.
		 */
		if (i == ctx->prog->len - 1)
			break;

		rvoff = epilogue_offset(ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		/* The upper 32 bits come from the imm field of the next
		 * instruction slot, which is consumed here (hence return 1).
		 */
		struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		emit_imm(rd, imm64, ctx);
		return 1;
	}

	/* LDX: dst = *(size *)(src + off)
	 *
	 * When off passes is_12b_int() it is encoded directly in the load.
	 * Otherwise the address is computed in T1 first; on that path only,
	 * a following zero-extension instruction is reported as handled
	 * (return 1).
	 */
	case BPF_LDX | BPF_MEM | BPF_B:
		if (is_12b_int(off)) {
			emit(rv_lbu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_H:
		if (is_12b_int(off)) {
			emit(rv_lhu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_W:
		if (is_12b_int(off)) {
			emit(rv_lwu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (is_12b_int(off)) {
			emit_ld(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit_ld(rd, 0, RV_REG_T1, ctx);
		break;

	/* ST: *(size *)(dst + off) = imm
	 *
	 * The immediate is always loaded into T1; T2 holds the computed
	 * address when off does not pass is_12b_int().
	 */
	case BPF_ST | BPF_MEM | BPF_B:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sb(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;

	case BPF_ST | BPF_MEM | BPF_H:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sh(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_W:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sw(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_DW:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sd(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
		if (is_12b_int(off)) {
			emit(rv_sb(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit(rv_sb(RV_REG_T1, 0, rs), ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_H:
		if (is_12b_int(off)) {
			emit(rv_sh(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit(rv_sh(RV_REG_T1, 0, rs), ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_W:
		if (is_12b_int(off)) {
			emit_sw(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit_sw(RV_REG_T1, 0, rs, ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_DW:
		if (is_12b_int(off)) {
			emit_sd(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit_sd(RV_REG_T1, 0, rs, ctx);
		break;
	/* STX XADD: lock *(u32 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_W:
	/* STX XADD: lock *(u64 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_DW:
		/* The amoadd encoders below are given no offset, so a
		 * non-zero off is folded into the address in T1 first.
		 */
		if (off) {
			if (is_12b_int(off)) {
				emit_addi(RV_REG_T1, rd, off, ctx);
			} else {
				emit_imm(RV_REG_T1, off, ctx);
				emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			}

			rd = RV_REG_T1;
		}

		emit(BPF_SIZE(code) == BPF_W ?
		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		break;
	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

/* Emit the function prologue and record the frame size in ctx->stack_size.
 *
 * The frame holds one 8-byte slot for FP plus one for each of RA and
 * S1..S6 that has been marked as seen, rounded up to 16 bytes, followed by
 * the BPF stack (prog->aux->stack_depth rounded up to 16 bytes). Registers
 * are stored downwards from the top of the frame in the order RA, FP,
 * S1..S6; __build_epilogue() reloads them in the same order.
 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
	if (bpf_stack_adjust)
		mark_fp(ctx);

	if (seen_reg(RV_REG_RA, ctx))
		stack_adjust += 8;
	stack_adjust += 8; /* RV_REG_FP */
	if (seen_reg(RV_REG_S1, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S2, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S3, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S4, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S5, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S6, ctx))
		stack_adjust += 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	store_offset = stack_adjust - 8;

	/* First instruction is always setting the tail-call-counter
	 * (TCC) register. This instruction is skipped for tail calls.
	 * Force using a 4-byte (non-compressed) instruction.
	 */
	emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);

	emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
		store_offset -= 8;
	}
	emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
		store_offset -= 8;
	}

	/* FP points at the top of the new frame (the SP value on entry). */
	emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);

	/* S5 (BPF_REG_FP) points at the top of the BPF stack area. */
	if (bpf_stack_adjust)
		emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);

	/* Program contains calls and tail calls, so RV_REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);

	ctx->stack_size = stack_adjust;
}

/* Emit the regular (non-tail-call) function epilogue. */
void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}

/* Allocate @size bytes for JITed code, page-aligned, with PAGE_KERNEL_EXEC
 * protection, inside [BPF_JIT_REGION_START, BPF_JIT_REGION_END).
 */
void *bpf_jit_alloc_exec(unsigned long size)
{
	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
				    BPF_JIT_REGION_END, GFP_KERNEL,
				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
				    __builtin_return_address(0));
}

/* Release memory obtained from bpf_jit_alloc_exec().
 *
 * NOTE(review): "return <void expression>;" in a void function is accepted
 * by GNU C but is not strict ISO C.
 */
void bpf_jit_free_exec(void *addr)
{
	return vfree(addr);
}