// SPDX-License-Identifier: GPL-2.0
#include <linux/moduleloader.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/cache.h>
#include <linux/if_vlan.h>

#include <asm/cacheflush.h>
#include <asm/ptrace.h>

#include "bpf_jit_64.h"

int bpf_jit_enable __read_mostly;

static inline bool is_simm13(unsigned int value)
{
	return value + 0x1000 < 0x2000;
}

static inline bool is_simm10(unsigned int value)
{
	return value + 0x200 < 0x400;
}

static inline bool is_simm5(unsigned int value)
{
	return value + 0x10 < 0x20;
}

static inline bool is_sethi(unsigned int value)
{
	return (value & ~0x3fffff) == 0;
}

static void bpf_flush_icache(void *start_, void *end_)
{
	/* Cheetah's I-cache is fully coherent. */
	if (tlb_type == spitfire) {
		unsigned long start = (unsigned long) start_;
		unsigned long end = (unsigned long) end_;

		start &= ~7UL;
		end = (end + 7UL) & ~7UL;
		while (start < end) {
			flushi(start);
			start += 32;
		}
	}
}

#define SEEN_DATAREF 1 /* might call external helpers */
#define SEEN_XREG    2 /* ebx is used */
#define SEEN_MEM     4 /* use mem[] for temporary storage */

#define S13(X)		((X) & 0x1fff)
#define S5(X)		((X) & 0x1f)
#define IMMED		0x00002000
#define RD(X)		((X) << 25)
#define RS1(X)		((X) << 14)
#define RS2(X)		((X))
#define OP(X)		((X) << 30)
#define OP2(X)		((X) << 22)
#define OP3(X)		((X) << 19)
#define COND(X)		(((X) & 0xf) << 25)
#define CBCOND(X)	(((X) & 0x1f) << 25)
#define F1(X)		OP(X)
#define F2(X, Y)	(OP(X) | OP2(Y))
#define F3(X, Y)	(OP(X) | OP3(Y))
#define ASI(X)		(((X) & 0xff) << 5)

#define CONDN		COND(0x0)
#define CONDE		COND(0x1)
#define CONDLE		COND(0x2)
#define CONDL		COND(0x3)
#define CONDLEU		COND(0x4)
#define CONDCS		COND(0x5)
#define CONDNEG		COND(0x6)
#define CONDVC		COND(0x7)
#define CONDA		COND(0x8)
#define CONDNE		COND(0x9)
#define CONDG		COND(0xa)
#define CONDGE		COND(0xb)
#define CONDGU		COND(0xc)
#define CONDCC		COND(0xd)
#define CONDPOS		COND(0xe)
#define CONDVS		COND(0xf)

#define CONDGEU		CONDCC
#define CONDLU		CONDCS

#define WDISP22(X)	(((X) >> 2) & 0x3fffff)
#define WDISP19(X)	(((X) >> 2) & 0x7ffff)

/* The 10-bit branch displacement for CBCOND is split into two fields */
static u32 WDISP10(u32 off)
{
	u32 ret = ((off >> 2) & 0xff) << 5;

	ret |= ((off >> (2 + 8)) & 0x03) << 19;

	return ret;
}

#define CBCONDE		CBCOND(0x09)
#define CBCONDLE	CBCOND(0x0a)
#define CBCONDL		CBCOND(0x0b)
#define CBCONDLEU	CBCOND(0x0c)
#define CBCONDCS	CBCOND(0x0d)
#define CBCONDN		CBCOND(0x0e)
#define CBCONDVS	CBCOND(0x0f)
#define CBCONDNE	CBCOND(0x19)
#define CBCONDG		CBCOND(0x1a)
#define CBCONDGE	CBCOND(0x1b)
#define CBCONDGU	CBCOND(0x1c)
#define CBCONDCC	CBCOND(0x1d)
#define CBCONDPOS	CBCOND(0x1e)
#define CBCONDVC	CBCOND(0x1f)

#define CBCONDGEU	CBCONDCC
#define CBCONDLU	CBCONDCS

#define ANNUL		(1 << 29)
#define XCC		(1 << 21)

#define BRANCH		(F2(0, 1) | XCC)
#define CBCOND_OP	(F2(0, 3) | XCC)

#define BA		(BRANCH | CONDA)
#define BG		(BRANCH | CONDG)
#define BL		(BRANCH | CONDL)
#define BLE		(BRANCH | CONDLE)
#define BGU		(BRANCH | CONDGU)
#define BLEU		(BRANCH | CONDLEU)
#define BGE		(BRANCH | CONDGE)
#define BGEU		(BRANCH | CONDGEU)
#define BLU		(BRANCH | CONDLU)
#define BE		(BRANCH | CONDE)
#define BNE		(BRANCH | CONDNE)

#define SETHI(K, REG)	\
	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
#define OR_LO(K, REG)	\
	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))

#define ADD		F3(2, 0x00)
#define AND		F3(2, 0x01)
#define ANDCC		F3(2, 0x11)
#define OR		F3(2, 0x02)
#define XOR		F3(2, 0x03)
#define SUB		F3(2, 0x04)
#define SUBCC		F3(2, 0x14)
#define MUL		F3(2, 0x0a)
#define MULX		F3(2, 0x09)
#define UDIVX		F3(2, 0x0d)
#define DIV		F3(2, 0x0e)
#define SLL		F3(2, 0x25)
#define SLLX		(F3(2, 0x25)|(1<<12))
#define SRA		F3(2, 0x27)
#define SRAX		(F3(2, 0x27)|(1<<12))
#define SRL		F3(2, 0x26)
#define SRLX		(F3(2, 0x26)|(1<<12))
#define JMPL		F3(2, 0x38)
#define SAVE		F3(2, 0x3c)
#define RESTORE		F3(2, 0x3d)
#define CALL		F1(1)
#define BR		F2(0, 0x01)
#define RD_Y		F3(2, 0x28)
#define WR_Y		F3(2, 0x30)

#define LD32		F3(3, 0x00)
#define LD8		F3(3, 0x01)
#define LD16		F3(3, 0x02)
#define LD64		F3(3, 0x0b)
#define LD64A		F3(3, 0x1b)
#define ST8		F3(3, 0x05)
#define ST16		F3(3, 0x06)
#define ST32		F3(3, 0x04)
#define ST64		F3(3, 0x0e)

#define CAS		F3(3, 0x3c)
#define CASX		F3(3, 0x3e)

#define LDPTR		LD64
#define BASE_STACKFRAME	176

#define LD32I		(LD32 | IMMED)
#define LD8I		(LD8 | IMMED)
#define LD16I		(LD16 | IMMED)
#define LD64I		(LD64 | IMMED)
#define LDPTRI		(LDPTR | IMMED)
#define ST32I		(ST32 | IMMED)

struct jit_ctx {
	struct bpf_prog		*prog;
	unsigned int		*offset;
	int			idx;
	int			epilogue_offset;
	bool			tmp_1_used;
	bool			tmp_2_used;
	bool			tmp_3_used;
	bool			saw_ld_abs_ind;
	bool			saw_frame_pointer;
	bool			saw_call;
	bool			saw_tail_call;
	u32			*image;
};

#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define SKB_HLEN_REG	(MAX_BPF_JIT_REG + 2)
#define SKB_DATA_REG	(MAX_BPF_JIT_REG + 3)
#define TMP_REG_3	(MAX_BPF_JIT_REG + 4)

/* Map BPF registers to SPARC registers */
static const int bpf2sparc[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = O5,

	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = O0,
	[BPF_REG_2] = O1,
	[BPF_REG_3] = O2,
	[BPF_REG_4] = O3,
	[BPF_REG_5] = O4,

	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = L0,
	[BPF_REG_7] = L1,
	[BPF_REG_8] = L2,
	[BPF_REG_9] = L3,

	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = L6,

	[BPF_REG_AX] = G7,

	/* temporary register for internal BPF JIT */
	[TMP_REG_1] = G1,
	[TMP_REG_2] = G2,
	[TMP_REG_3] = G3,

	[SKB_HLEN_REG] = L4,
	[SKB_DATA_REG] = L5,
};

static void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL)
		ctx->image[ctx->idx] = insn;

	ctx->idx++;
}

static void emit_call(u32 *func, struct jit_ctx *ctx)
{
	if (ctx->image != NULL) {
		void *here = &ctx->image[ctx->idx];
		unsigned int off;

		off = (void *)func - here;
		ctx->image[ctx->idx] = CALL | ((off >> 2) & 0x3fffffff);
	}
	ctx->idx++;
}

static void emit_nop(struct jit_ctx *ctx)
{
	emit(SETHI(0, G0), ctx);
}
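
/* A register-to-register move has no dedicated opcode on SPARC; it is
 * synthesized as "or %g0, src, dst", just as the nop above is the
 * canonical "sethi 0, %g0" encoding.
 */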
static void emit_reg_move(u32 from, u32 to, struct jit_ctx *ctx)
{
	emit(OR | RS1(G0) | RS2(from) | RD(to), ctx);
}

/* Emit 32-bit constant, zero extended. */
static void emit_set_const(s32 K, u32 reg, struct jit_ctx *ctx)
{
	emit(SETHI(K, reg), ctx);
	emit(OR_LO(K, reg), ctx);
}

/* Emit 32-bit constant, sign extended. */
static void emit_set_const_sext(s32 K, u32 reg, struct jit_ctx *ctx)
{
	if (K >= 0) {
		emit(SETHI(K, reg), ctx);
		emit(OR_LO(K, reg), ctx);
	} else {
		u32 hbits = ~(u32) K;
		u32 lbits = -0x400 | (u32) K;

		emit(SETHI(hbits, reg), ctx);
		emit(XOR | IMMED | RS1(reg) | S13(lbits) | RD(reg), ctx);
	}
}

static void emit_alu(u32 opcode, u32 src, u32 dst, struct jit_ctx *ctx)
{
	emit(opcode | RS1(dst) | RS2(src) | RD(dst), ctx);
}

static void emit_alu3(u32 opcode, u32 a, u32 b, u32 c, struct jit_ctx *ctx)
{
	emit(opcode | RS1(a) | RS2(b) | RD(c), ctx);
}

static void emit_alu_K(unsigned int opcode, unsigned int dst, unsigned int imm,
		       struct jit_ctx *ctx)
{
	bool small_immed = is_simm13(imm);
	unsigned int insn = opcode;

	insn |= RS1(dst) | RD(dst);
	if (small_immed) {
		emit(insn | IMMED | S13(imm), ctx);
	} else {
		unsigned int tmp = bpf2sparc[TMP_REG_1];

		ctx->tmp_1_used = true;

		emit_set_const_sext(imm, tmp, ctx);
		emit(insn | RS2(tmp), ctx);
	}
}

static void emit_alu3_K(unsigned int opcode, unsigned int src, unsigned int imm,
			unsigned int dst, struct jit_ctx *ctx)
{
	bool small_immed = is_simm13(imm);
	unsigned int insn = opcode;

	insn |= RS1(src) | RD(dst);
	if (small_immed) {
		emit(insn | IMMED | S13(imm), ctx);
	} else {
		unsigned int tmp = bpf2sparc[TMP_REG_1];

		ctx->tmp_1_used = true;

		emit_set_const_sext(imm, tmp, ctx);
		emit(insn | RS2(tmp), ctx);
	}
}

static void emit_loadimm32(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
	if (K >= 0 && is_simm13(K)) {
		/* or %g0, K, DEST */
		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
	} else {
		emit_set_const(K, dest, ctx);
	}
}

static void emit_loadimm(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
	if (is_simm13(K)) {
		/* or %g0, K, DEST */
		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
	} else {
		emit_set_const(K, dest, ctx);
	}
}

static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
{
	if (is_simm13(K)) {
		/* or %g0, K, DEST */
		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
	} else {
		emit_set_const_sext(K, dest, ctx);
	}
}
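
/* Helpers for synthesizing arbitrary 64-bit constants.  The analysis
 * below finds the highest and lowest set bits, and whether every bit
 * in between is set, so that emit_loadimm64() can pick a short
 * sethi/or/sllx/srlx/xor sequence instead of always building both
 * 32-bit halves and merging them.
 */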
static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
				   int *hbsp, int *lbsp, int *abbasp)
{
	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
	int i;

	lowest_bit_set = highest_bit_set = -1;
	i = 0;
	do {
		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
			lowest_bit_set = i;
		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
			highest_bit_set = (64 - i - 1);
	} while (++i < 32 && (highest_bit_set == -1 ||
			      lowest_bit_set == -1));
	if (i == 32) {
		i = 0;
		do {
			if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
				lowest_bit_set = i + 32;
			if (highest_bit_set == -1 &&
			    ((low_bits >> (32 - i - 1)) & 1))
				highest_bit_set = 32 - i - 1;
		} while (++i < 32 && (highest_bit_set == -1 ||
				      lowest_bit_set == -1));
	}

	all_bits_between_are_set = 1;
	for (i = lowest_bit_set; i <= highest_bit_set; i++) {
		if (i < 32) {
			if ((low_bits & (1 << i)) != 0)
				continue;
		} else {
			if ((high_bits & (1 << (i - 32))) != 0)
				continue;
		}
		all_bits_between_are_set = 0;
		break;
	}
	*hbsp = highest_bit_set;
	*lbsp = lowest_bit_set;
	*abbasp = all_bits_between_are_set;
}

static unsigned long create_simple_focus_bits(unsigned long high_bits,
					      unsigned long low_bits,
					      int lowest_bit_set, int shift)
{
	long hi, lo;

	if (lowest_bit_set < 32) {
		lo = (low_bits >> lowest_bit_set) << shift;
		hi = ((high_bits << (32 - lowest_bit_set)) << shift);
	} else {
		lo = 0;
		hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
	}
	return hi | lo;
}

static bool const64_is_2insns(unsigned long high_bits,
			      unsigned long low_bits)
{
	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

	if (high_bits == 0 || high_bits == 0xffffffff)
		return true;

	analyze_64bit_constant(high_bits, low_bits,
			       &highest_bit_set, &lowest_bit_set,
			       &all_bits_between_are_set);

	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
	    all_bits_between_are_set != 0)
		return true;

	if (highest_bit_set - lowest_bit_set < 21)
		return true;

	return false;
}

static void sparc_emit_set_const64_quick2(unsigned long high_bits,
					  unsigned long low_imm,
					  unsigned int dest,
					  int shift_count, struct jit_ctx *ctx)
{
	emit_loadimm32(high_bits, dest, ctx);

	/* Now shift it up into place. */
	emit_alu_K(SLLX, dest, shift_count, ctx);

	/* If there is a low immediate part piece, finish up by
	 * putting that in as well.
	 */
	if (low_imm != 0)
		emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
}

static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
{
	int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
	unsigned int tmp = bpf2sparc[TMP_REG_1];
	u32 low_bits = (K & 0xffffffff);
	u32 high_bits = (K >> 32);

	/* These two tests also take care of all of the one
	 * instruction cases.
	 */
	if (high_bits == 0xffffffff && (low_bits & 0x80000000))
		return emit_loadimm_sext(K, dest, ctx);
	if (high_bits == 0x00000000)
		return emit_loadimm32(K, dest, ctx);

	analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
			       &lowest_bit_set, &all_bits_between_are_set);

	/* 1) mov	-1, %reg
	 *    sllx	%reg, shift, %reg
	 * 2) mov	-1, %reg
	 *    srlx	%reg, shift, %reg
	 * 3) mov	some_small_const, %reg
	 *    sllx	%reg, shift, %reg
	 */
	if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
	     all_bits_between_are_set != 0) ||
	    ((highest_bit_set - lowest_bit_set) < 12)) {
		int shift = lowest_bit_set;
		long the_const = -1;

		if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
		    all_bits_between_are_set == 0) {
			the_const =
				create_simple_focus_bits(high_bits, low_bits,
							 lowest_bit_set, 0);
		} else if (lowest_bit_set == 0)
			shift = -(63 - highest_bit_set);

		emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
		if (shift > 0)
			emit_alu_K(SLLX, dest, shift, ctx);
		else if (shift < 0)
			emit_alu_K(SRLX, dest, -shift, ctx);

		return;
	}

	/* Now a range of 22 or less bits set somewhere.
	 * 1) sethi	%hi(focus_bits), %reg
	 *    sllx	%reg, shift, %reg
	 * 2) sethi	%hi(focus_bits), %reg
	 *    srlx	%reg, shift, %reg
	 */
	if ((highest_bit_set - lowest_bit_set) < 21) {
		unsigned long focus_bits =
			create_simple_focus_bits(high_bits, low_bits,
						 lowest_bit_set, 10);

		emit(SETHI(focus_bits, dest), ctx);

		/* If lowest_bit_set == 10 then a sethi alone could
		 * have done it.
		 */
		if (lowest_bit_set < 10)
			emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
		else if (lowest_bit_set > 10)
			emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
		return;
	}

	/* Ok, now 3 instruction sequences. */
	if (low_bits == 0) {
		emit_loadimm32(high_bits, dest, ctx);
		emit_alu_K(SLLX, dest, 32, ctx);
		return;
	}

	/* We may be able to do something quick
	 * when the constant is negated, so try that.
	 */
	if (const64_is_2insns((~high_bits) & 0xffffffff,
			      (~low_bits) & 0xfffffc00)) {
		/* NOTE: The trailing bits get XOR'd so we need the
		 * non-negated bits, not the negated ones.
		 */
		unsigned long trailing_bits = low_bits & 0x3ff;

		if ((((~high_bits) & 0xffffffff) == 0 &&
		     ((~low_bits) & 0x80000000) == 0) ||
		    (((~high_bits) & 0xffffffff) == 0xffffffff &&
		     ((~low_bits) & 0x80000000) != 0)) {
			unsigned long fast_int = (~low_bits & 0xffffffff);

			if ((is_sethi(fast_int) &&
			     (~high_bits & 0xffffffff) == 0)) {
				emit(SETHI(fast_int, dest), ctx);
			} else if (is_simm13(fast_int)) {
				emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
			} else {
				emit_loadimm64(fast_int, dest, ctx);
			}
		} else {
			u64 n = ((~low_bits) & 0xfffffc00) |
				(((unsigned long)((~high_bits) & 0xffffffff))<<32);
			emit_loadimm64(n, dest, ctx);
		}

		low_bits = -0x400 | trailing_bits;

		emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
		return;
	}

	/* 1) sethi	%hi(xxx), %reg
	 *    or	%reg, %lo(xxx), %reg
	 *    sllx	%reg, yyy, %reg
	 */
	if ((highest_bit_set - lowest_bit_set) < 32) {
		unsigned long focus_bits =
			create_simple_focus_bits(high_bits, low_bits,
						 lowest_bit_set, 0);

		/* So what we know is that the set bits straddle the
		 * middle of the 64-bit word.
		 */
		sparc_emit_set_const64_quick2(focus_bits, 0, dest,
					      lowest_bit_set, ctx);
		return;
	}

	/* 1) sethi	%hi(high_bits), %reg
	 *    or	%reg, %lo(high_bits), %reg
	 *    sllx	%reg, 32, %reg
	 *    or	%reg, low_bits, %reg
	 */
	if (is_simm13(low_bits) && ((int)low_bits > 0)) {
		sparc_emit_set_const64_quick2(high_bits, low_bits,
					      dest, 32, ctx);
		return;
	}

	/* Oh well, we tried... Do a full 64-bit decomposition. */
	ctx->tmp_1_used = true;

	emit_loadimm32(high_bits, tmp, ctx);
	emit_loadimm32(low_bits, dest, ctx);
	emit_alu_K(SLLX, tmp, 32, ctx);
	emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
}

static void emit_branch(unsigned int br_opc, unsigned int from_idx, unsigned int to_idx,
			struct jit_ctx *ctx)
{
	unsigned int off = to_idx - from_idx;

	if (br_opc & XCC)
		emit(br_opc | WDISP19(off << 2), ctx);
	else
		emit(br_opc | WDISP22(off << 2), ctx);
}

static void emit_cbcond(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
			const u8 dst, const u8 src, struct jit_ctx *ctx)
{
	unsigned int off = to_idx - from_idx;

	emit(cb_opc | WDISP10(off << 2) | RS1(dst) | RS2(src), ctx);
}

static void emit_cbcondi(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
			 const u8 dst, s32 imm, struct jit_ctx *ctx)
{
	unsigned int off = to_idx - from_idx;

	emit(cb_opc | IMMED | WDISP10(off << 2) | RS1(dst) | S5(imm), ctx);
}

#define emit_read_y(REG, CTX)	emit(RD_Y | RD(REG), CTX)
#define emit_write_y(REG, CTX)	emit(WR_Y | IMMED | RS1(REG) | S13(0), CTX)

#define emit_cmp(R1, R2, CTX)				\
	emit(SUBCC | RS1(R1) | RS2(R2) | RD(G0), CTX)

#define emit_cmpi(R1, IMM, CTX)				\
	emit(SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)

#define emit_btst(R1, R2, CTX)				\
	emit(ANDCC | RS1(R1) | RS2(R2) | RD(G0), CTX)

#define emit_btsti(R1, IMM, CTX)			\
	emit(ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
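
/* Emit a conditional branch for a BPF jump.  When the CPU supports
 * compare-and-branch (CBCOND) and the target fits in the 10-bit
 * displacement, a single cbcond instruction is used; otherwise a
 * cmp/btst is followed by a branch and a nop for the delay slot.
 * BPF_JSET always takes the btst+branch path since cbcond can only
 * compare, not test bits.
 */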
static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
				   const s32 imm, bool is_imm, int branch_dst,
				   struct jit_ctx *ctx)
{
	bool use_cbcond = (sparc64_elf_hwcap & AV_SPARC_CBCOND) != 0;
	const u8 tmp = bpf2sparc[TMP_REG_1];

	branch_dst = ctx->offset[branch_dst];

	if (!is_simm10(branch_dst - ctx->idx) ||
	    BPF_OP(code) == BPF_JSET)
		use_cbcond = false;

	if (is_imm) {
		bool fits = true;

		if (use_cbcond) {
			if (!is_simm5(imm))
				fits = false;
		} else if (!is_simm13(imm)) {
			fits = false;
		}
		if (!fits) {
			ctx->tmp_1_used = true;
			emit_loadimm_sext(imm, tmp, ctx);
			src = tmp;
			is_imm = false;
		}
	}

	if (!use_cbcond) {
		u32 br_opcode;

		if (BPF_OP(code) == BPF_JSET) {
			if (is_imm)
				emit_btsti(dst, imm, ctx);
			else
				emit_btst(dst, src, ctx);
		} else {
			if (is_imm)
				emit_cmpi(dst, imm, ctx);
			else
				emit_cmp(dst, src, ctx);
		}
		switch (BPF_OP(code)) {
		case BPF_JEQ:
			br_opcode = BE;
			break;
		case BPF_JGT:
			br_opcode = BGU;
			break;
		case BPF_JLT:
			br_opcode = BLU;
			break;
		case BPF_JGE:
			br_opcode = BGEU;
			break;
		case BPF_JLE:
			br_opcode = BLEU;
			break;
		case BPF_JSET:
		case BPF_JNE:
			br_opcode = BNE;
			break;
		case BPF_JSGT:
			br_opcode = BG;
			break;
		case BPF_JSLT:
			br_opcode = BL;
			break;
		case BPF_JSGE:
			br_opcode = BGE;
			break;
		case BPF_JSLE:
			br_opcode = BLE;
			break;
		default:
			/* Make sure we don't leak kernel information to the
			 * user.
			 */
			return -EFAULT;
		}
		emit_branch(br_opcode, ctx->idx, branch_dst, ctx);
		emit_nop(ctx);
	} else {
		u32 cbcond_opcode;

		switch (BPF_OP(code)) {
		case BPF_JEQ:
			cbcond_opcode = CBCONDE;
			break;
		case BPF_JGT:
			cbcond_opcode = CBCONDGU;
			break;
		case BPF_JLT:
			cbcond_opcode = CBCONDLU;
			break;
		case BPF_JGE:
			cbcond_opcode = CBCONDGEU;
			break;
		case BPF_JLE:
			cbcond_opcode = CBCONDLEU;
			break;
		case BPF_JNE:
			cbcond_opcode = CBCONDNE;
			break;
		case BPF_JSGT:
			cbcond_opcode = CBCONDG;
			break;
		case BPF_JSLT:
			cbcond_opcode = CBCONDL;
			break;
		case BPF_JSGE:
			cbcond_opcode = CBCONDGE;
			break;
		case BPF_JSLE:
			cbcond_opcode = CBCONDLE;
			break;
		default:
			/* Make sure we don't leak kernel information to the
			 * user.
			 */
			return -EFAULT;
		}
		cbcond_opcode |= CBCOND_OP;
		if (is_imm)
			emit_cbcondi(cbcond_opcode, ctx->idx, branch_dst,
				     dst, imm, ctx);
		else
			emit_cbcond(cbcond_opcode, ctx->idx, branch_dst,
				    dst, src, ctx);
	}
	return 0;
}

static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
{
	const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
	const u8 r_data = bpf2sparc[SKB_DATA_REG];
	const u8 r_tmp = bpf2sparc[TMP_REG_1];
	unsigned int off;

	off = offsetof(struct sk_buff, len);
	emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);

	off = offsetof(struct sk_buff, data_len);
	emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);

	emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);

	off = offsetof(struct sk_buff, data);
	emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
}

/* Just skip the save instruction and the ctx register move. */
#define BPF_TAILCALL_PROLOGUE_SKIP	16
#define BPF_TAILCALL_CNT_SP_OFF		(STACK_BIAS + 128)
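
/* The prologue allocates BASE_STACKFRAME plus the program's stack
 * depth (and 8 more bytes for the tail-call counter, which lives at
 * %sp + STACK_BIAS + 128), then points the BPF frame pointer at
 * %fp + STACK_BIAS and copies the context argument from %i0 to %o0.
 */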
static void build_prologue(struct jit_ctx *ctx)
{
	s32 stack_needed = BASE_STACKFRAME;

	if (ctx->saw_frame_pointer || ctx->saw_tail_call) {
		struct bpf_prog *prog = ctx->prog;
		u32 stack_depth;

		stack_depth = prog->aux->stack_depth;
		stack_needed += round_up(stack_depth, 16);
	}

	if (ctx->saw_tail_call)
		stack_needed += 8;

	/* save %sp, -176, %sp */
	emit(SAVE | IMMED | RS1(SP) | S13(-stack_needed) | RD(SP), ctx);

	/* tail_call_cnt = 0 */
	if (ctx->saw_tail_call) {
		u32 off = BPF_TAILCALL_CNT_SP_OFF;

		emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(G0), ctx);
	} else {
		emit_nop(ctx);
	}
	if (ctx->saw_frame_pointer) {
		const u8 vfp = bpf2sparc[BPF_REG_FP];

		emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
	}

	emit_reg_move(I0, O0, ctx);
	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */

	if (ctx->saw_ld_abs_ind)
		load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
}

static void build_epilogue(struct jit_ctx *ctx)
{
	ctx->epilogue_offset = ctx->idx;

	/* ret (jmpl %i7 + 8, %g0) */
	emit(JMPL | IMMED | RS1(I7) | S13(8) | RD(G0), ctx);

	/* restore %i5, %g0, %o0 */
	emit(RESTORE | RS1(bpf2sparc[BPF_REG_0]) | RS2(G0) | RD(O0), ctx);
}

static void emit_tail_call(struct jit_ctx *ctx)
{
	const u8 bpf_array = bpf2sparc[BPF_REG_2];
	const u8 bpf_index = bpf2sparc[BPF_REG_3];
	const u8 tmp = bpf2sparc[TMP_REG_1];
	u32 off;

	ctx->saw_tail_call = true;

	off = offsetof(struct bpf_array, map.max_entries);
	emit(LD32 | IMMED | RS1(bpf_array) | S13(off) | RD(tmp), ctx);
	emit_cmp(bpf_index, tmp, ctx);
#define OFFSET1 17
	emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET1, ctx);
	emit_nop(ctx);

	off = BPF_TAILCALL_CNT_SP_OFF;
	emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
	emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx);
#define OFFSET2 13
	emit_branch(BGU, ctx->idx, ctx->idx + OFFSET2, ctx);
	emit_nop(ctx);

	emit_alu_K(ADD, tmp, 1, ctx);
	off = BPF_TAILCALL_CNT_SP_OFF;
	emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);

	emit_alu3_K(SLL, bpf_index, 3, tmp, ctx);
	emit_alu(ADD, bpf_array, tmp, ctx);
	off = offsetof(struct bpf_array, ptrs);
	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);

	emit_cmpi(tmp, 0, ctx);
#define OFFSET3 5
	emit_branch(BE, ctx->idx, ctx->idx + OFFSET3, ctx);
	emit_nop(ctx);

	off = offsetof(struct bpf_prog, bpf_func);
	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);

	off = BPF_TAILCALL_PROLOGUE_SKIP;
	emit(JMPL | IMMED | RS1(tmp) | S13(off) | RD(G0), ctx);
	emit_nop(ctx);
}
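
/* Translate a single eBPF instruction.  Returns 0 on success, 1 when
 * the instruction also consumed the following slot (the second half of
 * a BPF_LD | BPF_IMM | BPF_DW immediate), or a negative errno.
 */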
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2sparc[insn->dst_reg];
	const u8 src = bpf2sparc[insn->src_reg];
	const int i = insn - ctx->prog->insnsi;
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	u32 *func;

	if (insn->src_reg == BPF_REG_FP)
		ctx->saw_frame_pointer = true;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
		emit_alu3_K(SRL, src, 0, dst, ctx);
		break;
	case BPF_ALU64 | BPF_MOV | BPF_X:
		emit_reg_move(src, dst, ctx);
		break;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_alu(ADD, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_alu(SUB, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_alu(AND, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_alu(OR, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_alu(XOR, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_MUL | BPF_X:
		emit_alu(MUL, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_alu(MULX, src, dst, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
		emit_cmp(src, G0, ctx);
		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);

		emit_write_y(G0, ctx);
		emit_alu(DIV, src, dst, ctx);
		break;

	case BPF_ALU64 | BPF_DIV | BPF_X:
		emit_cmp(src, G0, ctx);
		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);

		emit_alu(UDIVX, src, dst, ctx);
		break;

	case BPF_ALU | BPF_MOD | BPF_X: {
		const u8 tmp = bpf2sparc[TMP_REG_1];

		ctx->tmp_1_used = true;

		emit_cmp(src, G0, ctx);
		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);

		emit_write_y(G0, ctx);
		emit_alu3(DIV, dst, src, tmp, ctx);
		emit_alu3(MULX, tmp, src, tmp, ctx);
		emit_alu3(SUB, dst, tmp, dst, ctx);
		goto do_alu32_trunc;
	}
	case BPF_ALU64 | BPF_MOD | BPF_X: {
		const u8 tmp = bpf2sparc[TMP_REG_1];

		ctx->tmp_1_used = true;

		emit_cmp(src, G0, ctx);
		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);

		emit_alu3(UDIVX, dst, src, tmp, ctx);
		emit_alu3(MULX, tmp, src, tmp, ctx);
		emit_alu3(SUB, dst, tmp, dst, ctx);
		break;
	}
	case BPF_ALU | BPF_LSH | BPF_X:
		emit_alu(SLL, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_alu(SLLX, src, dst, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
		emit_alu(SRL, src, dst, ctx);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_alu(SRLX, src, dst, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
		emit_alu(SRA, src, dst, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_alu(SRAX, src, dst, ctx);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(SUB | RS1(0) | RS2(dst) | RD(dst), ctx);
		goto do_alu32_trunc;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
		switch (imm) {
		case 16:
			emit_alu_K(SLL, dst, 16, ctx);
			emit_alu_K(SRL, dst, 16, ctx);
			break;
		case 32:
			emit_alu_K(SRL, dst, 0, ctx);
			break;
		case 64:
			/* nop */
			break;

		}
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		const u8 tmp2 = bpf2sparc[TMP_REG_2];

		ctx->tmp_1_used = true;
		switch (imm) {
		case 16:
			emit_alu3_K(AND, dst, 0xff, tmp, ctx);
			emit_alu3_K(SRL, dst, 8, dst, ctx);
			emit_alu3_K(AND, dst, 0xff, dst, ctx);
			emit_alu3_K(SLL, tmp, 8, tmp, ctx);
			emit_alu(OR, tmp, dst, ctx);
			break;

		case 32:
			ctx->tmp_2_used = true;
			emit_alu3_K(SRL, dst, 24, tmp, ctx);	/* tmp  = dst >> 24 */
			emit_alu3_K(SRL, dst, 16, tmp2, ctx);	/* tmp2 = dst >> 16 */
			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
			emit_alu3_K(SLL, tmp2, 8, tmp2, ctx);	/* tmp2 = tmp2 << 8 */
			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
			emit_alu3_K(SRL, dst, 8, tmp2, ctx);	/* tmp2 = dst >> 8 */
			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
			emit_alu3_K(SLL, tmp2, 16, tmp2, ctx);	/* tmp2 = tmp2 << 16 */
			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
			emit_alu3_K(AND, dst, 0xff, dst, ctx);	/* dst  = dst & 0xff */
			emit_alu3_K(SLL, dst, 24, dst, ctx);	/* dst  = dst << 24 */
			emit_alu(OR, tmp, dst, ctx);		/* dst  = dst | tmp */
			break;

		case 64:
			emit_alu3_K(ADD, SP, STACK_BIAS + 128, tmp, ctx);
			emit(ST64 | RS1(tmp) | RS2(G0) | RD(dst), ctx);
			emit(LD64A | ASI(ASI_PL) | RS1(tmp) | RS2(G0) | RD(dst), ctx);
			break;
		}
		break;
	}
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
		emit_loadimm32(imm, dst, ctx);
		break;
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_loadimm_sext(imm, dst, ctx);
		break;
	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		emit_alu_K(ADD, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		emit_alu_K(SUB, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		emit_alu_K(AND, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		emit_alu_K(OR, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		emit_alu_K(XOR, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU | BPF_MUL | BPF_K:
		emit_alu_K(MUL, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_alu_K(MULX, dst, imm, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
		if (imm == 0)
			return -EINVAL;

		emit_write_y(G0, ctx);
		emit_alu_K(DIV, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_DIV | BPF_K:
		if (imm == 0)
			return -EINVAL;

		emit_alu_K(UDIVX, dst, imm, ctx);
		break;
	case BPF_ALU64 | BPF_MOD | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K: {
		const u8 tmp = bpf2sparc[TMP_REG_2];
		unsigned int div;

		if (imm == 0)
			return -EINVAL;

		div = (BPF_CLASS(code) == BPF_ALU64) ? UDIVX : DIV;

		ctx->tmp_2_used = true;

		if (BPF_CLASS(code) != BPF_ALU64)
			emit_write_y(G0, ctx);
		if (is_simm13(imm)) {
			emit(div | IMMED | RS1(dst) | S13(imm) | RD(tmp), ctx);
			emit(MULX | IMMED | RS1(tmp) | S13(imm) | RD(tmp), ctx);
			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
		} else {
			const u8 tmp1 = bpf2sparc[TMP_REG_1];

			ctx->tmp_1_used = true;

			emit_set_const_sext(imm, tmp1, ctx);
			emit(div | RS1(dst) | RS2(tmp1) | RD(tmp), ctx);
			emit(MULX | RS1(tmp) | RS2(tmp1) | RD(tmp), ctx);
			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
		}
		goto do_alu32_trunc;
	}
	case BPF_ALU | BPF_LSH | BPF_K:
		emit_alu_K(SLL, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_alu_K(SLLX, dst, imm, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
		emit_alu_K(SRL, dst, imm, ctx);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit_alu_K(SRLX, dst, imm, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
		emit_alu_K(SRA, dst, imm, ctx);
		goto do_alu32_trunc;
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_K(SRAX, dst, imm, ctx);
		break;

	do_alu32_trunc:
		if (BPF_CLASS(code) == BPF_ALU)
			emit_alu_K(SRL, dst, 0, ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		emit_branch(BA, ctx->idx, ctx->offset[i + off], ctx);
		emit_nop(ctx);
		break;
	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X: {
		int err;

		err = emit_compare_and_branch(code, dst, src, 0, false, i + off, ctx);
		if (err)
			return err;
		break;
	}
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K: {
		int err;

		err = emit_compare_and_branch(code, dst, 0, imm, true, i + off, ctx);
		if (err)
			return err;
		break;
	}

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		u8 *func = ((u8 *)__bpf_call_base) + imm;

		ctx->saw_call = true;
		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);

		emit_call((u32 *)func, ctx);
		emit_nop(ctx);

		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);

		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
			load_skb_regs(ctx, L7);
		break;
	}

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		emit_tail_call(ctx);
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* Optimization: when last instruction is EXIT,
		   simply fallthrough to epilogue. */
		if (i == ctx->prog->len - 1)
			break;
		emit_branch(BA, ctx->idx, ctx->epilogue_offset, ctx);
		emit_nop(ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		emit_loadimm64(imm64, dst, ctx);

		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		u32 opcode = 0, rs2;

		ctx->tmp_1_used = true;
		switch (BPF_SIZE(code)) {
		case BPF_W:
			opcode = LD32;
			break;
		case BPF_H:
			opcode = LD16;
			break;
		case BPF_B:
			opcode = LD8;
			break;
		case BPF_DW:
			opcode = LD64;
			break;
		}

		if (is_simm13(off)) {
			opcode |= IMMED;
			rs2 = S13(off);
		} else {
			emit_loadimm(off, tmp, ctx);
			rs2 = RS2(tmp);
		}
		emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
		break;
	}
	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		const u8 tmp2 = bpf2sparc[TMP_REG_2];
		u32 opcode = 0, rs2;

		ctx->tmp_2_used = true;
		emit_loadimm(imm, tmp2, ctx);

		switch (BPF_SIZE(code)) {
		case BPF_W:
			opcode = ST32;
			break;
		case BPF_H:
			opcode = ST16;
			break;
		case BPF_B:
			opcode = ST8;
			break;
		case BPF_DW:
			opcode = ST64;
			break;
		}

		if (is_simm13(off)) {
			opcode |= IMMED;
			rs2 = S13(off);
		} else {
			ctx->tmp_1_used = true;
			emit_loadimm(off, tmp, ctx);
			rs2 = RS2(tmp);
		}
		emit(opcode | RS1(dst) | rs2 | RD(tmp2), ctx);
		break;
	}

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		u32 opcode = 0, rs2;

		switch (BPF_SIZE(code)) {
		case BPF_W:
			opcode = ST32;
			break;
		case BPF_H:
			opcode = ST16;
			break;
		case BPF_B:
			opcode = ST8;
			break;
		case BPF_DW:
			opcode = ST64;
			break;
		}
		if (is_simm13(off)) {
			opcode |= IMMED;
			rs2 = S13(off);
		} else {
			ctx->tmp_1_used = true;
			emit_loadimm(off, tmp, ctx);
			rs2 = RS2(tmp);
		}
		emit(opcode | RS1(dst) | rs2 | RD(src), ctx);
		break;
	}

	/* STX XADD: lock *(u32 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_W: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		const u8 tmp2 = bpf2sparc[TMP_REG_2];
		const u8 tmp3 = bpf2sparc[TMP_REG_3];

		ctx->tmp_1_used = true;
		ctx->tmp_2_used = true;
		ctx->tmp_3_used = true;
		emit_loadimm(off, tmp, ctx);
		emit_alu3(ADD, dst, tmp, tmp, ctx);

		emit(LD32 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
		emit_alu3(ADD, tmp2, src, tmp3, ctx);
		emit(CAS | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
		emit_cmp(tmp2, tmp3, ctx);
		emit_branch(BNE, 4, 0, ctx);
		emit_nop(ctx);
		break;
	}
	/* STX XADD: lock *(u64 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_DW: {
		const u8 tmp = bpf2sparc[TMP_REG_1];
		const u8 tmp2 = bpf2sparc[TMP_REG_2];
		const u8 tmp3 = bpf2sparc[TMP_REG_3];

		ctx->tmp_1_used = true;
		ctx->tmp_2_used = true;
		ctx->tmp_3_used = true;
		emit_loadimm(off, tmp, ctx);
		emit_alu3(ADD, dst, tmp, tmp, ctx);

		emit(LD64 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
		emit_alu3(ADD, tmp2, src, tmp3, ctx);
		emit(CASX | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
		emit_cmp(tmp2, tmp3, ctx);
		emit_branch(BNE, 4, 0, ctx);
		emit_nop(ctx);
		break;
	}
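
	/* The LD_ABS/LD_IND packet loads below call out of line to the
	 * bpf_jit_load_{word,half,byte} helpers; CHOOSE_LOAD_FUNC picks
	 * among the plain, positive-offset, and negative-offset helper
	 * variants based on the sign of the constant offset.
	 */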
#define CHOOSE_LOAD_FUNC(K, func) \
		((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)

	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
	case BPF_LD | BPF_ABS | BPF_W:
		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
		goto common_load;
	case BPF_LD | BPF_ABS | BPF_H:
		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
		goto common_load;
	case BPF_LD | BPF_ABS | BPF_B:
		func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
		goto common_load;
	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
	case BPF_LD | BPF_IND | BPF_W:
		func = bpf_jit_load_word;
		goto common_load;
	case BPF_LD | BPF_IND | BPF_H:
		func = bpf_jit_load_half;
		goto common_load;

	case BPF_LD | BPF_IND | BPF_B:
		func = bpf_jit_load_byte;
	common_load:
		ctx->saw_ld_abs_ind = true;

		emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
		emit_loadimm(imm, O1, ctx);

		if (BPF_MODE(code) == BPF_IND)
			emit_alu(ADD, src, O1, ctx);

		emit_call(func, ctx);
		emit_alu_K(SRA, O1, 0, ctx);

		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
		break;

	default:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

static int build_body(struct jit_ctx *ctx)
{
	const struct bpf_prog *prog = ctx->prog;
	int i;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		ret = build_insn(insn, ctx);

		if (ret > 0) {
			i++;
			ctx->offset[i] = ctx->idx;
			continue;
		}
		ctx->offset[i] = ctx->idx;
		if (ret)
			return ret;
	}
	return 0;
}

static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = 0x91d02005; /* ta 5 */
}
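
/* Overall JIT flow: a sizing pass runs with ctx.image == NULL so that
 * emit() only advances ctx.idx, then the image is allocated and the
 * prologue, body, and epilogue are emitted into it over two passes,
 * with ctx.offset[] recording each BPF instruction's index for branch
 * resolution.
 */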
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header;
	bool tmp_blinded = false;
	struct jit_ctx ctx;
	u32 image_size;
	u8 *image_ptr;
	int pass;

	if (!bpf_jit_enable)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/* If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out;
	}

	/* Fake pass to detect features used, and get an accurate assessment
	 * of what the final image size will be.
	 */
	if (build_body(&ctx)) {
		prog = orig_prog;
		goto out_off;
	}
	build_prologue(&ctx);
	build_epilogue(&ctx);

	/* Now we know the actual image size. */
	image_size = sizeof(u32) * ctx.idx;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_off;
	}

	ctx.image = (u32 *)image_ptr;

	for (pass = 1; pass < 3; pass++) {
		ctx.idx = 0;

		build_prologue(&ctx);

		if (build_body(&ctx)) {
			bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_off;
		}

		build_epilogue(&ctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
				image_size - (ctx.idx * 4),
				ctx.tmp_1_used ? '1' : ' ',
				ctx.tmp_2_used ? '2' : ' ',
				ctx.tmp_3_used ? '3' : ' ',
				ctx.saw_ld_abs_ind ? 'L' : ' ',
				ctx.saw_frame_pointer ? 'F' : ' ',
				ctx.saw_call ? 'C' : ' ',
				ctx.saw_tail_call ? 'T' : ' ');
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, pass, ctx.image);

	bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE));

	bpf_jit_binary_lock_ro(header);

	prog->bpf_func = (void *)ctx.image;
	prog->jited = 1;
	prog->jited_len = image_size;

out_off:
	kfree(ctx.offset);
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}