// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2016-2018 Netronome Systems, Inc. */

#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>

#include "main.h"
#include "../nfp_asm.h"
#include "../nfp_net_ctrl.h"

/* --- NFP prog --- */
/* The for-each macros that walk "multiple" entries provide pos and next<n>
 * pointers.  It's safe to modify the next pointers (but not pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))

static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
		pr_warn("instruction limit reached (%u NFP instructions)\n",
			nfp_prog->prog_len);
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}

static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the
	 * translation will fail anyway, so just return OK.
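	 *
	 * For example, nfp_prog_push() above stops emitting and records
	 * -ENOSPC once the instruction buffer is full, so offsets recorded
	 * after that point no longer match what was actually emitted.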
71 */ 72 if (nfp_prog->error) 73 return true; 74 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); 75 } 76 77 /* --- Emitters --- */ 78 static void 79 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, 80 u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx, 81 bool indir) 82 { 83 u64 insn; 84 85 insn = FIELD_PREP(OP_CMD_A_SRC, areg) | 86 FIELD_PREP(OP_CMD_CTX, ctx) | 87 FIELD_PREP(OP_CMD_B_SRC, breg) | 88 FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | 89 FIELD_PREP(OP_CMD_XFER, xfer) | 90 FIELD_PREP(OP_CMD_CNT, size) | 91 FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) | 92 FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | 93 FIELD_PREP(OP_CMD_INDIR, indir) | 94 FIELD_PREP(OP_CMD_MODE, mode); 95 96 nfp_prog_push(nfp_prog, insn); 97 } 98 99 static void 100 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 101 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir) 102 { 103 struct nfp_insn_re_regs reg; 104 int err; 105 106 err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); 107 if (err) { 108 nfp_prog->error = err; 109 return; 110 } 111 if (reg.swap) { 112 pr_err("cmd can't swap arguments\n"); 113 nfp_prog->error = -EFAULT; 114 return; 115 } 116 if (reg.dst_lmextn || reg.src_lmextn) { 117 pr_err("cmd can't use LMextn\n"); 118 nfp_prog->error = -EFAULT; 119 return; 120 } 121 122 __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx, 123 indir); 124 } 125 126 static void 127 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 128 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) 129 { 130 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false); 131 } 132 133 static void 134 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 135 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) 136 { 137 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true); 138 } 139 140 static void 141 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, 142 enum br_ctx_signal_state css, u16 addr, u8 defer) 143 { 144 u16 addr_lo, addr_hi; 145 u64 insn; 146 147 addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); 148 addr_hi = addr != addr_lo; 149 150 insn = OP_BR_BASE | 151 FIELD_PREP(OP_BR_MASK, mask) | 152 FIELD_PREP(OP_BR_EV_PIP, ev_pip) | 153 FIELD_PREP(OP_BR_CSS, css) | 154 FIELD_PREP(OP_BR_DEFBR, defer) | 155 FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | 156 FIELD_PREP(OP_BR_ADDR_HI, addr_hi); 157 158 nfp_prog_push(nfp_prog, insn); 159 } 160 161 static void 162 emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, 163 enum nfp_relo_type relo) 164 { 165 if (mask == BR_UNC && defer > 2) { 166 pr_err("BUG: branch defer out of bounds %d\n", defer); 167 nfp_prog->error = -EFAULT; 168 return; 169 } 170 171 __emit_br(nfp_prog, mask, 172 mask != BR_UNC ? 
BR_EV_PIP_COND : BR_EV_PIP_UNCOND, 173 BR_CSS_NONE, addr, defer); 174 175 nfp_prog->prog[nfp_prog->prog_len - 1] |= 176 FIELD_PREP(OP_RELO_TYPE, relo); 177 } 178 179 static void 180 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) 181 { 182 emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); 183 } 184 185 static void 186 __emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer, 187 bool set, bool src_lmextn) 188 { 189 u16 addr_lo, addr_hi; 190 u64 insn; 191 192 addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO)); 193 addr_hi = addr != addr_lo; 194 195 insn = OP_BR_BIT_BASE | 196 FIELD_PREP(OP_BR_BIT_A_SRC, areg) | 197 FIELD_PREP(OP_BR_BIT_B_SRC, breg) | 198 FIELD_PREP(OP_BR_BIT_BV, set) | 199 FIELD_PREP(OP_BR_BIT_DEFBR, defer) | 200 FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) | 201 FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) | 202 FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn); 203 204 nfp_prog_push(nfp_prog, insn); 205 } 206 207 static void 208 emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, 209 u8 defer, bool set, enum nfp_relo_type relo) 210 { 211 struct nfp_insn_re_regs reg; 212 int err; 213 214 /* NOTE: The bit to test is specified as an rotation amount, such that 215 * the bit to test will be placed on the MSB of the result when 216 * doing a rotate right. For bit X, we need right rotate X + 1. 217 */ 218 bit += 1; 219 220 err = swreg_to_restricted(reg_none(), src, reg_imm(bit), ®, false); 221 if (err) { 222 nfp_prog->error = err; 223 return; 224 } 225 226 __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set, 227 reg.src_lmextn); 228 229 nfp_prog->prog[nfp_prog->prog_len - 1] |= 230 FIELD_PREP(OP_RELO_TYPE, relo); 231 } 232 233 static void 234 emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) 235 { 236 emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL); 237 } 238 239 static void 240 __emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, 241 u8 defer, bool dst_lmextn, bool src_lmextn) 242 { 243 u64 insn; 244 245 insn = OP_BR_ALU_BASE | 246 FIELD_PREP(OP_BR_ALU_A_SRC, areg) | 247 FIELD_PREP(OP_BR_ALU_B_SRC, breg) | 248 FIELD_PREP(OP_BR_ALU_DEFBR, defer) | 249 FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) | 250 FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) | 251 FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn); 252 253 nfp_prog_push(nfp_prog, insn); 254 } 255 256 static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer) 257 { 258 struct nfp_insn_ur_regs reg; 259 int err; 260 261 err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), ®); 262 if (err) { 263 nfp_prog->error = err; 264 return; 265 } 266 267 __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn, 268 reg.src_lmextn); 269 } 270 271 static void 272 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, 273 enum immed_width width, bool invert, 274 enum immed_shift shift, bool wr_both, 275 bool dst_lmextn, bool src_lmextn) 276 { 277 u64 insn; 278 279 insn = OP_IMMED_BASE | 280 FIELD_PREP(OP_IMMED_A_SRC, areg) | 281 FIELD_PREP(OP_IMMED_B_SRC, breg) | 282 FIELD_PREP(OP_IMMED_IMM, imm_hi) | 283 FIELD_PREP(OP_IMMED_WIDTH, width) | 284 FIELD_PREP(OP_IMMED_INV, invert) | 285 FIELD_PREP(OP_IMMED_SHIFT, shift) | 286 FIELD_PREP(OP_IMMED_WR_AB, wr_both) | 287 FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | 288 FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); 289 290 nfp_prog_push(nfp_prog, insn); 291 } 292 293 static void 294 emit_immed(struct nfp_prog 
*nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	/* The NFP shift instruction has a quirk: if the shift direction is
	 * left, then a shift amount of 1 to 31 is specified as 32 minus the
	 * amount to shift.
	 *
	 * There is no need to do this for an indirect shift, which has a
	 * shift amount of 0.  Even after this subtraction a shift amount of
	 * 0 would turn into 32, which would eventually be encoded the same
	 * as 0 because only the low 5 bits are encoded, but a shift amount
	 * of 32 would fail the FIELD_PREP() check done later on the shift
	 * mask (0x1f), since 32 is outside the mask range.
	 */
	if (sc == SHF_SC_L_SHF && shift)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
	if (sc == SHF_SC_R_ROT) {
		pr_err("indirect shift is not allowed on rotation\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}

static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

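	/* All fields of the 64-bit ALU instruction word are now populated;
	 * append it to the program (nfp_prog_push() records -ENOSPC if the
	 * instruction buffer is already full).
	 */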
nfp_prog_push(nfp_prog, insn); 412 } 413 414 static void 415 emit_alu(struct nfp_prog *nfp_prog, swreg dst, 416 swreg lreg, enum alu_op op, swreg rreg) 417 { 418 struct nfp_insn_ur_regs reg; 419 int err; 420 421 err = swreg_to_unrestricted(dst, lreg, rreg, ®); 422 if (err) { 423 nfp_prog->error = err; 424 return; 425 } 426 427 __emit_alu(nfp_prog, reg.dst, reg.dst_ab, 428 reg.areg, op, reg.breg, reg.swap, reg.wr_both, 429 reg.dst_lmextn, reg.src_lmextn); 430 } 431 432 static void 433 __emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg, 434 enum mul_type type, enum mul_step step, u16 breg, bool swap, 435 bool wr_both, bool dst_lmextn, bool src_lmextn) 436 { 437 u64 insn; 438 439 insn = OP_MUL_BASE | 440 FIELD_PREP(OP_MUL_A_SRC, areg) | 441 FIELD_PREP(OP_MUL_B_SRC, breg) | 442 FIELD_PREP(OP_MUL_STEP, step) | 443 FIELD_PREP(OP_MUL_DST_AB, dst_ab) | 444 FIELD_PREP(OP_MUL_SW, swap) | 445 FIELD_PREP(OP_MUL_TYPE, type) | 446 FIELD_PREP(OP_MUL_WR_AB, wr_both) | 447 FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) | 448 FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn); 449 450 nfp_prog_push(nfp_prog, insn); 451 } 452 453 static void 454 emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type, 455 enum mul_step step, swreg rreg) 456 { 457 struct nfp_insn_ur_regs reg; 458 u16 areg; 459 int err; 460 461 if (type == MUL_TYPE_START && step != MUL_STEP_NONE) { 462 nfp_prog->error = -EINVAL; 463 return; 464 } 465 466 if (step == MUL_LAST || step == MUL_LAST_2) { 467 /* When type is step and step Number is LAST or LAST2, left 468 * source is used as destination. 469 */ 470 err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®); 471 areg = reg.dst; 472 } else { 473 err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®); 474 areg = reg.areg; 475 } 476 477 if (err) { 478 nfp_prog->error = err; 479 return; 480 } 481 482 __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap, 483 reg.wr_both, reg.dst_lmextn, reg.src_lmextn); 484 } 485 486 static void 487 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, 488 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, 489 bool zero, bool swap, bool wr_both, 490 bool dst_lmextn, bool src_lmextn) 491 { 492 u64 insn; 493 494 insn = OP_LDF_BASE | 495 FIELD_PREP(OP_LDF_A_SRC, areg) | 496 FIELD_PREP(OP_LDF_SC, sc) | 497 FIELD_PREP(OP_LDF_B_SRC, breg) | 498 FIELD_PREP(OP_LDF_I8, imm8) | 499 FIELD_PREP(OP_LDF_SW, swap) | 500 FIELD_PREP(OP_LDF_ZF, zero) | 501 FIELD_PREP(OP_LDF_BMASK, bmask) | 502 FIELD_PREP(OP_LDF_SHF, shift) | 503 FIELD_PREP(OP_LDF_WR_AB, wr_both) | 504 FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | 505 FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); 506 507 nfp_prog_push(nfp_prog, insn); 508 } 509 510 static void 511 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 512 enum shf_sc sc, u8 shift, bool zero) 513 { 514 struct nfp_insn_re_regs reg; 515 int err; 516 517 /* Note: ld_field is special as it uses one of the src regs as dst */ 518 err = swreg_to_restricted(dst, dst, src, ®, true); 519 if (err) { 520 nfp_prog->error = err; 521 return; 522 } 523 524 __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, 525 reg.i8, zero, reg.swap, reg.wr_both, 526 reg.dst_lmextn, reg.src_lmextn); 527 } 528 529 static void 530 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 531 enum shf_sc sc, u8 shift) 532 { 533 emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); 534 } 535 536 static void 537 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, 538 
bool dst_lmextn, bool src_lmextn) 539 { 540 u64 insn; 541 542 insn = OP_LCSR_BASE | 543 FIELD_PREP(OP_LCSR_A_SRC, areg) | 544 FIELD_PREP(OP_LCSR_B_SRC, breg) | 545 FIELD_PREP(OP_LCSR_WRITE, wr) | 546 FIELD_PREP(OP_LCSR_ADDR, addr / 4) | 547 FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | 548 FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); 549 550 nfp_prog_push(nfp_prog, insn); 551 } 552 553 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) 554 { 555 struct nfp_insn_ur_regs reg; 556 int err; 557 558 /* This instruction takes immeds instead of reg_none() for the ignored 559 * operand, but we can't encode 2 immeds in one instr with our normal 560 * swreg infra so if param is an immed, we encode as reg_none() and 561 * copy the immed to both operands. 562 */ 563 if (swreg_type(src) == NN_REG_IMM) { 564 err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); 565 reg.breg = reg.areg; 566 } else { 567 err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); 568 } 569 if (err) { 570 nfp_prog->error = err; 571 return; 572 } 573 574 __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr, 575 false, reg.src_lmextn); 576 } 577 578 /* CSR value is read in following immed[gpr, 0] */ 579 static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr) 580 { 581 __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false); 582 } 583 584 static void emit_nop(struct nfp_prog *nfp_prog) 585 { 586 __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); 587 } 588 589 /* --- Wrappers --- */ 590 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) 591 { 592 if (!(imm & 0xffff0000)) { 593 *val = imm; 594 *shift = IMMED_SHIFT_0B; 595 } else if (!(imm & 0xff0000ff)) { 596 *val = imm >> 8; 597 *shift = IMMED_SHIFT_1B; 598 } else if (!(imm & 0x0000ffff)) { 599 *val = imm >> 16; 600 *shift = IMMED_SHIFT_2B; 601 } else { 602 return false; 603 } 604 605 return true; 606 } 607 608 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) 609 { 610 enum immed_shift shift; 611 u16 val; 612 613 if (pack_immed(imm, &val, &shift)) { 614 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); 615 } else if (pack_immed(~imm, &val, &shift)) { 616 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); 617 } else { 618 emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, 619 false, IMMED_SHIFT_0B); 620 emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, 621 false, IMMED_SHIFT_2B); 622 } 623 } 624 625 static void 626 wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst) 627 { 628 if (meta->flags & FLAG_INSN_DO_ZEXT) 629 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 630 } 631 632 static void 633 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, 634 enum nfp_relo_type relo) 635 { 636 if (imm > 0xffff) { 637 pr_err("relocation of a large immediate!\n"); 638 nfp_prog->error = -EFAULT; 639 return; 640 } 641 emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); 642 643 nfp_prog->prog[nfp_prog->prog_len - 1] |= 644 FIELD_PREP(OP_RELO_TYPE, relo); 645 } 646 647 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) 648 * If the @imm is small enough encode it directly in operand and return 649 * otherwise load @imm to a spare register and return its encoding. 
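 *
 * For example, an immediate that satisfies FIELD_FIT(UR_REG_IMM_MAX, imm)
 * below is encoded directly via reg_imm(), while a larger value such as
 * 0x12345678 is first loaded into @tmp_reg by wrp_immed() and @tmp_reg is
 * returned instead.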
650 */ 651 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 652 { 653 if (FIELD_FIT(UR_REG_IMM_MAX, imm)) 654 return reg_imm(imm); 655 656 wrp_immed(nfp_prog, tmp_reg, imm); 657 return tmp_reg; 658 } 659 660 /* re_load_imm_any() - encode immediate or use tmp register (restricted) 661 * If the @imm is small enough encode it directly in operand and return 662 * otherwise load @imm to a spare register and return its encoding. 663 */ 664 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 665 { 666 if (FIELD_FIT(RE_REG_IMM_MAX, imm)) 667 return reg_imm(imm); 668 669 wrp_immed(nfp_prog, tmp_reg, imm); 670 return tmp_reg; 671 } 672 673 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) 674 { 675 while (count--) 676 emit_nop(nfp_prog); 677 } 678 679 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) 680 { 681 emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); 682 } 683 684 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) 685 { 686 wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); 687 } 688 689 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the 690 * result to @dst from low end. 691 */ 692 static void 693 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, 694 u8 offset) 695 { 696 enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; 697 u8 mask = (1 << field_len) - 1; 698 699 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); 700 } 701 702 /* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the 703 * result to @dst from offset, there is no change on the other bits of @dst. 704 */ 705 static void 706 wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, 707 u8 field_len, u8 offset) 708 { 709 enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE; 710 u8 mask = ((1 << field_len) - 1) << offset; 711 712 emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8); 713 } 714 715 static void 716 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 717 swreg *rega, swreg *regb) 718 { 719 if (offset == reg_imm(0)) { 720 *rega = reg_a(src_gpr); 721 *regb = reg_b(src_gpr + 1); 722 return; 723 } 724 725 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset); 726 emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C, 727 reg_imm(0)); 728 *rega = imm_a(nfp_prog); 729 *regb = imm_b(nfp_prog); 730 } 731 732 /* NFP has Command Push Pull bus which supports bluk memory operations. */ 733 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 734 { 735 bool descending_seq = meta->ldst_gather_len < 0; 736 s16 len = abs(meta->ldst_gather_len); 737 swreg src_base, off; 738 bool src_40bit_addr; 739 unsigned int i; 740 u8 xfer_num; 741 742 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 743 src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE; 744 src_base = reg_a(meta->insn.src_reg * 2); 745 xfer_num = round_up(len, 4) / 4; 746 747 if (src_40bit_addr) 748 addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base, 749 &off); 750 751 /* Setup PREV_ALU fields to override memory read length. */ 752 if (len > 32) 753 wrp_immed(nfp_prog, reg_none(), 754 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 755 756 /* Memory read from source addr into transfer-in registers. */ 757 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, 758 src_40bit_addr ? 
CMD_MODE_40b_BA : CMD_MODE_32b, 0, 759 src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32); 760 761 /* Move from transfer-in to transfer-out. */ 762 for (i = 0; i < xfer_num; i++) 763 wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); 764 765 off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); 766 767 if (len <= 8) { 768 /* Use single direct_ref write8. */ 769 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 770 reg_a(meta->paired_st->dst_reg * 2), off, len - 1, 771 CMD_CTX_SWAP); 772 } else if (len <= 32 && IS_ALIGNED(len, 4)) { 773 /* Use single direct_ref write32. */ 774 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 775 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, 776 CMD_CTX_SWAP); 777 } else if (len <= 32) { 778 /* Use single indirect_ref write8. */ 779 wrp_immed(nfp_prog, reg_none(), 780 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); 781 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 782 reg_a(meta->paired_st->dst_reg * 2), off, 783 len - 1, CMD_CTX_SWAP); 784 } else if (IS_ALIGNED(len, 4)) { 785 /* Use single indirect_ref write32. */ 786 wrp_immed(nfp_prog, reg_none(), 787 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 788 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 789 reg_a(meta->paired_st->dst_reg * 2), off, 790 xfer_num - 1, CMD_CTX_SWAP); 791 } else if (len <= 40) { 792 /* Use one direct_ref write32 to write the first 32-bytes, then 793 * another direct_ref write8 to write the remaining bytes. 794 */ 795 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 796 reg_a(meta->paired_st->dst_reg * 2), off, 7, 797 CMD_CTX_SWAP); 798 799 off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, 800 imm_b(nfp_prog)); 801 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, 802 reg_a(meta->paired_st->dst_reg * 2), off, len - 33, 803 CMD_CTX_SWAP); 804 } else { 805 /* Use one indirect_ref write32 to write 4-bytes aligned length, 806 * then another direct_ref write8 to write the remaining bytes. 807 */ 808 u8 new_off; 809 810 wrp_immed(nfp_prog, reg_none(), 811 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); 812 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 813 reg_a(meta->paired_st->dst_reg * 2), off, 814 xfer_num - 2, CMD_CTX_SWAP); 815 new_off = meta->paired_st->off + (xfer_num - 1) * 4; 816 off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); 817 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 818 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, 819 (len & 0x3) - 1, CMD_CTX_SWAP); 820 } 821 822 /* TODO: The following extra load is to make sure data flow be identical 823 * before and after we do memory copy optimization. 824 * 825 * The load destination register is not guaranteed to be dead, so we 826 * need to make sure it is loaded with the value the same as before 827 * this transformation. 828 * 829 * These extra loads could be removed once we have accurate register 830 * usage information. 831 */ 832 if (descending_seq) 833 xfer_num = 0; 834 else if (BPF_SIZE(meta->insn.code) != BPF_DW) 835 xfer_num = xfer_num - 1; 836 else 837 xfer_num = xfer_num - 2; 838 839 switch (BPF_SIZE(meta->insn.code)) { 840 case BPF_B: 841 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 842 reg_xfer(xfer_num), 1, 843 IS_ALIGNED(len, 4) ? 
3 : (len & 3) - 1); 844 break; 845 case BPF_H: 846 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 847 reg_xfer(xfer_num), 2, (len & 3) ^ 2); 848 break; 849 case BPF_W: 850 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 851 reg_xfer(0)); 852 break; 853 case BPF_DW: 854 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 855 reg_xfer(xfer_num)); 856 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 857 reg_xfer(xfer_num + 1)); 858 break; 859 } 860 861 if (BPF_SIZE(meta->insn.code) != BPF_DW) 862 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 863 864 return 0; 865 } 866 867 static int 868 data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset, 869 u8 dst_gpr, int size) 870 { 871 unsigned int i; 872 u16 shift, sz; 873 874 /* We load the value from the address indicated in @offset and then 875 * shift out the data we don't need. Note: this is big endian! 876 */ 877 sz = max(size, 4); 878 shift = size < 4 ? 4 - size : 0; 879 880 emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, 881 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP); 882 883 i = 0; 884 if (shift) 885 emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, 886 reg_xfer(0), SHF_SC_R_SHF, shift * 8); 887 else 888 for (; i * 4 < size; i++) 889 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 890 891 if (i < 2) 892 wrp_zext(nfp_prog, meta, dst_gpr); 893 894 return 0; 895 } 896 897 static int 898 data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 899 u8 dst_gpr, swreg lreg, swreg rreg, int size, 900 enum cmd_mode mode) 901 { 902 unsigned int i; 903 u8 mask, sz; 904 905 /* We load the value from the address indicated in rreg + lreg and then 906 * mask out the data we don't need. Note: this is little endian! 907 */ 908 sz = max(size, 4); 909 mask = size < 4 ? 
GENMASK(size - 1, 0) : 0; 910 911 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0, 912 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP); 913 914 i = 0; 915 if (mask) 916 emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, 917 reg_xfer(0), SHF_SC_NONE, 0, true); 918 else 919 for (; i * 4 < size; i++) 920 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 921 922 if (i < 2) 923 wrp_zext(nfp_prog, meta, dst_gpr); 924 925 return 0; 926 } 927 928 static int 929 data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 930 u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) 931 { 932 return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr), 933 offset, size, CMD_MODE_32b); 934 } 935 936 static int 937 data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 938 u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) 939 { 940 swreg rega, regb; 941 942 addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); 943 944 return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb, 945 size, CMD_MODE_40b_BA); 946 } 947 948 static int 949 construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 950 u16 offset, u16 src, u8 size) 951 { 952 swreg tmp_reg; 953 954 /* Calculate the true offset (src_reg + imm) */ 955 tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 956 emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); 957 958 /* Check packet length (size guaranteed to fit b/c it's u8) */ 959 emit_alu(nfp_prog, imm_a(nfp_prog), 960 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); 961 emit_alu(nfp_prog, reg_none(), 962 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); 963 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); 964 965 /* Load data */ 966 return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size); 967 } 968 969 static int 970 construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 971 u16 offset, u8 size) 972 { 973 swreg tmp_reg; 974 975 /* Check packet length */ 976 tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); 977 emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); 978 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); 979 980 /* Load data */ 981 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 982 return data_ld(nfp_prog, meta, tmp_reg, 0, size); 983 } 984 985 static int 986 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 987 u8 src_gpr, u8 size) 988 { 989 unsigned int i; 990 991 for (i = 0; i * 4 < size; i++) 992 wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); 993 994 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 995 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); 996 997 return 0; 998 } 999 1000 static int 1001 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 1002 u64 imm, u8 size) 1003 { 1004 wrp_immed(nfp_prog, reg_xfer(0), imm); 1005 if (size == 8) 1006 wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); 1007 1008 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 1009 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); 1010 1011 return 0; 1012 } 1013 1014 typedef int 1015 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, 1016 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 1017 bool needs_inc); 1018 1019 static int 1020 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, 1021 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 1022 bool needs_inc) 1023 { 1024 bool should_inc = 
needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large,
	 * do RMW.  Because we RMW twice we waste 2 cycles on unaligned
	 * 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new
		 * GPR, that means we are loading a second part of the LMEM
		 * word into a new GPR.  IOW we've already looked that LMEM
		 * word up and therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}

static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large,
	 * do RMW.  Because we RMW twice we waste 2 cycles on unaligned
	 * 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only first and last LMEM locations are going to need RMW,
		 * the middle location will be overwritten fully.
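		 *
		 * E.g. an 8-byte store starting at byte 2 of an LMEM word
		 * spans three words: bytes 2-3 of the first word and bytes
		 * 0-1 of the last word need read-modify-write, while the
		 * middle word is written in full.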
1143 */ 1144 if (first || last) 1145 wrp_mov(nfp_prog, reg, reg_lm(0, idx)); 1146 } 1147 1148 emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); 1149 1150 if (new_gpr || last) { 1151 if (idx > RE_REG_LM_IDX_MAX) 1152 wrp_mov(nfp_prog, reg_lm(0, idx), reg); 1153 if (should_inc) 1154 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); 1155 } 1156 1157 return 0; 1158 } 1159 1160 static int 1161 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1162 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, 1163 bool clr_gpr, lmem_step step) 1164 { 1165 s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off; 1166 bool first = true, last; 1167 bool needs_inc = false; 1168 swreg stack_off_reg; 1169 u8 prev_gpr = 255; 1170 u32 gpr_byte = 0; 1171 bool lm3 = true; 1172 int ret; 1173 1174 if (meta->ptr_not_const || 1175 meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) { 1176 /* Use of the last encountered ptr_off is OK, they all have 1177 * the same alignment. Depend on low bits of value being 1178 * discarded when written to LMaddr register. 1179 */ 1180 stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, 1181 stack_imm(nfp_prog)); 1182 1183 emit_alu(nfp_prog, imm_b(nfp_prog), 1184 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); 1185 1186 needs_inc = true; 1187 } else if (off + size <= 64) { 1188 /* We can reach bottom 64B with LMaddr0 */ 1189 lm3 = false; 1190 } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { 1191 /* We have to set up a new pointer. If we know the offset 1192 * and the entire access falls into a single 32 byte aligned 1193 * window we won't have to increment the LM pointer. 1194 * The 32 byte alignment is imporant because offset is ORed in 1195 * not added when doing *l$indexN[off]. 1196 */ 1197 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), 1198 stack_imm(nfp_prog)); 1199 emit_alu(nfp_prog, imm_b(nfp_prog), 1200 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 1201 1202 off %= 32; 1203 } else { 1204 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), 1205 stack_imm(nfp_prog)); 1206 1207 emit_alu(nfp_prog, imm_b(nfp_prog), 1208 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 1209 1210 needs_inc = true; 1211 } 1212 if (lm3) { 1213 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); 1214 /* For size < 4 one slot will be filled by zeroing of upper. */ 1215 wrp_nops(nfp_prog, clr_gpr && size < 8 ? 
2 : 3); 1216 } 1217 1218 if (clr_gpr && size < 8) 1219 wrp_zext(nfp_prog, meta, gpr); 1220 1221 while (size) { 1222 u32 slice_end; 1223 u8 slice_size; 1224 1225 slice_size = min(size, 4 - gpr_byte); 1226 slice_end = min(off + slice_size, round_up(off + 1, 4)); 1227 slice_size = slice_end - off; 1228 1229 last = slice_size == size; 1230 1231 if (needs_inc) 1232 off %= 4; 1233 1234 ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, 1235 first, gpr != prev_gpr, last, lm3, needs_inc); 1236 if (ret) 1237 return ret; 1238 1239 prev_gpr = gpr; 1240 first = false; 1241 1242 gpr_byte += slice_size; 1243 if (gpr_byte >= 4) { 1244 gpr_byte -= 4; 1245 gpr++; 1246 } 1247 1248 size -= slice_size; 1249 off += slice_size; 1250 } 1251 1252 return 0; 1253 } 1254 1255 static void 1256 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) 1257 { 1258 swreg tmp_reg; 1259 1260 if (alu_op == ALU_OP_AND) { 1261 if (!imm) 1262 wrp_immed(nfp_prog, reg_both(dst), 0); 1263 if (!imm || !~imm) 1264 return; 1265 } 1266 if (alu_op == ALU_OP_OR) { 1267 if (!~imm) 1268 wrp_immed(nfp_prog, reg_both(dst), ~0U); 1269 if (!imm || !~imm) 1270 return; 1271 } 1272 if (alu_op == ALU_OP_XOR) { 1273 if (!~imm) 1274 emit_alu(nfp_prog, reg_both(dst), reg_none(), 1275 ALU_OP_NOT, reg_b(dst)); 1276 if (!imm || !~imm) 1277 return; 1278 } 1279 1280 tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); 1281 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); 1282 } 1283 1284 static int 1285 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1286 enum alu_op alu_op, bool skip) 1287 { 1288 const struct bpf_insn *insn = &meta->insn; 1289 u64 imm = insn->imm; /* sign extend */ 1290 1291 if (skip) { 1292 meta->flags |= FLAG_INSN_SKIP_NOOP; 1293 return 0; 1294 } 1295 1296 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); 1297 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); 1298 1299 return 0; 1300 } 1301 1302 static int 1303 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1304 enum alu_op alu_op) 1305 { 1306 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1307 1308 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1309 emit_alu(nfp_prog, reg_both(dst + 1), 1310 reg_a(dst + 1), alu_op, reg_b(src + 1)); 1311 1312 return 0; 1313 } 1314 1315 static int 1316 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1317 enum alu_op alu_op) 1318 { 1319 const struct bpf_insn *insn = &meta->insn; 1320 u8 dst = insn->dst_reg * 2; 1321 1322 wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm); 1323 wrp_zext(nfp_prog, meta, dst); 1324 1325 return 0; 1326 } 1327 1328 static int 1329 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1330 enum alu_op alu_op) 1331 { 1332 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1333 1334 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1335 wrp_zext(nfp_prog, meta, dst); 1336 1337 return 0; 1338 } 1339 1340 static void 1341 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, 1342 enum br_mask br_mask, u16 off) 1343 { 1344 emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); 1345 emit_br(nfp_prog, br_mask, off, 0); 1346 } 1347 1348 static int 1349 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1350 enum alu_op alu_op, enum br_mask br_mask) 1351 { 1352 const struct bpf_insn *insn = &meta->insn; 1353 1354 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, 1355 
insn->src_reg * 2, br_mask, insn->off); 1356 if (is_mbpf_jmp64(meta)) 1357 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, 1358 insn->src_reg * 2 + 1, br_mask, insn->off); 1359 1360 return 0; 1361 } 1362 1363 static const struct jmp_code_map { 1364 enum br_mask br_mask; 1365 bool swap; 1366 } jmp_code_map[] = { 1367 [BPF_JGT >> 4] = { BR_BLO, true }, 1368 [BPF_JGE >> 4] = { BR_BHS, false }, 1369 [BPF_JLT >> 4] = { BR_BLO, false }, 1370 [BPF_JLE >> 4] = { BR_BHS, true }, 1371 [BPF_JSGT >> 4] = { BR_BLT, true }, 1372 [BPF_JSGE >> 4] = { BR_BGE, false }, 1373 [BPF_JSLT >> 4] = { BR_BLT, false }, 1374 [BPF_JSLE >> 4] = { BR_BGE, true }, 1375 }; 1376 1377 static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta) 1378 { 1379 unsigned int op; 1380 1381 op = BPF_OP(meta->insn.code) >> 4; 1382 /* br_mask of 0 is BR_BEQ which we don't use in jump code table */ 1383 if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) || 1384 !jmp_code_map[op].br_mask, 1385 "no code found for jump instruction")) 1386 return NULL; 1387 1388 return &jmp_code_map[op]; 1389 } 1390 1391 static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1392 { 1393 const struct bpf_insn *insn = &meta->insn; 1394 u64 imm = insn->imm; /* sign extend */ 1395 const struct jmp_code_map *code; 1396 enum alu_op alu_op, carry_op; 1397 u8 reg = insn->dst_reg * 2; 1398 swreg tmp_reg; 1399 1400 code = nfp_jmp_code_get(meta); 1401 if (!code) 1402 return -EINVAL; 1403 1404 alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB; 1405 carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C; 1406 1407 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1408 if (!code->swap) 1409 emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg); 1410 else 1411 emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); 1412 1413 if (is_mbpf_jmp64(meta)) { 1414 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1415 if (!code->swap) 1416 emit_alu(nfp_prog, reg_none(), 1417 reg_a(reg + 1), carry_op, tmp_reg); 1418 else 1419 emit_alu(nfp_prog, reg_none(), 1420 tmp_reg, carry_op, reg_a(reg + 1)); 1421 } 1422 1423 emit_br(nfp_prog, code->br_mask, insn->off, 0); 1424 1425 return 0; 1426 } 1427 1428 static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1429 { 1430 const struct bpf_insn *insn = &meta->insn; 1431 const struct jmp_code_map *code; 1432 u8 areg, breg; 1433 1434 code = nfp_jmp_code_get(meta); 1435 if (!code) 1436 return -EINVAL; 1437 1438 areg = insn->dst_reg * 2; 1439 breg = insn->src_reg * 2; 1440 1441 if (code->swap) { 1442 areg ^= breg; 1443 breg ^= areg; 1444 areg ^= breg; 1445 } 1446 1447 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); 1448 if (is_mbpf_jmp64(meta)) 1449 emit_alu(nfp_prog, reg_none(), 1450 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); 1451 emit_br(nfp_prog, code->br_mask, insn->off, 0); 1452 1453 return 0; 1454 } 1455 1456 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) 1457 { 1458 emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, 1459 SHF_SC_R_ROT, 8); 1460 emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), 1461 SHF_SC_R_ROT, 16); 1462 } 1463 1464 static void 1465 wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, 1466 swreg rreg, bool gen_high_half) 1467 { 1468 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); 1469 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg); 1470 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, 
MUL_STEP_2, rreg); 1471 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg); 1472 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg); 1473 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none()); 1474 if (gen_high_half) 1475 emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2, 1476 reg_none()); 1477 else 1478 wrp_immed(nfp_prog, dst_hi, 0); 1479 } 1480 1481 static void 1482 wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg, 1483 swreg rreg) 1484 { 1485 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg); 1486 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg); 1487 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg); 1488 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none()); 1489 } 1490 1491 static int 1492 wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1493 bool gen_high_half, bool ropnd_from_reg) 1494 { 1495 swreg multiplier, multiplicand, dst_hi, dst_lo; 1496 const struct bpf_insn *insn = &meta->insn; 1497 u32 lopnd_max, ropnd_max; 1498 u8 dst_reg; 1499 1500 dst_reg = insn->dst_reg; 1501 multiplicand = reg_a(dst_reg * 2); 1502 dst_hi = reg_both(dst_reg * 2 + 1); 1503 dst_lo = reg_both(dst_reg * 2); 1504 lopnd_max = meta->umax_dst; 1505 if (ropnd_from_reg) { 1506 multiplier = reg_b(insn->src_reg * 2); 1507 ropnd_max = meta->umax_src; 1508 } else { 1509 u32 imm = insn->imm; 1510 1511 multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); 1512 ropnd_max = imm; 1513 } 1514 if (lopnd_max > U16_MAX || ropnd_max > U16_MAX) 1515 wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier, 1516 gen_high_half); 1517 else 1518 wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier); 1519 1520 return 0; 1521 } 1522 1523 static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm) 1524 { 1525 swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst); 1526 struct reciprocal_value_adv rvalue; 1527 u8 pre_shift, exp; 1528 swreg magic; 1529 1530 if (imm > U32_MAX) { 1531 wrp_immed(nfp_prog, dst_both, 0); 1532 return 0; 1533 } 1534 1535 /* NOTE: because we are using "reciprocal_value_adv" which doesn't 1536 * support "divisor > (1u << 31)", we need to JIT separate NFP sequence 1537 * to handle such case which actually equals to the result of unsigned 1538 * comparison "dst >= imm" which could be calculated using the following 1539 * NFP sequence: 1540 * 1541 * alu[--, dst, -, imm] 1542 * immed[imm, 0] 1543 * alu[dst, imm, +carry, 0] 1544 * 1545 */ 1546 if (imm > 1U << 31) { 1547 swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); 1548 1549 emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b); 1550 wrp_immed(nfp_prog, imm_a(nfp_prog), 0); 1551 emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C, 1552 reg_imm(0)); 1553 return 0; 1554 } 1555 1556 rvalue = reciprocal_value_adv(imm, 32); 1557 exp = rvalue.exp; 1558 if (rvalue.is_wide_m && !(imm & 1)) { 1559 pre_shift = fls(imm & -imm) - 1; 1560 rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift); 1561 } else { 1562 pre_shift = 0; 1563 } 1564 magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog)); 1565 if (imm == 1U << exp) { 1566 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, 1567 SHF_SC_R_SHF, exp); 1568 } else if (rvalue.is_wide_m) { 1569 wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, 1570 magic, true); 1571 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, 1572 imm_b(nfp_prog)); 1573 emit_shf(nfp_prog, dst_both, 
reg_none(), SHF_OP_NONE, dst_b, 1574 SHF_SC_R_SHF, 1); 1575 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, 1576 imm_b(nfp_prog)); 1577 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b, 1578 SHF_SC_R_SHF, rvalue.sh - 1); 1579 } else { 1580 if (pre_shift) 1581 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, 1582 dst_b, SHF_SC_R_SHF, pre_shift); 1583 wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true); 1584 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, 1585 dst_b, SHF_SC_R_SHF, rvalue.sh); 1586 } 1587 1588 return 0; 1589 } 1590 1591 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1592 { 1593 swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); 1594 struct nfp_bpf_cap_adjust_head *adjust_head; 1595 u32 ret_einval, end; 1596 1597 adjust_head = &nfp_prog->bpf->adjust_head; 1598 1599 /* Optimized version - 5 vs 14 cycles */ 1600 if (nfp_prog->adjust_head_location != UINT_MAX) { 1601 if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n)) 1602 return -EINVAL; 1603 1604 emit_alu(nfp_prog, pptr_reg(nfp_prog), 1605 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog)); 1606 emit_alu(nfp_prog, plen_reg(nfp_prog), 1607 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1608 emit_alu(nfp_prog, pv_len(nfp_prog), 1609 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1610 1611 wrp_immed(nfp_prog, reg_both(0), 0); 1612 wrp_immed(nfp_prog, reg_both(1), 0); 1613 1614 /* TODO: when adjust head is guaranteed to succeed we can 1615 * also eliminate the following if (r0 == 0) branch. 1616 */ 1617 1618 return 0; 1619 } 1620 1621 ret_einval = nfp_prog_current_offset(nfp_prog) + 14; 1622 end = ret_einval + 2; 1623 1624 /* We need to use a temp because offset is just a part of the pkt ptr */ 1625 emit_alu(nfp_prog, tmp, 1626 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog)); 1627 1628 /* Validate result will fit within FW datapath constraints */ 1629 emit_alu(nfp_prog, reg_none(), 1630 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min)); 1631 emit_br(nfp_prog, BR_BLO, ret_einval, 0); 1632 emit_alu(nfp_prog, reg_none(), 1633 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp); 1634 emit_br(nfp_prog, BR_BLO, ret_einval, 0); 1635 1636 /* Validate the length is at least ETH_HLEN */ 1637 emit_alu(nfp_prog, tmp_len, 1638 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1639 emit_alu(nfp_prog, reg_none(), 1640 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN)); 1641 emit_br(nfp_prog, BR_BMI, ret_einval, 0); 1642 1643 /* Load the ret code */ 1644 wrp_immed(nfp_prog, reg_both(0), 0); 1645 wrp_immed(nfp_prog, reg_both(1), 0); 1646 1647 /* Modify the packet metadata */ 1648 emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); 1649 1650 /* Skip over the -EINVAL ret code (defer 2) */ 1651 emit_br(nfp_prog, BR_UNC, end, 2); 1652 1653 emit_alu(nfp_prog, plen_reg(nfp_prog), 1654 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1655 emit_alu(nfp_prog, pv_len(nfp_prog), 1656 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1657 1658 /* return -EINVAL target */ 1659 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) 1660 return -EINVAL; 1661 1662 wrp_immed(nfp_prog, reg_both(0), -22); 1663 wrp_immed(nfp_prog, reg_both(1), ~0); 1664 1665 if (!nfp_prog_confirm_current_offset(nfp_prog, end)) 1666 return -EINVAL; 1667 1668 return 0; 1669 } 1670 1671 static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1672 { 1673 u32 ret_einval, end; 1674 swreg plen, delta; 1675 1676 BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN)); 1677 1678 plen = 
imm_a(nfp_prog); 1679 delta = reg_a(2 * 2); 1680 1681 ret_einval = nfp_prog_current_offset(nfp_prog) + 9; 1682 end = nfp_prog_current_offset(nfp_prog) + 11; 1683 1684 /* Calculate resulting length */ 1685 emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta); 1686 /* delta == 0 is not allowed by the kernel, add must overflow to make 1687 * length smaller. 1688 */ 1689 emit_br(nfp_prog, BR_BCC, ret_einval, 0); 1690 1691 /* if (new_len < 14) then -EINVAL */ 1692 emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN)); 1693 emit_br(nfp_prog, BR_BMI, ret_einval, 0); 1694 1695 emit_alu(nfp_prog, plen_reg(nfp_prog), 1696 plen_reg(nfp_prog), ALU_OP_ADD, delta); 1697 emit_alu(nfp_prog, pv_len(nfp_prog), 1698 pv_len(nfp_prog), ALU_OP_ADD, delta); 1699 1700 emit_br(nfp_prog, BR_UNC, end, 2); 1701 wrp_immed(nfp_prog, reg_both(0), 0); 1702 wrp_immed(nfp_prog, reg_both(1), 0); 1703 1704 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) 1705 return -EINVAL; 1706 1707 wrp_immed(nfp_prog, reg_both(0), -22); 1708 wrp_immed(nfp_prog, reg_both(1), ~0); 1709 1710 if (!nfp_prog_confirm_current_offset(nfp_prog, end)) 1711 return -EINVAL; 1712 1713 return 0; 1714 } 1715 1716 static int 1717 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1718 { 1719 bool load_lm_ptr; 1720 u32 ret_tgt; 1721 s64 lm_off; 1722 1723 /* We only have to reload LM0 if the key is not at start of stack */ 1724 lm_off = nfp_prog->stack_frame_depth; 1725 lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; 1726 load_lm_ptr = meta->arg2.var_off || lm_off; 1727 1728 /* Set LM0 to start of key */ 1729 if (load_lm_ptr) 1730 emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); 1731 if (meta->func_id == BPF_FUNC_map_update_elem) 1732 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); 1733 1734 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, 1735 2, RELO_BR_HELPER); 1736 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; 1737 1738 /* Load map ID into A0 */ 1739 wrp_mov(nfp_prog, reg_a(0), reg_a(2)); 1740 1741 /* Load the return address into B0 */ 1742 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); 1743 1744 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) 1745 return -EINVAL; 1746 1747 /* Reset the LM0 pointer */ 1748 if (!load_lm_ptr) 1749 return 0; 1750 1751 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); 1752 wrp_nops(nfp_prog, 3); 1753 1754 return 0; 1755 } 1756 1757 static int 1758 nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1759 { 1760 __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM); 1761 /* CSR value is read in following immed[gpr, 0] */ 1762 emit_immed(nfp_prog, reg_both(0), 0, 1763 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); 1764 emit_immed(nfp_prog, reg_both(1), 0, 1765 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); 1766 return 0; 1767 } 1768 1769 static int 1770 nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1771 { 1772 swreg ptr_type; 1773 u32 ret_tgt; 1774 1775 ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog)); 1776 1777 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; 1778 1779 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, 1780 2, RELO_BR_HELPER); 1781 1782 /* Load ptr type into A1 */ 1783 wrp_mov(nfp_prog, reg_a(1), ptr_type); 1784 1785 /* Load the return address into B0 */ 1786 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); 1787 1788 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) 1789 return 
-EINVAL; 1790 1791 return 0; 1792 } 1793 1794 static int 1795 nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1796 { 1797 u32 jmp_tgt; 1798 1799 jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5; 1800 1801 /* Make sure the queue id fits into FW field */ 1802 emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2), 1803 ALU_OP_AND_NOT_B, reg_imm(0xff)); 1804 emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2); 1805 1806 /* Set the 'queue selected' bit and the queue value */ 1807 emit_shf(nfp_prog, pv_qsel_set(nfp_prog), 1808 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1), 1809 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT); 1810 emit_ld_field(nfp_prog, 1811 pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2), 1812 SHF_SC_NONE, 0); 1813 /* Delay slots end here, we will jump over next instruction if queue 1814 * value fits into the field. 1815 */ 1816 emit_ld_field(nfp_prog, 1817 pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX), 1818 SHF_SC_NONE, 0); 1819 1820 if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt)) 1821 return -EINVAL; 1822 1823 return 0; 1824 } 1825 1826 /* --- Callbacks --- */ 1827 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1828 { 1829 const struct bpf_insn *insn = &meta->insn; 1830 u8 dst = insn->dst_reg * 2; 1831 u8 src = insn->src_reg * 2; 1832 1833 if (insn->src_reg == BPF_REG_10) { 1834 swreg stack_depth_reg; 1835 1836 stack_depth_reg = ur_load_imm_any(nfp_prog, 1837 nfp_prog->stack_frame_depth, 1838 stack_imm(nfp_prog)); 1839 emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog), 1840 ALU_OP_ADD, stack_depth_reg); 1841 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1842 } else { 1843 wrp_reg_mov(nfp_prog, dst, src); 1844 wrp_reg_mov(nfp_prog, dst + 1, src + 1); 1845 } 1846 1847 return 0; 1848 } 1849 1850 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1851 { 1852 u64 imm = meta->insn.imm; /* sign extend */ 1853 1854 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); 1855 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); 1856 1857 return 0; 1858 } 1859 1860 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1861 { 1862 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); 1863 } 1864 1865 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1866 { 1867 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); 1868 } 1869 1870 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1871 { 1872 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); 1873 } 1874 1875 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1876 { 1877 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); 1878 } 1879 1880 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1881 { 1882 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); 1883 } 1884 1885 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1886 { 1887 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); 1888 } 1889 1890 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1891 { 1892 const struct bpf_insn *insn = &meta->insn; 1893 1894 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1895 reg_a(insn->dst_reg * 2), ALU_OP_ADD, 1896 reg_b(insn->src_reg * 2)); 1897 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1898 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, 1899 reg_b(insn->src_reg * 2 + 1)); 1900 1901 return 0; 1902 } 1903 1904 static int 
add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1905 { 1906 const struct bpf_insn *insn = &meta->insn; 1907 u64 imm = insn->imm; /* sign extend */ 1908 1909 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); 1910 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); 1911 1912 return 0; 1913 } 1914 1915 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1916 { 1917 const struct bpf_insn *insn = &meta->insn; 1918 1919 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1920 reg_a(insn->dst_reg * 2), ALU_OP_SUB, 1921 reg_b(insn->src_reg * 2)); 1922 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1923 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, 1924 reg_b(insn->src_reg * 2 + 1)); 1925 1926 return 0; 1927 } 1928 1929 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1930 { 1931 const struct bpf_insn *insn = &meta->insn; 1932 u64 imm = insn->imm; /* sign extend */ 1933 1934 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); 1935 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); 1936 1937 return 0; 1938 } 1939 1940 static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1941 { 1942 return wrp_mul(nfp_prog, meta, true, true); 1943 } 1944 1945 static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1946 { 1947 return wrp_mul(nfp_prog, meta, true, false); 1948 } 1949 1950 static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1951 { 1952 const struct bpf_insn *insn = &meta->insn; 1953 1954 return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm); 1955 } 1956 1957 static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1958 { 1959 /* NOTE: verifier hook has rejected cases for which verifier doesn't 1960 * know whether the source operand is constant or not. 1961 */ 1962 return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src); 1963 } 1964 1965 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1966 { 1967 const struct bpf_insn *insn = &meta->insn; 1968 1969 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), 1970 ALU_OP_SUB, reg_b(insn->dst_reg * 2)); 1971 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), 1972 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); 1973 1974 return 0; 1975 } 1976 1977 /* Pseudo code: 1978 * if shift_amt >= 32 1979 * dst_high = dst_low << shift_amt[4:0] 1980 * dst_low = 0; 1981 * else 1982 * dst_high = (dst_high, dst_low) >> (32 - shift_amt) 1983 * dst_low = dst_low << shift_amt 1984 * 1985 * The indirect shift will use the same logic at runtime. 
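 *
 * As an illustration (example values only), for dst = 0x00000001_00001234:
 *   shift_amt == 8:  dst_high = 0x00000100, dst_low = 0x00123400
 *   shift_amt == 40: dst_high = 0x00001234 << 8 = 0x00123400, dst_low = 0
 * i.e. the same result as a plain 64-bit "dst <<= shift_amt".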
1986 */ 1987 static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 1988 { 1989 if (!shift_amt) 1990 return 0; 1991 1992 if (shift_amt < 32) { 1993 emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), 1994 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, 1995 32 - shift_amt); 1996 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 1997 reg_b(dst), SHF_SC_L_SHF, shift_amt); 1998 } else if (shift_amt == 32) { 1999 wrp_reg_mov(nfp_prog, dst + 1, dst); 2000 wrp_immed(nfp_prog, reg_both(dst), 0); 2001 } else if (shift_amt > 32) { 2002 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, 2003 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32); 2004 wrp_immed(nfp_prog, reg_both(dst), 0); 2005 } 2006 2007 return 0; 2008 } 2009 2010 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2011 { 2012 const struct bpf_insn *insn = &meta->insn; 2013 u8 dst = insn->dst_reg * 2; 2014 2015 return __shl_imm64(nfp_prog, dst, insn->imm); 2016 } 2017 2018 static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2019 { 2020 emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB, 2021 reg_b(src)); 2022 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0)); 2023 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, 2024 reg_b(dst), SHF_SC_R_DSHF); 2025 } 2026 2027 /* NOTE: for indirect left shift, HIGH part should be calculated first. */ 2028 static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2029 { 2030 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2031 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2032 reg_b(dst), SHF_SC_L_SHF); 2033 } 2034 2035 static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2036 { 2037 shl_reg64_lt32_high(nfp_prog, dst, src); 2038 shl_reg64_lt32_low(nfp_prog, dst, src); 2039 } 2040 2041 static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2042 { 2043 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2044 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, 2045 reg_b(dst), SHF_SC_L_SHF); 2046 wrp_immed(nfp_prog, reg_both(dst), 0); 2047 } 2048 2049 static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2050 { 2051 const struct bpf_insn *insn = &meta->insn; 2052 u64 umin, umax; 2053 u8 dst, src; 2054 2055 dst = insn->dst_reg * 2; 2056 umin = meta->umin_src; 2057 umax = meta->umax_src; 2058 if (umin == umax) 2059 return __shl_imm64(nfp_prog, dst, umin); 2060 2061 src = insn->src_reg * 2; 2062 if (umax < 32) { 2063 shl_reg64_lt32(nfp_prog, dst, src); 2064 } else if (umin >= 32) { 2065 shl_reg64_ge32(nfp_prog, dst, src); 2066 } else { 2067 /* Generate different instruction sequences depending on runtime 2068 * value of shift amount. 2069 */ 2070 u16 label_ge32, label_end; 2071 2072 label_ge32 = nfp_prog_current_offset(nfp_prog) + 7; 2073 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); 2074 2075 shl_reg64_lt32_high(nfp_prog, dst, src); 2076 label_end = nfp_prog_current_offset(nfp_prog) + 6; 2077 emit_br(nfp_prog, BR_UNC, label_end, 2); 2078 /* shl_reg64_lt32_low packed in delay slot. 
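 * Bit 5 of the shift amount distinguishes the two ranges at runtime (it is
 * set exactly for amounts 32..63).  The label arithmetic above follows from
 * the emitted lengths: label_ge32 is br_bset + 7 (1 br_bset, 3 insns of
 * shl_reg64_lt32_high, 1 branch, 2 delay-slot insns of shl_reg64_lt32_low),
 * and label_end is computed as +6 from just before the unconditional branch
 * (branch, 2 delay slots, then 3 insns of shl_reg64_ge32).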
*/ 2079 shl_reg64_lt32_low(nfp_prog, dst, src); 2080 2081 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) 2082 return -EINVAL; 2083 shl_reg64_ge32(nfp_prog, dst, src); 2084 2085 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) 2086 return -EINVAL; 2087 } 2088 2089 return 0; 2090 } 2091 2092 /* Pseudo code: 2093 * if shift_amt >= 32 2094 * dst_high = 0; 2095 * dst_low = dst_high >> shift_amt[4:0] 2096 * else 2097 * dst_high = dst_high >> shift_amt 2098 * dst_low = (dst_high, dst_low) >> shift_amt 2099 * 2100 * The indirect shift will use the same logic at runtime. 2101 */ 2102 static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 2103 { 2104 if (!shift_amt) 2105 return 0; 2106 2107 if (shift_amt < 32) { 2108 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, 2109 reg_b(dst), SHF_SC_R_DSHF, shift_amt); 2110 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, 2111 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); 2112 } else if (shift_amt == 32) { 2113 wrp_reg_mov(nfp_prog, dst, dst + 1); 2114 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2115 } else if (shift_amt > 32) { 2116 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2117 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); 2118 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2119 } 2120 2121 return 0; 2122 } 2123 2124 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2125 { 2126 const struct bpf_insn *insn = &meta->insn; 2127 u8 dst = insn->dst_reg * 2; 2128 2129 return __shr_imm64(nfp_prog, dst, insn->imm); 2130 } 2131 2132 /* NOTE: for indirect right shift, LOW part should be calculated first. */ 2133 static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2134 { 2135 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2136 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, 2137 reg_b(dst + 1), SHF_SC_R_SHF); 2138 } 2139 2140 static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2141 { 2142 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2143 emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, 2144 reg_b(dst), SHF_SC_R_DSHF); 2145 } 2146 2147 static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2148 { 2149 shr_reg64_lt32_low(nfp_prog, dst, src); 2150 shr_reg64_lt32_high(nfp_prog, dst, src); 2151 } 2152 2153 static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2154 { 2155 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2156 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2157 reg_b(dst + 1), SHF_SC_R_SHF); 2158 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 2159 } 2160 2161 static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2162 { 2163 const struct bpf_insn *insn = &meta->insn; 2164 u64 umin, umax; 2165 u8 dst, src; 2166 2167 dst = insn->dst_reg * 2; 2168 umin = meta->umin_src; 2169 umax = meta->umax_src; 2170 if (umin == umax) 2171 return __shr_imm64(nfp_prog, dst, umin); 2172 2173 src = insn->src_reg * 2; 2174 if (umax < 32) { 2175 shr_reg64_lt32(nfp_prog, dst, src); 2176 } else if (umin >= 32) { 2177 shr_reg64_ge32(nfp_prog, dst, src); 2178 } else { 2179 /* Generate different instruction sequences depending on runtime 2180 * value of shift amount. 
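 * Same branch-over layout as shl_reg64: a br_bset on bit 5 of the shift
 * amount selects the >= 32 sequence at runtime, the < 32 low/high halves
 * are emitted in between (the high half rides in the branch delay slots),
 * and label_ge32/label_end are pre-computed from the helper lengths
 * (2 insns for shr_reg64_lt32_low, 2 for _lt32_high, 3 for _ge32).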
2181 */ 2182 u16 label_ge32, label_end; 2183 2184 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; 2185 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); 2186 shr_reg64_lt32_low(nfp_prog, dst, src); 2187 label_end = nfp_prog_current_offset(nfp_prog) + 6; 2188 emit_br(nfp_prog, BR_UNC, label_end, 2); 2189 /* shr_reg64_lt32_high packed in delay slot. */ 2190 shr_reg64_lt32_high(nfp_prog, dst, src); 2191 2192 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) 2193 return -EINVAL; 2194 shr_reg64_ge32(nfp_prog, dst, src); 2195 2196 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) 2197 return -EINVAL; 2198 } 2199 2200 return 0; 2201 } 2202 2203 /* Code logic is the same as __shr_imm64 except ashr requires signedness bit 2204 * told through PREV_ALU result. 2205 */ 2206 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) 2207 { 2208 if (!shift_amt) 2209 return 0; 2210 2211 if (shift_amt < 32) { 2212 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, 2213 reg_b(dst), SHF_SC_R_DSHF, shift_amt); 2214 /* Set signedness bit. */ 2215 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, 2216 reg_imm(0)); 2217 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, 2218 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); 2219 } else if (shift_amt == 32) { 2220 /* NOTE: this also helps setting signedness bit. */ 2221 wrp_reg_mov(nfp_prog, dst, dst + 1); 2222 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, 2223 reg_b(dst + 1), SHF_SC_R_SHF, 31); 2224 } else if (shift_amt > 32) { 2225 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, 2226 reg_imm(0)); 2227 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2228 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); 2229 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, 2230 reg_b(dst + 1), SHF_SC_R_SHF, 31); 2231 } 2232 2233 return 0; 2234 } 2235 2236 static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2237 { 2238 const struct bpf_insn *insn = &meta->insn; 2239 u8 dst = insn->dst_reg * 2; 2240 2241 return __ashr_imm64(nfp_prog, dst, insn->imm); 2242 } 2243 2244 static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2245 { 2246 /* NOTE: the first insn will set both indirect shift amount (source A) 2247 * and signedness bit (MSB of result). 2248 */ 2249 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); 2250 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, 2251 reg_b(dst + 1), SHF_SC_R_SHF); 2252 } 2253 2254 static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2255 { 2256 /* NOTE: it is the same as logic shift because we don't need to shift in 2257 * signedness bit when the shift amount is less than 32. 2258 */ 2259 return shr_reg64_lt32_low(nfp_prog, dst, src); 2260 } 2261 2262 static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2263 { 2264 ashr_reg64_lt32_low(nfp_prog, dst, src); 2265 ashr_reg64_lt32_high(nfp_prog, dst, src); 2266 } 2267 2268 static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) 2269 { 2270 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); 2271 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2272 reg_b(dst + 1), SHF_SC_R_SHF); 2273 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, 2274 reg_b(dst + 1), SHF_SC_R_SHF, 31); 2275 } 2276 2277 /* Like ashr_imm64, but need to use indirect shift. 
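 * "Indirect" here means the shift amount comes from the A operand of the
 * preceding ALU instruction rather than from an immediate field, which is
 * why each variable-shift helper above starts with an ALU_OP_OR on
 * reg_a(src); for ASHR the MSB of that ALU result also supplies the sign
 * bit.  Example values only: arithmetically shifting
 * dst = 0x80000000_00000000 right by 33 yields dst_high = 0xffffffff,
 * dst_low = 0xc0000000.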
*/ 2278 static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2279 { 2280 const struct bpf_insn *insn = &meta->insn; 2281 u64 umin, umax; 2282 u8 dst, src; 2283 2284 dst = insn->dst_reg * 2; 2285 umin = meta->umin_src; 2286 umax = meta->umax_src; 2287 if (umin == umax) 2288 return __ashr_imm64(nfp_prog, dst, umin); 2289 2290 src = insn->src_reg * 2; 2291 if (umax < 32) { 2292 ashr_reg64_lt32(nfp_prog, dst, src); 2293 } else if (umin >= 32) { 2294 ashr_reg64_ge32(nfp_prog, dst, src); 2295 } else { 2296 u16 label_ge32, label_end; 2297 2298 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; 2299 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); 2300 ashr_reg64_lt32_low(nfp_prog, dst, src); 2301 label_end = nfp_prog_current_offset(nfp_prog) + 6; 2302 emit_br(nfp_prog, BR_UNC, label_end, 2); 2303 /* ashr_reg64_lt32_high packed in delay slot. */ 2304 ashr_reg64_lt32_high(nfp_prog, dst, src); 2305 2306 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) 2307 return -EINVAL; 2308 ashr_reg64_ge32(nfp_prog, dst, src); 2309 2310 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) 2311 return -EINVAL; 2312 } 2313 2314 return 0; 2315 } 2316 2317 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2318 { 2319 const struct bpf_insn *insn = &meta->insn; 2320 2321 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); 2322 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 2323 2324 return 0; 2325 } 2326 2327 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2328 { 2329 const struct bpf_insn *insn = &meta->insn; 2330 2331 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); 2332 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 2333 2334 return 0; 2335 } 2336 2337 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2338 { 2339 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); 2340 } 2341 2342 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2343 { 2344 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR); 2345 } 2346 2347 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2348 { 2349 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); 2350 } 2351 2352 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2353 { 2354 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND); 2355 } 2356 2357 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2358 { 2359 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); 2360 } 2361 2362 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2363 { 2364 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR); 2365 } 2366 2367 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2368 { 2369 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); 2370 } 2371 2372 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2373 { 2374 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD); 2375 } 2376 2377 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2378 { 2379 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); 2380 } 2381 2382 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2383 { 2384 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB); 2385 } 2386 2387 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2388 { 2389 return wrp_mul(nfp_prog, meta, false, true); 2390 } 2391 2392 static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2393 { 2394 
return wrp_mul(nfp_prog, meta, false, false); 2395 } 2396 2397 static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2398 { 2399 return div_reg64(nfp_prog, meta); 2400 } 2401 2402 static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2403 { 2404 return div_imm64(nfp_prog, meta); 2405 } 2406 2407 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2408 { 2409 u8 dst = meta->insn.dst_reg * 2; 2410 2411 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); 2412 wrp_zext(nfp_prog, meta, dst); 2413 2414 return 0; 2415 } 2416 2417 static int 2418 __ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, 2419 u8 shift_amt) 2420 { 2421 if (shift_amt) { 2422 /* Set signedness bit (MSB of result). */ 2423 emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR, 2424 reg_imm(0)); 2425 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2426 reg_b(dst), SHF_SC_R_SHF, shift_amt); 2427 } 2428 wrp_zext(nfp_prog, meta, dst); 2429 2430 return 0; 2431 } 2432 2433 static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2434 { 2435 const struct bpf_insn *insn = &meta->insn; 2436 u64 umin, umax; 2437 u8 dst, src; 2438 2439 dst = insn->dst_reg * 2; 2440 umin = meta->umin_src; 2441 umax = meta->umax_src; 2442 if (umin == umax) 2443 return __ashr_imm(nfp_prog, meta, dst, umin); 2444 2445 src = insn->src_reg * 2; 2446 /* NOTE: the first insn will set both indirect shift amount (source A) 2447 * and signedness bit (MSB of result). 2448 */ 2449 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); 2450 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, 2451 reg_b(dst), SHF_SC_R_SHF); 2452 wrp_zext(nfp_prog, meta, dst); 2453 2454 return 0; 2455 } 2456 2457 static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2458 { 2459 const struct bpf_insn *insn = &meta->insn; 2460 u8 dst = insn->dst_reg * 2; 2461 2462 return __ashr_imm(nfp_prog, meta, dst, insn->imm); 2463 } 2464 2465 static int 2466 __shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, 2467 u8 shift_amt) 2468 { 2469 if (shift_amt) 2470 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2471 reg_b(dst), SHF_SC_R_SHF, shift_amt); 2472 wrp_zext(nfp_prog, meta, dst); 2473 return 0; 2474 } 2475 2476 static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2477 { 2478 const struct bpf_insn *insn = &meta->insn; 2479 u8 dst = insn->dst_reg * 2; 2480 2481 return __shr_imm(nfp_prog, meta, dst, insn->imm); 2482 } 2483 2484 static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2485 { 2486 const struct bpf_insn *insn = &meta->insn; 2487 u64 umin, umax; 2488 u8 dst, src; 2489 2490 dst = insn->dst_reg * 2; 2491 umin = meta->umin_src; 2492 umax = meta->umax_src; 2493 if (umin == umax) 2494 return __shr_imm(nfp_prog, meta, dst, umin); 2495 2496 src = insn->src_reg * 2; 2497 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); 2498 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2499 reg_b(dst), SHF_SC_R_SHF); 2500 wrp_zext(nfp_prog, meta, dst); 2501 return 0; 2502 } 2503 2504 static int 2505 __shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, 2506 u8 shift_amt) 2507 { 2508 if (shift_amt) 2509 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, 2510 reg_b(dst), SHF_SC_L_SHF, shift_amt); 2511 wrp_zext(nfp_prog, meta, dst); 2512 return 0; 2513 } 2514 2515 static int shl_imm(struct nfp_prog 
*nfp_prog, struct nfp_insn_meta *meta) 2516 { 2517 const struct bpf_insn *insn = &meta->insn; 2518 u8 dst = insn->dst_reg * 2; 2519 2520 return __shl_imm(nfp_prog, meta, dst, insn->imm); 2521 } 2522 2523 static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2524 { 2525 const struct bpf_insn *insn = &meta->insn; 2526 u64 umin, umax; 2527 u8 dst, src; 2528 2529 dst = insn->dst_reg * 2; 2530 umin = meta->umin_src; 2531 umax = meta->umax_src; 2532 if (umin == umax) 2533 return __shl_imm(nfp_prog, meta, dst, umin); 2534 2535 src = insn->src_reg * 2; 2536 shl_reg64_lt32_low(nfp_prog, dst, src); 2537 wrp_zext(nfp_prog, meta, dst); 2538 return 0; 2539 } 2540 2541 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2542 { 2543 const struct bpf_insn *insn = &meta->insn; 2544 u8 gpr = insn->dst_reg * 2; 2545 2546 switch (insn->imm) { 2547 case 16: 2548 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), 2549 SHF_SC_R_ROT, 8); 2550 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), 2551 SHF_SC_R_SHF, 16); 2552 2553 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 2554 break; 2555 case 32: 2556 wrp_end32(nfp_prog, reg_a(gpr), gpr); 2557 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 2558 break; 2559 case 64: 2560 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); 2561 2562 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); 2563 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); 2564 break; 2565 } 2566 2567 return 0; 2568 } 2569 2570 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2571 { 2572 struct nfp_insn_meta *prev = nfp_meta_prev(meta); 2573 u32 imm_lo, imm_hi; 2574 u8 dst; 2575 2576 dst = prev->insn.dst_reg * 2; 2577 imm_lo = prev->insn.imm; 2578 imm_hi = meta->insn.imm; 2579 2580 wrp_immed(nfp_prog, reg_both(dst), imm_lo); 2581 2582 /* mov is always 1 insn, load imm may be two, so try to use mov */ 2583 if (imm_hi == imm_lo) 2584 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); 2585 else 2586 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); 2587 2588 return 0; 2589 } 2590 2591 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2592 { 2593 meta->double_cb = imm_ld8_part2; 2594 return 0; 2595 } 2596 2597 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2598 { 2599 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1); 2600 } 2601 2602 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2603 { 2604 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2); 2605 } 2606 2607 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2608 { 2609 return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4); 2610 } 2611 2612 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2613 { 2614 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, 2615 meta->insn.src_reg * 2, 1); 2616 } 2617 2618 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2619 { 2620 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, 2621 meta->insn.src_reg * 2, 2); 2622 } 2623 2624 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2625 { 2626 return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, 2627 meta->insn.src_reg * 2, 4); 2628 } 2629 2630 static int 2631 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2632 unsigned int size, unsigned int ptr_off) 2633 { 2634 return mem_op_stack(nfp_prog, meta, size, ptr_off, 2635 meta->insn.dst_reg * 2, 
meta->insn.src_reg * 2, 2636 true, wrp_lmem_load); 2637 } 2638 2639 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2640 u8 size) 2641 { 2642 swreg dst = reg_both(meta->insn.dst_reg * 2); 2643 2644 switch (meta->insn.off) { 2645 case offsetof(struct __sk_buff, len): 2646 if (size != FIELD_SIZEOF(struct __sk_buff, len)) 2647 return -EOPNOTSUPP; 2648 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); 2649 break; 2650 case offsetof(struct __sk_buff, data): 2651 if (size != FIELD_SIZEOF(struct __sk_buff, data)) 2652 return -EOPNOTSUPP; 2653 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 2654 break; 2655 case offsetof(struct __sk_buff, data_end): 2656 if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) 2657 return -EOPNOTSUPP; 2658 emit_alu(nfp_prog, dst, 2659 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 2660 break; 2661 default: 2662 return -EOPNOTSUPP; 2663 } 2664 2665 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 2666 2667 return 0; 2668 } 2669 2670 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2671 u8 size) 2672 { 2673 swreg dst = reg_both(meta->insn.dst_reg * 2); 2674 2675 switch (meta->insn.off) { 2676 case offsetof(struct xdp_md, data): 2677 if (size != FIELD_SIZEOF(struct xdp_md, data)) 2678 return -EOPNOTSUPP; 2679 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 2680 break; 2681 case offsetof(struct xdp_md, data_end): 2682 if (size != FIELD_SIZEOF(struct xdp_md, data_end)) 2683 return -EOPNOTSUPP; 2684 emit_alu(nfp_prog, dst, 2685 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 2686 break; 2687 default: 2688 return -EOPNOTSUPP; 2689 } 2690 2691 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 2692 2693 return 0; 2694 } 2695 2696 static int 2697 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2698 unsigned int size) 2699 { 2700 swreg tmp_reg; 2701 2702 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2703 2704 return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2, 2705 tmp_reg, meta->insn.dst_reg * 2, size); 2706 } 2707 2708 static int 2709 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2710 unsigned int size) 2711 { 2712 swreg tmp_reg; 2713 2714 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2715 2716 return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2, 2717 tmp_reg, meta->insn.dst_reg * 2, size); 2718 } 2719 2720 static void 2721 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog, 2722 struct nfp_insn_meta *meta) 2723 { 2724 s16 range_start = meta->pkt_cache.range_start; 2725 s16 range_end = meta->pkt_cache.range_end; 2726 swreg src_base, off; 2727 u8 xfer_num, len; 2728 bool indir; 2729 2730 off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog)); 2731 src_base = reg_a(meta->insn.src_reg * 2); 2732 len = range_end - range_start; 2733 xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH; 2734 2735 indir = len > 8 * REG_WIDTH; 2736 /* Setup PREV_ALU for indirect mode. */ 2737 if (indir) 2738 wrp_immed(nfp_prog, reg_none(), 2739 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 2740 2741 /* Cache memory into transfer-in registers. 
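 * Example values only, assuming REG_WIDTH is 4 bytes: caching packet bytes
 * [2, 42) gives len = 40 and xfer_num = 10, which exceeds the
 * 8 * REG_WIDTH threshold above, so the command is emitted in indirect
 * mode with the wrp_immed() above supplying the length override
 * (xfer_num - 1 = 9).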
*/ 2742 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, 2743 off, xfer_num - 1, CMD_CTX_SWAP, indir); 2744 } 2745 2746 static int 2747 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, 2748 struct nfp_insn_meta *meta, 2749 unsigned int size) 2750 { 2751 s16 range_start = meta->pkt_cache.range_start; 2752 s16 insn_off = meta->insn.off - range_start; 2753 swreg dst_lo, dst_hi, src_lo, src_mid; 2754 u8 dst_gpr = meta->insn.dst_reg * 2; 2755 u8 len_lo = size, len_mid = 0; 2756 u8 idx = insn_off / REG_WIDTH; 2757 u8 off = insn_off % REG_WIDTH; 2758 2759 dst_hi = reg_both(dst_gpr + 1); 2760 dst_lo = reg_both(dst_gpr); 2761 src_lo = reg_xfer(idx); 2762 2763 /* The read length could involve as many as three registers. */ 2764 if (size > REG_WIDTH - off) { 2765 /* Calculate the part in the second register. */ 2766 len_lo = REG_WIDTH - off; 2767 len_mid = size - len_lo; 2768 2769 /* Calculate the part in the third register. */ 2770 if (size > 2 * REG_WIDTH - off) 2771 len_mid = REG_WIDTH; 2772 } 2773 2774 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); 2775 2776 if (!len_mid) { 2777 wrp_zext(nfp_prog, meta, dst_gpr); 2778 return 0; 2779 } 2780 2781 src_mid = reg_xfer(idx + 1); 2782 2783 if (size <= REG_WIDTH) { 2784 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); 2785 wrp_zext(nfp_prog, meta, dst_gpr); 2786 } else { 2787 swreg src_hi = reg_xfer(idx + 2); 2788 2789 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, 2790 REG_WIDTH - len_lo, len_lo); 2791 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo, 2792 REG_WIDTH - len_lo); 2793 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo, 2794 len_lo); 2795 } 2796 2797 return 0; 2798 } 2799 2800 static int 2801 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, 2802 struct nfp_insn_meta *meta, 2803 unsigned int size) 2804 { 2805 swreg dst_lo, dst_hi, src_lo; 2806 u8 dst_gpr, idx; 2807 2808 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; 2809 dst_gpr = meta->insn.dst_reg * 2; 2810 dst_hi = reg_both(dst_gpr + 1); 2811 dst_lo = reg_both(dst_gpr); 2812 src_lo = reg_xfer(idx); 2813 2814 if (size < REG_WIDTH) { 2815 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); 2816 wrp_zext(nfp_prog, meta, dst_gpr); 2817 } else if (size == REG_WIDTH) { 2818 wrp_mov(nfp_prog, dst_lo, src_lo); 2819 wrp_zext(nfp_prog, meta, dst_gpr); 2820 } else { 2821 swreg src_hi = reg_xfer(idx + 1); 2822 2823 wrp_mov(nfp_prog, dst_lo, src_lo); 2824 wrp_mov(nfp_prog, dst_hi, src_hi); 2825 } 2826 2827 return 0; 2828 } 2829 2830 static int 2831 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, 2832 struct nfp_insn_meta *meta, unsigned int size) 2833 { 2834 u8 off = meta->insn.off - meta->pkt_cache.range_start; 2835 2836 if (IS_ALIGNED(off, REG_WIDTH)) 2837 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); 2838 2839 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); 2840 } 2841 2842 static int 2843 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2844 unsigned int size) 2845 { 2846 if (meta->ldst_gather_len) 2847 return nfp_cpp_memcpy(nfp_prog, meta); 2848 2849 if (meta->ptr.type == PTR_TO_CTX) { 2850 if (nfp_prog->type == BPF_PROG_TYPE_XDP) 2851 return mem_ldx_xdp(nfp_prog, meta, size); 2852 else 2853 return mem_ldx_skb(nfp_prog, meta, size); 2854 } 2855 2856 if (meta->ptr.type == PTR_TO_PACKET) { 2857 if (meta->pkt_cache.range_end) { 2858 if (meta->pkt_cache.do_init) 2859 mem_ldx_data_init_pktcache(nfp_prog, meta); 2860 2861 return 
mem_ldx_data_from_pktcache(nfp_prog, meta, size); 2862 } else { 2863 return mem_ldx_data(nfp_prog, meta, size); 2864 } 2865 } 2866 2867 if (meta->ptr.type == PTR_TO_STACK) 2868 return mem_ldx_stack(nfp_prog, meta, size, 2869 meta->ptr.off + meta->ptr.var_off.value); 2870 2871 if (meta->ptr.type == PTR_TO_MAP_VALUE) 2872 return mem_ldx_emem(nfp_prog, meta, size); 2873 2874 return -EOPNOTSUPP; 2875 } 2876 2877 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2878 { 2879 return mem_ldx(nfp_prog, meta, 1); 2880 } 2881 2882 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2883 { 2884 return mem_ldx(nfp_prog, meta, 2); 2885 } 2886 2887 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2888 { 2889 return mem_ldx(nfp_prog, meta, 4); 2890 } 2891 2892 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2893 { 2894 return mem_ldx(nfp_prog, meta, 8); 2895 } 2896 2897 static int 2898 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2899 unsigned int size) 2900 { 2901 u64 imm = meta->insn.imm; /* sign extend */ 2902 swreg off_reg; 2903 2904 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2905 2906 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 2907 imm, size); 2908 } 2909 2910 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2911 unsigned int size) 2912 { 2913 if (meta->ptr.type == PTR_TO_PACKET) 2914 return mem_st_data(nfp_prog, meta, size); 2915 2916 return -EOPNOTSUPP; 2917 } 2918 2919 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2920 { 2921 return mem_st(nfp_prog, meta, 1); 2922 } 2923 2924 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2925 { 2926 return mem_st(nfp_prog, meta, 2); 2927 } 2928 2929 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2930 { 2931 return mem_st(nfp_prog, meta, 4); 2932 } 2933 2934 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2935 { 2936 return mem_st(nfp_prog, meta, 8); 2937 } 2938 2939 static int 2940 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2941 unsigned int size) 2942 { 2943 swreg off_reg; 2944 2945 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2946 2947 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 2948 meta->insn.src_reg * 2, size); 2949 } 2950 2951 static int 2952 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2953 unsigned int size, unsigned int ptr_off) 2954 { 2955 return mem_op_stack(nfp_prog, meta, size, ptr_off, 2956 meta->insn.src_reg * 2, meta->insn.dst_reg * 2, 2957 false, wrp_lmem_store); 2958 } 2959 2960 static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2961 { 2962 switch (meta->insn.off) { 2963 case offsetof(struct xdp_md, rx_queue_index): 2964 return nfp_queue_select(nfp_prog, meta); 2965 } 2966 2967 WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */ 2968 return -EOPNOTSUPP; 2969 } 2970 2971 static int 2972 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2973 unsigned int size) 2974 { 2975 if (meta->ptr.type == PTR_TO_PACKET) 2976 return mem_stx_data(nfp_prog, meta, size); 2977 2978 if (meta->ptr.type == PTR_TO_STACK) 2979 return mem_stx_stack(nfp_prog, meta, size, 2980 meta->ptr.off + meta->ptr.var_off.value); 2981 2982 return -EOPNOTSUPP; 2983 } 2984 2985 static int mem_stx1(struct nfp_prog *nfp_prog, struct 
nfp_insn_meta *meta) 2986 { 2987 return mem_stx(nfp_prog, meta, 1); 2988 } 2989 2990 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2991 { 2992 return mem_stx(nfp_prog, meta, 2); 2993 } 2994 2995 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2996 { 2997 if (meta->ptr.type == PTR_TO_CTX) 2998 if (nfp_prog->type == BPF_PROG_TYPE_XDP) 2999 return mem_stx_xdp(nfp_prog, meta); 3000 return mem_stx(nfp_prog, meta, 4); 3001 } 3002 3003 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3004 { 3005 return mem_stx(nfp_prog, meta, 8); 3006 } 3007 3008 static int 3009 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) 3010 { 3011 u8 dst_gpr = meta->insn.dst_reg * 2; 3012 u8 src_gpr = meta->insn.src_reg * 2; 3013 unsigned int full_add, out; 3014 swreg addra, addrb, off; 3015 3016 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 3017 3018 /* We can fit 16 bits into command immediate, if we know the immediate 3019 * is guaranteed to either always or never fit into 16 bit we only 3020 * generate code to handle that particular case, otherwise generate 3021 * code for both. 3022 */ 3023 out = nfp_prog_current_offset(nfp_prog); 3024 full_add = nfp_prog_current_offset(nfp_prog); 3025 3026 if (meta->insn.off) { 3027 out += 2; 3028 full_add += 2; 3029 } 3030 if (meta->xadd_maybe_16bit) { 3031 out += 3; 3032 full_add += 3; 3033 } 3034 if (meta->xadd_over_16bit) 3035 out += 2 + is64; 3036 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { 3037 out += 5; 3038 full_add += 5; 3039 } 3040 3041 /* Generate the branch for choosing add_imm vs add */ 3042 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { 3043 swreg max_imm = imm_a(nfp_prog); 3044 3045 wrp_immed(nfp_prog, max_imm, 0xffff); 3046 emit_alu(nfp_prog, reg_none(), 3047 max_imm, ALU_OP_SUB, reg_b(src_gpr)); 3048 emit_alu(nfp_prog, reg_none(), 3049 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1)); 3050 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0); 3051 /* defer for add */ 3052 } 3053 3054 /* If insn has an offset add to the address */ 3055 if (!meta->insn.off) { 3056 addra = reg_a(dst_gpr); 3057 addrb = reg_b(dst_gpr + 1); 3058 } else { 3059 emit_alu(nfp_prog, imma_a(nfp_prog), 3060 reg_a(dst_gpr), ALU_OP_ADD, off); 3061 emit_alu(nfp_prog, imma_b(nfp_prog), 3062 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0)); 3063 addra = imma_a(nfp_prog); 3064 addrb = imma_b(nfp_prog); 3065 } 3066 3067 /* Generate the add_imm if 16 bits are possible */ 3068 if (meta->xadd_maybe_16bit) { 3069 swreg prev_alu = imm_a(nfp_prog); 3070 3071 wrp_immed(nfp_prog, prev_alu, 3072 FIELD_PREP(CMD_OVE_DATA, 2) | 3073 CMD_OVE_LEN | 3074 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); 3075 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); 3076 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, 3077 addra, addrb, 0, CMD_CTX_NO_SWAP); 3078 3079 if (meta->xadd_over_16bit) 3080 emit_br(nfp_prog, BR_UNC, out, 0); 3081 } 3082 3083 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add)) 3084 return -EINVAL; 3085 3086 /* Generate the add if 16 bits are not guaranteed */ 3087 if (meta->xadd_over_16bit) { 3088 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0, 3089 addra, addrb, is64 << 2, 3090 is64 ? 
CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1); 3091 3092 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); 3093 if (is64) 3094 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); 3095 } 3096 3097 if (!nfp_prog_confirm_current_offset(nfp_prog, out)) 3098 return -EINVAL; 3099 3100 return 0; 3101 } 3102 3103 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3104 { 3105 return mem_xadd(nfp_prog, meta, false); 3106 } 3107 3108 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3109 { 3110 return mem_xadd(nfp_prog, meta, true); 3111 } 3112 3113 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3114 { 3115 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); 3116 3117 return 0; 3118 } 3119 3120 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3121 { 3122 const struct bpf_insn *insn = &meta->insn; 3123 u64 imm = insn->imm; /* sign extend */ 3124 swreg or1, or2, tmp_reg; 3125 3126 or1 = reg_a(insn->dst_reg * 2); 3127 or2 = reg_b(insn->dst_reg * 2 + 1); 3128 3129 if (imm & ~0U) { 3130 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 3131 emit_alu(nfp_prog, imm_a(nfp_prog), 3132 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 3133 or1 = imm_a(nfp_prog); 3134 } 3135 3136 if (imm >> 32) { 3137 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 3138 emit_alu(nfp_prog, imm_b(nfp_prog), 3139 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 3140 or2 = imm_b(nfp_prog); 3141 } 3142 3143 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); 3144 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 3145 3146 return 0; 3147 } 3148 3149 static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3150 { 3151 const struct bpf_insn *insn = &meta->insn; 3152 swreg tmp_reg; 3153 3154 tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog)); 3155 emit_alu(nfp_prog, reg_none(), 3156 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 3157 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 3158 3159 return 0; 3160 } 3161 3162 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3163 { 3164 const struct bpf_insn *insn = &meta->insn; 3165 u64 imm = insn->imm; /* sign extend */ 3166 u8 dst_gpr = insn->dst_reg * 2; 3167 swreg tmp_reg; 3168 3169 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 3170 emit_alu(nfp_prog, imm_b(nfp_prog), 3171 reg_a(dst_gpr), ALU_OP_AND, tmp_reg); 3172 /* Upper word of the mask can only be 0 or ~0 from sign extension, 3173 * so either ignore it or OR the whole thing in. 
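 * Example values only: imm = 0x80000000 sign-extends to
 * 0xffffffff_80000000, so for a 64-bit JSET the high word of dst is OR-ed
 * in as well, whereas imm = 0x7fffffff stays 0x00000000_7fffffff and only
 * the low word is tested.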
3174 */ 3175 if (is_mbpf_jmp64(meta) && imm >> 32) { 3176 emit_alu(nfp_prog, reg_none(), 3177 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog)); 3178 } 3179 emit_br(nfp_prog, BR_BNE, insn->off, 0); 3180 3181 return 0; 3182 } 3183 3184 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3185 { 3186 const struct bpf_insn *insn = &meta->insn; 3187 u64 imm = insn->imm; /* sign extend */ 3188 bool is_jmp32 = is_mbpf_jmp32(meta); 3189 swreg tmp_reg; 3190 3191 if (!imm) { 3192 if (is_jmp32) 3193 emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE, 3194 reg_b(insn->dst_reg * 2)); 3195 else 3196 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), 3197 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); 3198 emit_br(nfp_prog, BR_BNE, insn->off, 0); 3199 return 0; 3200 } 3201 3202 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 3203 emit_alu(nfp_prog, reg_none(), 3204 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 3205 emit_br(nfp_prog, BR_BNE, insn->off, 0); 3206 3207 if (is_jmp32) 3208 return 0; 3209 3210 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 3211 emit_alu(nfp_prog, reg_none(), 3212 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 3213 emit_br(nfp_prog, BR_BNE, insn->off, 0); 3214 3215 return 0; 3216 } 3217 3218 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3219 { 3220 const struct bpf_insn *insn = &meta->insn; 3221 3222 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), 3223 ALU_OP_XOR, reg_b(insn->src_reg * 2)); 3224 if (is_mbpf_jmp64(meta)) { 3225 emit_alu(nfp_prog, imm_b(nfp_prog), 3226 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, 3227 reg_b(insn->src_reg * 2 + 1)); 3228 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, 3229 imm_b(nfp_prog)); 3230 } 3231 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 3232 3233 return 0; 3234 } 3235 3236 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3237 { 3238 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); 3239 } 3240 3241 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3242 { 3243 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); 3244 } 3245 3246 static int 3247 bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3248 { 3249 u32 ret_tgt, stack_depth, offset_br; 3250 swreg tmp_reg; 3251 3252 stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN); 3253 /* Space for saving the return address is accounted for by the callee, 3254 * so stack_depth can be zero for the main function. 3255 */ 3256 if (stack_depth) { 3257 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, 3258 stack_imm(nfp_prog)); 3259 emit_alu(nfp_prog, stack_reg(nfp_prog), 3260 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg); 3261 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), 3262 NFP_CSR_ACT_LM_ADDR0); 3263 } 3264 3265 /* Two cases for jumping to the callee: 3266 * 3267 * - If callee uses and needs to save R6~R9 then: 3268 * 1. Put the start offset of the callee into imm_b(). This will 3269 * require a fixup step, as we do not necessarily know this 3270 * address yet. 3271 * 2. Put the return address from the callee to the caller into 3272 * register ret_reg(). 3273 * 3. (After defer slots are consumed) Jump to the subroutine that 3274 * pushes the registers to the stack. 3275 * The subroutine acts as a trampoline, and returns to the address in 3276 * imm_b(), i.e. jumps to the callee. 
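 *   A sketch of the emitted sequence for this case (offsets relative to
 *   the branch): +0 branch to the push-regs subroutine with 2 defer
 *   slots, +1 immed loading the callee offset into imm_b() (patched
 *   later by nfp_fixup_immed_relo()), +2 immed loading ret_tgt into
 *   ret_reg(), and +3 is ret_tgt itself.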
3277 * 3278 * - If callee does not need to save R6~R9 then just load return 3279 * address to the caller in ret_reg(), and jump to the callee 3280 * directly. 3281 * 3282 * Using ret_reg() to pass the return address to the callee is set here 3283 * as a convention. The callee can then push this address onto its 3284 * stack frame in its prologue. The advantages of passing the return 3285 * address through ret_reg(), instead of pushing it to the stack right 3286 * here, are the following: 3287 * - It looks cleaner. 3288 * - If the called function is called multiple time, we get a lower 3289 * program size. 3290 * - We save two no-op instructions that should be added just before 3291 * the emit_br() when stack depth is not null otherwise. 3292 * - If we ever find a register to hold the return address during whole 3293 * execution of the callee, we will not have to push the return 3294 * address to the stack for leaf functions. 3295 */ 3296 if (!meta->jmp_dst) { 3297 pr_err("BUG: BPF-to-BPF call has no destination recorded\n"); 3298 return -ELOOP; 3299 } 3300 if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) { 3301 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; 3302 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, 3303 RELO_BR_GO_CALL_PUSH_REGS); 3304 offset_br = nfp_prog_current_offset(nfp_prog); 3305 wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); 3306 } else { 3307 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; 3308 emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1); 3309 offset_br = nfp_prog_current_offset(nfp_prog); 3310 } 3311 wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); 3312 3313 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) 3314 return -EINVAL; 3315 3316 if (stack_depth) { 3317 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, 3318 stack_imm(nfp_prog)); 3319 emit_alu(nfp_prog, stack_reg(nfp_prog), 3320 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg); 3321 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), 3322 NFP_CSR_ACT_LM_ADDR0); 3323 wrp_nops(nfp_prog, 3); 3324 } 3325 3326 meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog); 3327 meta->num_insns_after_br -= offset_br; 3328 3329 return 0; 3330 } 3331 3332 static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3333 { 3334 switch (meta->insn.imm) { 3335 case BPF_FUNC_xdp_adjust_head: 3336 return adjust_head(nfp_prog, meta); 3337 case BPF_FUNC_xdp_adjust_tail: 3338 return adjust_tail(nfp_prog, meta); 3339 case BPF_FUNC_map_lookup_elem: 3340 case BPF_FUNC_map_update_elem: 3341 case BPF_FUNC_map_delete_elem: 3342 return map_call_stack_common(nfp_prog, meta); 3343 case BPF_FUNC_get_prandom_u32: 3344 return nfp_get_prandom_u32(nfp_prog, meta); 3345 case BPF_FUNC_perf_event_output: 3346 return nfp_perf_event_output(nfp_prog, meta); 3347 default: 3348 WARN_ONCE(1, "verifier allowed unsupported function\n"); 3349 return -EOPNOTSUPP; 3350 } 3351 } 3352 3353 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3354 { 3355 if (is_mbpf_pseudo_call(meta)) 3356 return bpf_to_bpf_call(nfp_prog, meta); 3357 else 3358 return helper_call(nfp_prog, meta); 3359 } 3360 3361 static bool nfp_is_main_function(struct nfp_insn_meta *meta) 3362 { 3363 return meta->subprog_idx == 0; 3364 } 3365 3366 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3367 { 3368 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); 3369 3370 return 0; 3371 } 3372 3373 static int 3374 nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct 
nfp_insn_meta *meta) 3375 { 3376 if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) { 3377 /* Pop R6~R9 to the stack via related subroutine. 3378 * We loaded the return address to the caller into ret_reg(). 3379 * This means that the subroutine does not come back here, we 3380 * make it jump back to the subprogram caller directly! 3381 */ 3382 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, 3383 RELO_BR_GO_CALL_POP_REGS); 3384 /* Pop return address from the stack. */ 3385 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); 3386 } else { 3387 /* Pop return address from the stack. */ 3388 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); 3389 /* Jump back to caller if no callee-saved registers were used 3390 * by the subprogram. 3391 */ 3392 emit_rtn(nfp_prog, ret_reg(nfp_prog), 0); 3393 } 3394 3395 return 0; 3396 } 3397 3398 static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3399 { 3400 if (nfp_is_main_function(meta)) 3401 return goto_out(nfp_prog, meta); 3402 else 3403 return nfp_subprog_epilogue(nfp_prog, meta); 3404 } 3405 3406 static const instr_cb_t instr_cb[256] = { 3407 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, 3408 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, 3409 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, 3410 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, 3411 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, 3412 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, 3413 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, 3414 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, 3415 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, 3416 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, 3417 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, 3418 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, 3419 [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, 3420 [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, 3421 [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, 3422 [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, 3423 [BPF_ALU64 | BPF_NEG] = neg_reg64, 3424 [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, 3425 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, 3426 [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64, 3427 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, 3428 [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64, 3429 [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64, 3430 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, 3431 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, 3432 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, 3433 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, 3434 [BPF_ALU | BPF_AND | BPF_X] = and_reg, 3435 [BPF_ALU | BPF_AND | BPF_K] = and_imm, 3436 [BPF_ALU | BPF_OR | BPF_X] = or_reg, 3437 [BPF_ALU | BPF_OR | BPF_K] = or_imm, 3438 [BPF_ALU | BPF_ADD | BPF_X] = add_reg, 3439 [BPF_ALU | BPF_ADD | BPF_K] = add_imm, 3440 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, 3441 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, 3442 [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, 3443 [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, 3444 [BPF_ALU | BPF_DIV | BPF_X] = div_reg, 3445 [BPF_ALU | BPF_DIV | BPF_K] = div_imm, 3446 [BPF_ALU | BPF_NEG] = neg_reg, 3447 [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, 3448 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, 3449 [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, 3450 [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, 3451 [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, 3452 [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, 3453 [BPF_ALU | BPF_END | BPF_X] = end_reg32, 3454 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, 3455 [BPF_LD | BPF_ABS | BPF_B] = data_ld1, 3456 [BPF_LD | BPF_ABS | BPF_H] = data_ld2, 3457 [BPF_LD | BPF_ABS | BPF_W] = data_ld4, 3458 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, 3459 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, 3460 [BPF_LD | 
BPF_IND | BPF_W] = data_ind_ld4, 3461 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, 3462 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, 3463 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, 3464 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, 3465 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, 3466 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, 3467 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, 3468 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, 3469 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, 3470 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, 3471 [BPF_ST | BPF_MEM | BPF_B] = mem_st1, 3472 [BPF_ST | BPF_MEM | BPF_H] = mem_st2, 3473 [BPF_ST | BPF_MEM | BPF_W] = mem_st4, 3474 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, 3475 [BPF_JMP | BPF_JA | BPF_K] = jump, 3476 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, 3477 [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm, 3478 [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm, 3479 [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm, 3480 [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm, 3481 [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm, 3482 [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm, 3483 [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm, 3484 [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm, 3485 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, 3486 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, 3487 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, 3488 [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg, 3489 [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg, 3490 [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg, 3491 [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg, 3492 [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg, 3493 [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg, 3494 [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg, 3495 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, 3496 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, 3497 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, 3498 [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm, 3499 [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm, 3500 [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm, 3501 [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm, 3502 [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm, 3503 [BPF_JMP32 | BPF_JSGT | BPF_K] = cmp_imm, 3504 [BPF_JMP32 | BPF_JSGE | BPF_K] = cmp_imm, 3505 [BPF_JMP32 | BPF_JSLT | BPF_K] = cmp_imm, 3506 [BPF_JMP32 | BPF_JSLE | BPF_K] = cmp_imm, 3507 [BPF_JMP32 | BPF_JSET | BPF_K] = jset_imm, 3508 [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm, 3509 [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg, 3510 [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg, 3511 [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg, 3512 [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg, 3513 [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg, 3514 [BPF_JMP32 | BPF_JSGT | BPF_X] = cmp_reg, 3515 [BPF_JMP32 | BPF_JSGE | BPF_X] = cmp_reg, 3516 [BPF_JMP32 | BPF_JSLT | BPF_X] = cmp_reg, 3517 [BPF_JMP32 | BPF_JSLE | BPF_X] = cmp_reg, 3518 [BPF_JMP32 | BPF_JSET | BPF_X] = jset_reg, 3519 [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg, 3520 [BPF_JMP | BPF_CALL] = call, 3521 [BPF_JMP | BPF_EXIT] = jmp_exit, 3522 }; 3523 3524 /* --- Assembler logic --- */ 3525 static int 3526 nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 3527 struct nfp_insn_meta *jmp_dst, u32 br_idx) 3528 { 3529 if (immed_get_value(nfp_prog->prog[br_idx + 1])) { 3530 pr_err("BUG: failed to fix up callee register saving\n"); 3531 return -EINVAL; 3532 } 3533 3534 immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off); 3535 3536 return 0; 3537 } 3538 3539 static int nfp_fixup_branches(struct nfp_prog *nfp_prog) 3540 { 3541 struct nfp_insn_meta *meta, *jmp_dst; 3542 u32 idx, br_idx; 3543 int err; 3544 3545 list_for_each_entry(meta, &nfp_prog->insns, l) { 3546 if (meta->flags & FLAG_INSN_SKIP_MASK) 3547 continue; 3548 if (!is_mbpf_jmp(meta)) 3549 continue; 3550 if (meta->insn.code
== (BPF_JMP | BPF_EXIT) && 3551 !nfp_is_main_function(meta)) 3552 continue; 3553 if (is_mbpf_helper_call(meta)) 3554 continue; 3555 3556 if (list_is_last(&meta->l, &nfp_prog->insns)) 3557 br_idx = nfp_prog->last_bpf_off; 3558 else 3559 br_idx = list_next_entry(meta, l)->off - 1; 3560 3561 /* For BPF-to-BPF function call, a stack adjustment sequence is 3562 * generated after the return instruction. Therefore, we must 3563 * withdraw the length of this sequence to have br_idx pointing 3564 * to where the "branch" NFP instruction is expected to be. 3565 */ 3566 if (is_mbpf_pseudo_call(meta)) 3567 br_idx -= meta->num_insns_after_br; 3568 3569 if (!nfp_is_br(nfp_prog->prog[br_idx])) { 3570 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", 3571 br_idx, meta->insn.code, nfp_prog->prog[br_idx]); 3572 return -ELOOP; 3573 } 3574 3575 if (meta->insn.code == (BPF_JMP | BPF_EXIT)) 3576 continue; 3577 3578 /* Leave special branches for later */ 3579 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != 3580 RELO_BR_REL && !is_mbpf_pseudo_call(meta)) 3581 continue; 3582 3583 if (!meta->jmp_dst) { 3584 pr_err("Non-exit jump doesn't have destination info recorded!!\n"); 3585 return -ELOOP; 3586 } 3587 3588 jmp_dst = meta->jmp_dst; 3589 3590 if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) { 3591 pr_err("Branch landing on removed instruction!!\n"); 3592 return -ELOOP; 3593 } 3594 3595 if (is_mbpf_pseudo_call(meta) && 3596 nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) { 3597 err = nfp_fixup_immed_relo(nfp_prog, meta, 3598 jmp_dst, br_idx); 3599 if (err) 3600 return err; 3601 } 3602 3603 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != 3604 RELO_BR_REL) 3605 continue; 3606 3607 for (idx = meta->off; idx <= br_idx; idx++) { 3608 if (!nfp_is_br(nfp_prog->prog[idx])) 3609 continue; 3610 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); 3611 } 3612 } 3613 3614 return 0; 3615 } 3616 3617 static void nfp_intro(struct nfp_prog *nfp_prog) 3618 { 3619 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); 3620 emit_alu(nfp_prog, plen_reg(nfp_prog), 3621 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); 3622 } 3623 3624 static void 3625 nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3626 { 3627 /* Save return address into the stack. */ 3628 wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog)); 3629 } 3630 3631 static void 3632 nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 3633 { 3634 unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth; 3635 3636 nfp_prog->stack_frame_depth = round_up(depth, 4); 3637 nfp_subprog_prologue(nfp_prog, meta); 3638 } 3639 3640 bool nfp_is_subprog_start(struct nfp_insn_meta *meta) 3641 { 3642 return meta->flags & FLAG_INSN_IS_SUBPROG_START; 3643 } 3644 3645 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) 3646 { 3647 /* TC direct-action mode: 3648 * 0,1 ok NOT SUPPORTED[1] 3649 * 2 drop 0x22 -> drop, count as stat1 3650 * 4,5 nuke 0x02 -> drop 3651 * 7 redir 0x44 -> redir, count as stat2 3652 * * unspec 0x11 -> pass, count as stat0 3653 * 3654 * [1] We can't support OK and RECLASSIFY because we can't tell TC 3655 * the exact decision made. We are forced to support UNSPEC 3656 * to handle aborts so that's the only one we handle for passing 3657 * packets up the stack. 
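 * The immediates loaded below, 0x41221211 and 0x41001211, act as
 * per-nibble lookup tables indexed by R0 (nibble i covers bits
 * [4i+3:4i]); the two selected nibbles are combined into the action
 * byte, e.g. R0 == 2 picks 0x2/0x2 -> 0x22 (drop, stat1) and R0 == 7
 * picks 0x4/0x4 -> 0x44 (redir, stat2), matching the table above.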
3658 */ 3659 /* Target for aborts */ 3660 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 3661 3662 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 3663 3664 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 3665 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); 3666 3667 /* Target for normal exits */ 3668 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 3669 3670 /* if R0 > 7 jump to abort */ 3671 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); 3672 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 3673 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 3674 3675 wrp_immed(nfp_prog, reg_b(2), 0x41221211); 3676 wrp_immed(nfp_prog, reg_b(3), 0x41001211); 3677 3678 emit_shf(nfp_prog, reg_a(1), 3679 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); 3680 3681 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 3682 emit_shf(nfp_prog, reg_a(2), 3683 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 3684 3685 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 3686 emit_shf(nfp_prog, reg_b(2), 3687 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); 3688 3689 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 3690 3691 emit_shf(nfp_prog, reg_b(2), 3692 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); 3693 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 3694 } 3695 3696 static void nfp_outro_xdp(struct nfp_prog *nfp_prog) 3697 { 3698 /* XDP return codes: 3699 * 0 aborted 0x82 -> drop, count as stat3 3700 * 1 drop 0x22 -> drop, count as stat1 3701 * 2 pass 0x11 -> pass, count as stat0 3702 * 3 tx 0x44 -> redir, count as stat2 3703 * * unknown 0x82 -> drop, count as stat3 3704 */ 3705 /* Target for aborts */ 3706 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 3707 3708 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 3709 3710 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 3711 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); 3712 3713 /* Target for normal exits */ 3714 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 3715 3716 /* if R0 > 3 jump to abort */ 3717 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); 3718 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 3719 3720 wrp_immed(nfp_prog, reg_b(2), 0x44112282); 3721 3722 emit_shf(nfp_prog, reg_a(1), 3723 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); 3724 3725 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 3726 emit_shf(nfp_prog, reg_b(2), 3727 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 3728 3729 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 3730 3731 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 3732 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 3733 } 3734 3735 static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) 3736 { 3737 unsigned int idx; 3738 3739 for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) 3740 if (nfp_prog->subprog[idx].needs_reg_push) 3741 return true; 3742 3743 return false; 3744 } 3745 3746 static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) 3747 { 3748 u8 reg; 3749 3750 /* Subroutine: Save all callee saved registers (R6 ~ R9). 3751 * imm_b() holds the return address. 
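         * Each 64-bit BPF register occupies two 32-bit LM words, so R6..R9
         * land in LM slots 1..8 while slot 0 keeps the return address pushed
         * by bpf_to_bpf_call().  The return is emitted after R8's first half
         * so that the last three moves execute in its defer slots.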
3752 */ 3753 nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog); 3754 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { 3755 u8 adj = (reg - BPF_REG_0) * 2; 3756 u8 idx = (reg - BPF_REG_6) * 2; 3757 3758 /* The first slot in the stack frame is used to push the return 3759 * address in bpf_to_bpf_call(), start just after. 3760 */ 3761 wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj)); 3762 3763 if (reg == BPF_REG_8) 3764 /* Prepare to jump back, last 3 insns use defer slots */ 3765 emit_rtn(nfp_prog, imm_b(nfp_prog), 3); 3766 3767 wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1)); 3768 } 3769 } 3770 3771 static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog) 3772 { 3773 u8 reg; 3774 3775 /* Subroutine: Restore all callee saved registers (R6 ~ R9). 3776 * ret_reg() holds the return address. 3777 */ 3778 nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog); 3779 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { 3780 u8 adj = (reg - BPF_REG_0) * 2; 3781 u8 idx = (reg - BPF_REG_6) * 2; 3782 3783 /* The first slot in the stack frame holds the return address, 3784 * start popping just after that. 3785 */ 3786 wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx)); 3787 3788 if (reg == BPF_REG_8) 3789 /* Prepare to jump back, last 3 insns use defer slots */ 3790 emit_rtn(nfp_prog, ret_reg(nfp_prog), 3); 3791 3792 wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1)); 3793 } 3794 } 3795 3796 static void nfp_outro(struct nfp_prog *nfp_prog) 3797 { 3798 switch (nfp_prog->type) { 3799 case BPF_PROG_TYPE_SCHED_CLS: 3800 nfp_outro_tc_da(nfp_prog); 3801 break; 3802 case BPF_PROG_TYPE_XDP: 3803 nfp_outro_xdp(nfp_prog); 3804 break; 3805 default: 3806 WARN_ON(1); 3807 } 3808 3809 if (!nfp_prog_needs_callee_reg_save(nfp_prog)) 3810 return; 3811 3812 nfp_push_callee_registers(nfp_prog); 3813 nfp_pop_callee_registers(nfp_prog); 3814 } 3815 3816 static int nfp_translate(struct nfp_prog *nfp_prog) 3817 { 3818 struct nfp_insn_meta *meta; 3819 unsigned int depth; 3820 int err; 3821 3822 depth = nfp_prog->subprog[0].stack_depth; 3823 nfp_prog->stack_frame_depth = round_up(depth, 4); 3824 3825 nfp_intro(nfp_prog); 3826 if (nfp_prog->error) 3827 return nfp_prog->error; 3828 3829 list_for_each_entry(meta, &nfp_prog->insns, l) { 3830 instr_cb_t cb = instr_cb[meta->insn.code]; 3831 3832 meta->off = nfp_prog_current_offset(nfp_prog); 3833 3834 if (nfp_is_subprog_start(meta)) { 3835 nfp_start_subprog(nfp_prog, meta); 3836 if (nfp_prog->error) 3837 return nfp_prog->error; 3838 } 3839 3840 if (meta->flags & FLAG_INSN_SKIP_MASK) { 3841 nfp_prog->n_translated++; 3842 continue; 3843 } 3844 3845 if (nfp_meta_has_prev(nfp_prog, meta) && 3846 nfp_meta_prev(meta)->double_cb) 3847 cb = nfp_meta_prev(meta)->double_cb; 3848 if (!cb) 3849 return -ENOENT; 3850 err = cb(nfp_prog, meta); 3851 if (err) 3852 return err; 3853 if (nfp_prog->error) 3854 return nfp_prog->error; 3855 3856 nfp_prog->n_translated++; 3857 } 3858 3859 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; 3860 3861 nfp_outro(nfp_prog); 3862 if (nfp_prog->error) 3863 return nfp_prog->error; 3864 3865 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); 3866 if (nfp_prog->error) 3867 return nfp_prog->error; 3868 3869 return nfp_fixup_branches(nfp_prog); 3870 } 3871 3872 /* --- Optimizations --- */ 3873 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) 3874 { 3875 struct nfp_insn_meta *meta; 3876 3877 list_for_each_entry(meta, &nfp_prog->insns, l) { 3878 struct bpf_insn insn = meta->insn; 3879 3880 /* Programs 
converted from cBPF start with register xoring */ 3881 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && 3882 insn.src_reg == insn.dst_reg) 3883 continue; 3884 3885 /* Programs start with R6 = R1 but we ignore the skb pointer */ 3886 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && 3887 insn.src_reg == 1 && insn.dst_reg == 6) 3888 meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; 3889 3890 /* Return as soon as something doesn't match */ 3891 if (!(meta->flags & FLAG_INSN_SKIP_MASK)) 3892 return; 3893 } 3894 } 3895 3896 /* abs(insn.imm) will fit better into unrestricted reg immediate - 3897 * convert add/sub of a negative number into a sub/add of a positive one. 3898 */ 3899 static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) 3900 { 3901 struct nfp_insn_meta *meta; 3902 3903 list_for_each_entry(meta, &nfp_prog->insns, l) { 3904 struct bpf_insn insn = meta->insn; 3905 3906 if (meta->flags & FLAG_INSN_SKIP_MASK) 3907 continue; 3908 3909 if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta)) 3910 continue; 3911 if (BPF_SRC(insn.code) != BPF_K) 3912 continue; 3913 if (insn.imm >= 0) 3914 continue; 3915 3916 if (is_mbpf_jmp(meta)) { 3917 switch (BPF_OP(insn.code)) { 3918 case BPF_JGE: 3919 case BPF_JSGE: 3920 case BPF_JLT: 3921 case BPF_JSLT: 3922 meta->jump_neg_op = true; 3923 break; 3924 default: 3925 continue; 3926 } 3927 } else { 3928 if (BPF_OP(insn.code) == BPF_ADD) 3929 insn.code = BPF_CLASS(insn.code) | BPF_SUB; 3930 else if (BPF_OP(insn.code) == BPF_SUB) 3931 insn.code = BPF_CLASS(insn.code) | BPF_ADD; 3932 else 3933 continue; 3934 3935 meta->insn.code = insn.code | BPF_K; 3936 } 3937 3938 meta->insn.imm = -insn.imm; 3939 } 3940 } 3941 3942 /* Remove masking after load since our load guarantees this is not needed */ 3943 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) 3944 { 3945 struct nfp_insn_meta *meta1, *meta2; 3946 const s32 exp_mask[] = { 3947 [BPF_B] = 0x000000ffU, 3948 [BPF_H] = 0x0000ffffU, 3949 [BPF_W] = 0xffffffffU, 3950 }; 3951 3952 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 3953 struct bpf_insn insn, next; 3954 3955 insn = meta1->insn; 3956 next = meta2->insn; 3957 3958 if (BPF_CLASS(insn.code) != BPF_LD) 3959 continue; 3960 if (BPF_MODE(insn.code) != BPF_ABS && 3961 BPF_MODE(insn.code) != BPF_IND) 3962 continue; 3963 3964 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) 3965 continue; 3966 3967 if (!exp_mask[BPF_SIZE(insn.code)]) 3968 continue; 3969 if (exp_mask[BPF_SIZE(insn.code)] != next.imm) 3970 continue; 3971 3972 if (next.src_reg || next.dst_reg) 3973 continue; 3974 3975 if (meta2->flags & FLAG_INSN_IS_JUMP_DST) 3976 continue; 3977 3978 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; 3979 } 3980 } 3981 3982 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) 3983 { 3984 struct nfp_insn_meta *meta1, *meta2, *meta3; 3985 3986 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { 3987 struct bpf_insn insn, next1, next2; 3988 3989 insn = meta1->insn; 3990 next1 = meta2->insn; 3991 next2 = meta3->insn; 3992 3993 if (BPF_CLASS(insn.code) != BPF_LD) 3994 continue; 3995 if (BPF_MODE(insn.code) != BPF_ABS && 3996 BPF_MODE(insn.code) != BPF_IND) 3997 continue; 3998 if (BPF_SIZE(insn.code) != BPF_W) 3999 continue; 4000 4001 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && 4002 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && 4003 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && 4004 next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) 4005 continue; 4006 4007 if (next1.src_reg || next1.dst_reg || 4008 next2.src_reg || next2.dst_reg) 4009 continue; 4010 
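                /* Only a pair of 32-bit shifts (imm == 0x20) right after the
                 * packet load is matched; like the masking removed above, the
                 * pair is treated as redundant with the zero-extension the
                 * ABS/IND load already guarantees, so both shift insns can be
                 * skipped below.
                 */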
                if (next1.imm != 0x20 || next2.imm != 0x20)
                        continue;

                if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
                    meta3->flags & FLAG_INSN_IS_JUMP_DST)
                        continue;

                meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
                meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
        }
}

/* A load/store pair that forms a memory copy should look like the following:
 *
 *   ld_width R, [addr_src + offset_src]
 *   st_width [addr_dest + offset_dest], R
 *
 * The destination register of the load and the source register of the store
 * must be the same, and the load and store must also operate at the same
 * width.  If either addr_src or addr_dest is the stack pointer, we don't do
 * the CPP optimization because the stack is modelled by registers on the NFP.
 */
static bool
curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
                    struct nfp_insn_meta *st_meta)
{
        struct bpf_insn *ld = &ld_meta->insn;
        struct bpf_insn *st = &st_meta->insn;

        if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
                return false;

        if (ld_meta->ptr.type != PTR_TO_PACKET &&
            ld_meta->ptr.type != PTR_TO_MAP_VALUE)
                return false;

        if (st_meta->ptr.type != PTR_TO_PACKET)
                return false;

        if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
                return false;

        if (ld->dst_reg != st->src_reg)
                return false;

        /* There is a jump to the store insn in this pair. */
        if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
                return false;

        return true;
}

/* Currently, we only support chaining load/store pairs if:
 *
 *  - Their address base registers are the same.
 *  - Their address offsets are in the same order.
 *  - They operate at the same memory width.
 *  - There is no jump into the middle of them.
 */
static bool
curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
                              struct nfp_insn_meta *st_meta,
                              struct bpf_insn *prev_ld,
                              struct bpf_insn *prev_st)
{
        u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
        struct bpf_insn *ld = &ld_meta->insn;
        struct bpf_insn *st = &st_meta->insn;
        s16 prev_ld_off, prev_st_off;

        /* This pair is the start pair. */
        if (!prev_ld)
                return true;

        prev_size = BPF_LDST_BYTES(prev_ld);
        curr_size = BPF_LDST_BYTES(ld);
        prev_ld_base = prev_ld->src_reg;
        prev_st_base = prev_st->dst_reg;
        prev_ld_dst = prev_ld->dst_reg;
        prev_ld_off = prev_ld->off;
        prev_st_off = prev_st->off;

        if (ld->dst_reg != prev_ld_dst)
                return false;

        if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
                return false;

        if (curr_size != prev_size)
                return false;

        /* There is a jump to the head of this pair. */
        if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
                return false;

        /* Both in ascending order. */
        if (prev_ld_off + prev_size == ld->off &&
            prev_st_off + prev_size == st->off)
                return true;

        /* Both in descending order. */
        if (ld->off + curr_size == prev_ld_off &&
            st->off + curr_size == prev_st_off)
                return true;

        return false;
}

/* Return TRUE if a cross memory access happens.  A cross memory access means
 * the store area overlaps with the load area, so a later load might read the
 * value a previous store has just written; in that case we can't treat the
 * sequence as a memory copy.
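 *
 * For example (illustrative offsets, same packet pointer):
 *
 *   ld4 R, [pkt + 0];  st4 [pkt + 4], R
 *   ld4 R, [pkt + 4];  st4 [pkt + 8], R
 *
 * The second load reads the bytes the first store has just written, so the
 * sequence must not be collapsed into a single copy.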
 */
static bool
cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
                 struct nfp_insn_meta *head_st_meta)
{
        s16 head_ld_off, head_st_off, ld_off;

        /* Different pointer types do not overlap. */
        if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
                return false;

        /* The load and store are both PTR_TO_PACKET; check the ID info. */
        if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
                return true;

        /* Canonicalize the offsets.  Turn all of them against the original
         * base register.
         */
        head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
        head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
        ld_off = ld->off + head_ld_meta->ptr.off;

        /* Ascending order cross. */
        if (ld_off > head_ld_off &&
            head_ld_off < head_st_off && ld_off >= head_st_off)
                return true;

        /* Descending order cross. */
        if (ld_off < head_ld_off &&
            head_ld_off > head_st_off && ld_off <= head_st_off)
                return true;

        return false;
}

/* This pass tries to identify the following instruction sequences.
 *
 *   load R, [regA + offA]
 *   store [regB + offB], R
 *   load R, [regA + offA + const_imm_A]
 *   store [regB + offB + const_imm_A], R
 *   load R, [regA + offA + 2 * const_imm_A]
 *   store [regB + offB + 2 * const_imm_A], R
 *   ...
 *
 * The sequence above is typically generated by the compiler when lowering
 * memcpy.  The NFP prefers using CPP instructions to accelerate it.
 */
static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
{
        struct nfp_insn_meta *head_ld_meta = NULL;
        struct nfp_insn_meta *head_st_meta = NULL;
        struct nfp_insn_meta *meta1, *meta2;
        struct bpf_insn *prev_ld = NULL;
        struct bpf_insn *prev_st = NULL;
        u8 count = 0;

        nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
                struct bpf_insn *ld = &meta1->insn;
                struct bpf_insn *st = &meta2->insn;

                /* Reset the record status if any of the following is true:
                 * - The current insn pair is not a load/store pair.
                 * - The load/store pair doesn't chain with the previous one.
                 * - The chained load/store pair crosses with the previous
                 *   pair.
                 * - The chained load/store pair has a total memory copy size
                 *   beyond 128 bytes, which is the maximum length a single
                 *   NFP CPP command can transfer.
                 */
                if (!curr_pair_is_memcpy(meta1, meta2) ||
                    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
                                                   prev_st) ||
                    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
                                                       head_st_meta) ||
                                      head_ld_meta->ldst_gather_len >= 128))) {
                        if (!count)
                                continue;

                        if (count > 1) {
                                s16 prev_ld_off = prev_ld->off;
                                s16 prev_st_off = prev_st->off;
                                s16 head_ld_off = head_ld_meta->insn.off;

                                if (prev_ld_off < head_ld_off) {
                                        head_ld_meta->insn.off = prev_ld_off;
                                        head_st_meta->insn.off = prev_st_off;
                                        head_ld_meta->ldst_gather_len =
                                                -head_ld_meta->ldst_gather_len;
                                }

                                head_ld_meta->paired_st = &head_st_meta->insn;
                                head_st_meta->flags |=
                                        FLAG_INSN_SKIP_PREC_DEPENDENT;
                        } else {
                                head_ld_meta->ldst_gather_len = 0;
                        }

                        /* If the chain is ended by a load/store pair then this
                         * could serve as the new head of the next chain.
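                         * In that case the pair is consumed on the spot:
                         * count restarts at 1 and meta1/meta2 are advanced
                         * one extra step so the walk resumes after the store
                         * insn instead of re-examining it.  Otherwise all
                         * tracking state is cleared.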
4222 */ 4223 if (curr_pair_is_memcpy(meta1, meta2)) { 4224 head_ld_meta = meta1; 4225 head_st_meta = meta2; 4226 head_ld_meta->ldst_gather_len = 4227 BPF_LDST_BYTES(ld); 4228 meta1 = nfp_meta_next(meta1); 4229 meta2 = nfp_meta_next(meta2); 4230 prev_ld = ld; 4231 prev_st = st; 4232 count = 1; 4233 } else { 4234 head_ld_meta = NULL; 4235 head_st_meta = NULL; 4236 prev_ld = NULL; 4237 prev_st = NULL; 4238 count = 0; 4239 } 4240 4241 continue; 4242 } 4243 4244 if (!head_ld_meta) { 4245 head_ld_meta = meta1; 4246 head_st_meta = meta2; 4247 } else { 4248 meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; 4249 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; 4250 } 4251 4252 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); 4253 meta1 = nfp_meta_next(meta1); 4254 meta2 = nfp_meta_next(meta2); 4255 prev_ld = ld; 4256 prev_st = st; 4257 count++; 4258 } 4259 } 4260 4261 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) 4262 { 4263 struct nfp_insn_meta *meta, *range_node = NULL; 4264 s16 range_start = 0, range_end = 0; 4265 bool cache_avail = false; 4266 struct bpf_insn *insn; 4267 s32 range_ptr_off = 0; 4268 u32 range_ptr_id = 0; 4269 4270 list_for_each_entry(meta, &nfp_prog->insns, l) { 4271 if (meta->flags & FLAG_INSN_IS_JUMP_DST) 4272 cache_avail = false; 4273 4274 if (meta->flags & FLAG_INSN_SKIP_MASK) 4275 continue; 4276 4277 insn = &meta->insn; 4278 4279 if (is_mbpf_store_pkt(meta) || 4280 insn->code == (BPF_JMP | BPF_CALL) || 4281 is_mbpf_classic_store_pkt(meta) || 4282 is_mbpf_classic_load(meta)) { 4283 cache_avail = false; 4284 continue; 4285 } 4286 4287 if (!is_mbpf_load(meta)) 4288 continue; 4289 4290 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) { 4291 cache_avail = false; 4292 continue; 4293 } 4294 4295 if (!cache_avail) { 4296 cache_avail = true; 4297 if (range_node) 4298 goto end_current_then_start_new; 4299 goto start_new; 4300 } 4301 4302 /* Check ID to make sure two reads share the same 4303 * variable offset against PTR_TO_PACKET, and check OFF 4304 * to make sure they also share the same constant 4305 * offset. 4306 * 4307 * OFFs don't really need to be the same, because they 4308 * are the constant offsets against PTR_TO_PACKET, so 4309 * for different OFFs, we could canonicalize them to 4310 * offsets against original packet pointer. We don't 4311 * support this. 4312 */ 4313 if (meta->ptr.id == range_ptr_id && 4314 meta->ptr.off == range_ptr_off) { 4315 s16 new_start = range_start; 4316 s16 end, off = insn->off; 4317 s16 new_end = range_end; 4318 bool changed = false; 4319 4320 if (off < range_start) { 4321 new_start = off; 4322 changed = true; 4323 } 4324 4325 end = off + BPF_LDST_BYTES(insn); 4326 if (end > range_end) { 4327 new_end = end; 4328 changed = true; 4329 } 4330 4331 if (!changed) 4332 continue; 4333 4334 if (new_end - new_start <= 64) { 4335 /* Install new range. 
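                                 * The widened range is still within the
                                 * 64-byte limit checked above, so simply
                                 * update the recorded bounds and keep the
                                 * same cache-init insn.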
*/ 4336 range_start = new_start; 4337 range_end = new_end; 4338 continue; 4339 } 4340 } 4341 4342 end_current_then_start_new: 4343 range_node->pkt_cache.range_start = range_start; 4344 range_node->pkt_cache.range_end = range_end; 4345 start_new: 4346 range_node = meta; 4347 range_node->pkt_cache.do_init = true; 4348 range_ptr_id = range_node->ptr.id; 4349 range_ptr_off = range_node->ptr.off; 4350 range_start = insn->off; 4351 range_end = insn->off + BPF_LDST_BYTES(insn); 4352 } 4353 4354 if (range_node) { 4355 range_node->pkt_cache.range_start = range_start; 4356 range_node->pkt_cache.range_end = range_end; 4357 } 4358 4359 list_for_each_entry(meta, &nfp_prog->insns, l) { 4360 if (meta->flags & FLAG_INSN_SKIP_MASK) 4361 continue; 4362 4363 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { 4364 if (meta->pkt_cache.do_init) { 4365 range_start = meta->pkt_cache.range_start; 4366 range_end = meta->pkt_cache.range_end; 4367 } else { 4368 meta->pkt_cache.range_start = range_start; 4369 meta->pkt_cache.range_end = range_end; 4370 } 4371 } 4372 } 4373 } 4374 4375 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) 4376 { 4377 nfp_bpf_opt_reg_init(nfp_prog); 4378 4379 nfp_bpf_opt_neg_add_sub(nfp_prog); 4380 nfp_bpf_opt_ld_mask(nfp_prog); 4381 nfp_bpf_opt_ld_shift(nfp_prog); 4382 nfp_bpf_opt_ldst_gather(nfp_prog); 4383 nfp_bpf_opt_pkt_cache(nfp_prog); 4384 4385 return 0; 4386 } 4387 4388 static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) 4389 { 4390 struct nfp_insn_meta *meta1, *meta2; 4391 struct nfp_bpf_map *nfp_map; 4392 struct bpf_map *map; 4393 u32 id; 4394 4395 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 4396 if (meta1->flags & FLAG_INSN_SKIP_MASK || 4397 meta2->flags & FLAG_INSN_SKIP_MASK) 4398 continue; 4399 4400 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || 4401 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) 4402 continue; 4403 4404 map = (void *)(unsigned long)((u32)meta1->insn.imm | 4405 (u64)meta2->insn.imm << 32); 4406 if (bpf_map_offload_neutral(map)) { 4407 id = map->id; 4408 } else { 4409 nfp_map = map_to_offmap(map)->dev_priv; 4410 id = nfp_map->tid; 4411 } 4412 4413 meta1->insn.imm = id; 4414 meta2->insn.imm = 0; 4415 } 4416 4417 return 0; 4418 } 4419 4420 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) 4421 { 4422 __le64 *ustore = (__force __le64 *)prog; 4423 int i; 4424 4425 for (i = 0; i < len; i++) { 4426 int err; 4427 4428 err = nfp_ustore_check_valid_no_ecc(prog[i]); 4429 if (err) 4430 return err; 4431 4432 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); 4433 } 4434 4435 return 0; 4436 } 4437 4438 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) 4439 { 4440 void *prog; 4441 4442 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); 4443 if (!prog) 4444 return; 4445 4446 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); 4447 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); 4448 kvfree(nfp_prog->prog); 4449 nfp_prog->prog = prog; 4450 } 4451 4452 int nfp_bpf_jit(struct nfp_prog *nfp_prog) 4453 { 4454 int ret; 4455 4456 ret = nfp_bpf_replace_map_ptrs(nfp_prog); 4457 if (ret) 4458 return ret; 4459 4460 ret = nfp_bpf_optimize(nfp_prog); 4461 if (ret) 4462 return ret; 4463 4464 ret = nfp_translate(nfp_prog); 4465 if (ret) { 4466 pr_err("Translation failed with error %d (translated: %u)\n", 4467 ret, nfp_prog->n_translated); 4468 return -EINVAL; 4469 } 4470 4471 nfp_bpf_prog_trim(nfp_prog); 4472 4473 return ret; 4474 } 4475 4476 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog) 4477 
{ 4478 struct nfp_insn_meta *meta; 4479 4480 /* Another pass to record jump information. */ 4481 list_for_each_entry(meta, &nfp_prog->insns, l) { 4482 struct nfp_insn_meta *dst_meta; 4483 u64 code = meta->insn.code; 4484 unsigned int dst_idx; 4485 bool pseudo_call; 4486 4487 if (!is_mbpf_jmp(meta)) 4488 continue; 4489 if (BPF_OP(code) == BPF_EXIT) 4490 continue; 4491 if (is_mbpf_helper_call(meta)) 4492 continue; 4493 4494 /* If opcode is BPF_CALL at this point, this can only be a 4495 * BPF-to-BPF call (a.k.a pseudo call). 4496 */ 4497 pseudo_call = BPF_OP(code) == BPF_CALL; 4498 4499 if (pseudo_call) 4500 dst_idx = meta->n + 1 + meta->insn.imm; 4501 else 4502 dst_idx = meta->n + 1 + meta->insn.off; 4503 4504 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx); 4505 4506 if (pseudo_call) 4507 dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; 4508 4509 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; 4510 meta->jmp_dst = dst_meta; 4511 } 4512 } 4513 4514 bool nfp_bpf_supported_opcode(u8 code) 4515 { 4516 return !!instr_cb[code]; 4517 } 4518 4519 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) 4520 { 4521 unsigned int i; 4522 u64 *prog; 4523 int err; 4524 4525 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), 4526 GFP_KERNEL); 4527 if (!prog) 4528 return ERR_PTR(-ENOMEM); 4529 4530 for (i = 0; i < nfp_prog->prog_len; i++) { 4531 enum nfp_relo_type special; 4532 u32 val; 4533 u16 off; 4534 4535 special = FIELD_GET(OP_RELO_TYPE, prog[i]); 4536 switch (special) { 4537 case RELO_NONE: 4538 continue; 4539 case RELO_BR_REL: 4540 br_add_offset(&prog[i], bv->start_off); 4541 break; 4542 case RELO_BR_GO_OUT: 4543 br_set_offset(&prog[i], 4544 nfp_prog->tgt_out + bv->start_off); 4545 break; 4546 case RELO_BR_GO_ABORT: 4547 br_set_offset(&prog[i], 4548 nfp_prog->tgt_abort + bv->start_off); 4549 break; 4550 case RELO_BR_GO_CALL_PUSH_REGS: 4551 if (!nfp_prog->tgt_call_push_regs) { 4552 pr_err("BUG: failed to detect subprogram registers needs\n"); 4553 err = -EINVAL; 4554 goto err_free_prog; 4555 } 4556 off = nfp_prog->tgt_call_push_regs + bv->start_off; 4557 br_set_offset(&prog[i], off); 4558 break; 4559 case RELO_BR_GO_CALL_POP_REGS: 4560 if (!nfp_prog->tgt_call_pop_regs) { 4561 pr_err("BUG: failed to detect subprogram registers needs\n"); 4562 err = -EINVAL; 4563 goto err_free_prog; 4564 } 4565 off = nfp_prog->tgt_call_pop_regs + bv->start_off; 4566 br_set_offset(&prog[i], off); 4567 break; 4568 case RELO_BR_NEXT_PKT: 4569 br_set_offset(&prog[i], bv->tgt_done); 4570 break; 4571 case RELO_BR_HELPER: 4572 val = br_get_offset(prog[i]); 4573 val -= BR_OFF_RELO; 4574 switch (val) { 4575 case BPF_FUNC_map_lookup_elem: 4576 val = nfp_prog->bpf->helpers.map_lookup; 4577 break; 4578 case BPF_FUNC_map_update_elem: 4579 val = nfp_prog->bpf->helpers.map_update; 4580 break; 4581 case BPF_FUNC_map_delete_elem: 4582 val = nfp_prog->bpf->helpers.map_delete; 4583 break; 4584 case BPF_FUNC_perf_event_output: 4585 val = nfp_prog->bpf->helpers.perf_event_output; 4586 break; 4587 default: 4588 pr_err("relocation of unknown helper %d\n", 4589 val); 4590 err = -EINVAL; 4591 goto err_free_prog; 4592 } 4593 br_set_offset(&prog[i], val); 4594 break; 4595 case RELO_IMMED_REL: 4596 immed_add_value(&prog[i], bv->start_off); 4597 break; 4598 } 4599 4600 prog[i] &= ~OP_RELO_TYPE; 4601 } 4602 4603 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); 4604 if (err) 4605 goto err_free_prog; 4606 4607 return prog; 4608 4609 err_free_prog: 4610 kfree(prog); 4611 return ERR_PTR(err); 4612 } 4613