1 /* 2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 3 * 4 * This software is dual licensed under the GNU General License Version 2, 5 * June 1991 as shown in the file COPYING in the top-level directory of this 6 * source tree or the BSD 2-Clause License provided below. You have the 7 * option to license this software under the complete terms of either license. 8 * 9 * The BSD 2-Clause License: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * 1. Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #define pr_fmt(fmt) "NFP net bpf: " fmt 35 36 #include <linux/bug.h> 37 #include <linux/kernel.h> 38 #include <linux/bpf.h> 39 #include <linux/filter.h> 40 #include <linux/pkt_cls.h> 41 #include <linux/unistd.h> 42 43 #include "main.h" 44 #include "../nfp_asm.h" 45 46 /* --- NFP prog --- */ 47 /* Foreach "multiple" entries macros provide pos and next<n> pointers. 48 * It's safe to modify the next pointers (but not pos). 49 */ 50 #define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ 51 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ 52 next = list_next_entry(pos, l); \ 53 &(nfp_prog)->insns != &pos->l && \ 54 &(nfp_prog)->insns != &next->l; \ 55 pos = nfp_meta_next(pos), \ 56 next = nfp_meta_next(pos)) 57 58 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ 59 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ 60 next = list_next_entry(pos, l), \ 61 next2 = list_next_entry(next, l); \ 62 &(nfp_prog)->insns != &pos->l && \ 63 &(nfp_prog)->insns != &next->l && \ 64 &(nfp_prog)->insns != &next2->l; \ 65 pos = nfp_meta_next(pos), \ 66 next = nfp_meta_next(pos), \ 67 next2 = nfp_meta_next(next)) 68 69 static bool 70 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 71 { 72 return meta->l.prev != &nfp_prog->insns; 73 } 74 75 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) 76 { 77 if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { 78 nfp_prog->error = -ENOSPC; 79 return; 80 } 81 82 nfp_prog->prog[nfp_prog->prog_len] = insn; 83 nfp_prog->prog_len++; 84 } 85 86 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) 87 { 88 return nfp_prog->start_off + nfp_prog->prog_len; 89 } 90 91 static bool 92 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) 93 { 94 /* If there is a recorded error we may have dropped instructions; 95 * that doesn't have to be due to translator bug, and the translation 96 * will fail anyway, so just return OK. 97 */ 98 if (nfp_prog->error) 99 return true; 100 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); 101 } 102 103 static unsigned int 104 nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) 105 { 106 return offset - nfp_prog->start_off; 107 } 108 109 /* --- Emitters --- */ 110 static void 111 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, 112 u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir) 113 { 114 enum cmd_ctx_swap ctx; 115 u64 insn; 116 117 if (sync) 118 ctx = CMD_CTX_SWAP; 119 else 120 ctx = CMD_CTX_NO_SWAP; 121 122 insn = FIELD_PREP(OP_CMD_A_SRC, areg) | 123 FIELD_PREP(OP_CMD_CTX, ctx) | 124 FIELD_PREP(OP_CMD_B_SRC, breg) | 125 FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | 126 FIELD_PREP(OP_CMD_XFER, xfer) | 127 FIELD_PREP(OP_CMD_CNT, size) | 128 FIELD_PREP(OP_CMD_SIG, sync) | 129 FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | 130 FIELD_PREP(OP_CMD_INDIR, indir) | 131 FIELD_PREP(OP_CMD_MODE, mode); 132 133 nfp_prog_push(nfp_prog, insn); 134 } 135 136 static void 137 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 138 swreg lreg, swreg rreg, u8 size, bool sync, bool indir) 139 { 140 struct nfp_insn_re_regs reg; 141 int err; 142 143 err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); 144 if (err) { 145 nfp_prog->error = err; 146 return; 147 } 148 if (reg.swap) { 149 pr_err("cmd can't swap arguments\n"); 150 nfp_prog->error = -EFAULT; 151 return; 152 } 153 if (reg.dst_lmextn || reg.src_lmextn) { 154 pr_err("cmd can't use LMextn\n"); 155 nfp_prog->error = -EFAULT; 156 return; 157 } 158 159 __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync, 160 indir); 161 } 162 163 static void 164 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 165 swreg lreg, swreg rreg, u8 size, bool sync) 166 { 167 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false); 168 } 169 170 static void 171 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 172 swreg lreg, swreg rreg, u8 size, bool sync) 173 { 174 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true); 175 } 176 177 static void 178 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, 179 enum br_ctx_signal_state css, u16 addr, u8 defer) 180 { 181 u16 addr_lo, addr_hi; 182 u64 insn; 183 184 addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); 185 addr_hi = addr != addr_lo; 186 187 insn = OP_BR_BASE | 188 FIELD_PREP(OP_BR_MASK, mask) | 189 FIELD_PREP(OP_BR_EV_PIP, ev_pip) | 190 FIELD_PREP(OP_BR_CSS, css) | 191 FIELD_PREP(OP_BR_DEFBR, defer) | 192 FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | 193 FIELD_PREP(OP_BR_ADDR_HI, addr_hi); 194 195 nfp_prog_push(nfp_prog, insn); 196 } 197 198 static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) 199 { 200 if (defer > 2) { 201 pr_err("BUG: branch defer out of bounds %d\n", defer); 202 nfp_prog->error = -EFAULT; 203 return; 204 } 205 __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); 206 } 207 208 static void 209 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) 210 { 211 __emit_br(nfp_prog, mask, 212 mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, 213 BR_CSS_NONE, addr, defer); 214 } 215 216 static void 217 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, 218 enum immed_width width, bool invert, 219 enum immed_shift shift, bool wr_both, 220 bool dst_lmextn, bool src_lmextn) 221 { 222 u64 insn; 223 224 insn = OP_IMMED_BASE | 225 FIELD_PREP(OP_IMMED_A_SRC, areg) | 226 FIELD_PREP(OP_IMMED_B_SRC, breg) | 227 FIELD_PREP(OP_IMMED_IMM, imm_hi) | 228 FIELD_PREP(OP_IMMED_WIDTH, width) | 229 FIELD_PREP(OP_IMMED_INV, invert) | 230 FIELD_PREP(OP_IMMED_SHIFT, shift) | 231 FIELD_PREP(OP_IMMED_WR_AB, wr_both) | 232 FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | 233 FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); 234 235 nfp_prog_push(nfp_prog, insn); 236 } 237 238 static void 239 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, 240 enum immed_width width, bool invert, enum immed_shift shift) 241 { 242 struct nfp_insn_ur_regs reg; 243 int err; 244 245 if (swreg_type(dst) == NN_REG_IMM) { 246 nfp_prog->error = -EFAULT; 247 return; 248 } 249 250 err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); 251 if (err) { 252 nfp_prog->error = err; 253 return; 254 } 255 256 /* Use reg.dst when destination is No-Dest. */ 257 __emit_immed(nfp_prog, 258 swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg, 259 reg.breg, imm >> 8, width, invert, shift, 260 reg.wr_both, reg.dst_lmextn, reg.src_lmextn); 261 } 262 263 static void 264 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, 265 enum shf_sc sc, u8 shift, 266 u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, 267 bool dst_lmextn, bool src_lmextn) 268 { 269 u64 insn; 270 271 if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { 272 nfp_prog->error = -EFAULT; 273 return; 274 } 275 276 if (sc == SHF_SC_L_SHF) 277 shift = 32 - shift; 278 279 insn = OP_SHF_BASE | 280 FIELD_PREP(OP_SHF_A_SRC, areg) | 281 FIELD_PREP(OP_SHF_SC, sc) | 282 FIELD_PREP(OP_SHF_B_SRC, breg) | 283 FIELD_PREP(OP_SHF_I8, i8) | 284 FIELD_PREP(OP_SHF_SW, sw) | 285 FIELD_PREP(OP_SHF_DST, dst) | 286 FIELD_PREP(OP_SHF_SHIFT, shift) | 287 FIELD_PREP(OP_SHF_OP, op) | 288 FIELD_PREP(OP_SHF_DST_AB, dst_ab) | 289 FIELD_PREP(OP_SHF_WR_AB, wr_both) | 290 FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | 291 FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); 292 293 nfp_prog_push(nfp_prog, insn); 294 } 295 296 static void 297 emit_shf(struct nfp_prog *nfp_prog, swreg dst, 298 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) 299 { 300 struct nfp_insn_re_regs reg; 301 int err; 302 303 err = swreg_to_restricted(dst, lreg, rreg, ®, true); 304 if (err) { 305 nfp_prog->error = err; 306 return; 307 } 308 309 __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, 310 reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, 311 reg.dst_lmextn, reg.src_lmextn); 312 } 313 314 static void 315 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, 316 u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, 317 bool dst_lmextn, bool src_lmextn) 318 { 319 u64 insn; 320 321 insn = OP_ALU_BASE | 322 FIELD_PREP(OP_ALU_A_SRC, areg) | 323 FIELD_PREP(OP_ALU_B_SRC, breg) | 324 FIELD_PREP(OP_ALU_DST, dst) | 325 FIELD_PREP(OP_ALU_SW, swap) | 326 FIELD_PREP(OP_ALU_OP, op) | 327 FIELD_PREP(OP_ALU_DST_AB, dst_ab) | 328 FIELD_PREP(OP_ALU_WR_AB, wr_both) | 329 FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | 330 FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); 331 332 nfp_prog_push(nfp_prog, insn); 333 } 334 335 static void 336 emit_alu(struct nfp_prog *nfp_prog, swreg dst, 337 swreg lreg, enum alu_op op, swreg rreg) 338 { 339 struct nfp_insn_ur_regs reg; 340 int err; 341 342 err = swreg_to_unrestricted(dst, lreg, rreg, ®); 343 if (err) { 344 nfp_prog->error = err; 345 return; 346 } 347 348 __emit_alu(nfp_prog, reg.dst, reg.dst_ab, 349 reg.areg, op, reg.breg, reg.swap, reg.wr_both, 350 reg.dst_lmextn, reg.src_lmextn); 351 } 352 353 static void 354 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, 355 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, 356 bool zero, bool swap, bool wr_both, 357 bool dst_lmextn, bool src_lmextn) 358 { 359 u64 insn; 360 361 insn = OP_LDF_BASE | 362 FIELD_PREP(OP_LDF_A_SRC, areg) | 363 FIELD_PREP(OP_LDF_SC, sc) | 364 FIELD_PREP(OP_LDF_B_SRC, breg) | 365 FIELD_PREP(OP_LDF_I8, imm8) | 366 FIELD_PREP(OP_LDF_SW, swap) | 367 FIELD_PREP(OP_LDF_ZF, zero) | 368 FIELD_PREP(OP_LDF_BMASK, bmask) | 369 FIELD_PREP(OP_LDF_SHF, shift) | 370 FIELD_PREP(OP_LDF_WR_AB, wr_both) | 371 FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | 372 FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); 373 374 nfp_prog_push(nfp_prog, insn); 375 } 376 377 static void 378 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 379 enum shf_sc sc, u8 shift, bool zero) 380 { 381 struct nfp_insn_re_regs reg; 382 int err; 383 384 /* Note: ld_field is special as it uses one of the src regs as dst */ 385 err = swreg_to_restricted(dst, dst, src, ®, true); 386 if (err) { 387 nfp_prog->error = err; 388 return; 389 } 390 391 __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, 392 reg.i8, zero, reg.swap, reg.wr_both, 393 reg.dst_lmextn, reg.src_lmextn); 394 } 395 396 static void 397 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 398 enum shf_sc sc, u8 shift) 399 { 400 emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); 401 } 402 403 static void 404 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, 405 bool dst_lmextn, bool src_lmextn) 406 { 407 u64 insn; 408 409 insn = OP_LCSR_BASE | 410 FIELD_PREP(OP_LCSR_A_SRC, areg) | 411 FIELD_PREP(OP_LCSR_B_SRC, breg) | 412 FIELD_PREP(OP_LCSR_WRITE, wr) | 413 FIELD_PREP(OP_LCSR_ADDR, addr) | 414 FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | 415 FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); 416 417 nfp_prog_push(nfp_prog, insn); 418 } 419 420 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) 421 { 422 struct nfp_insn_ur_regs reg; 423 int err; 424 425 /* This instruction takes immeds instead of reg_none() for the ignored 426 * operand, but we can't encode 2 immeds in one instr with our normal 427 * swreg infra so if param is an immed, we encode as reg_none() and 428 * copy the immed to both operands. 429 */ 430 if (swreg_type(src) == NN_REG_IMM) { 431 err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); 432 reg.breg = reg.areg; 433 } else { 434 err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); 435 } 436 if (err) { 437 nfp_prog->error = err; 438 return; 439 } 440 441 __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, 442 false, reg.src_lmextn); 443 } 444 445 static void emit_nop(struct nfp_prog *nfp_prog) 446 { 447 __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); 448 } 449 450 /* --- Wrappers --- */ 451 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) 452 { 453 if (!(imm & 0xffff0000)) { 454 *val = imm; 455 *shift = IMMED_SHIFT_0B; 456 } else if (!(imm & 0xff0000ff)) { 457 *val = imm >> 8; 458 *shift = IMMED_SHIFT_1B; 459 } else if (!(imm & 0x0000ffff)) { 460 *val = imm >> 16; 461 *shift = IMMED_SHIFT_2B; 462 } else { 463 return false; 464 } 465 466 return true; 467 } 468 469 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) 470 { 471 enum immed_shift shift; 472 u16 val; 473 474 if (pack_immed(imm, &val, &shift)) { 475 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); 476 } else if (pack_immed(~imm, &val, &shift)) { 477 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); 478 } else { 479 emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, 480 false, IMMED_SHIFT_0B); 481 emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, 482 false, IMMED_SHIFT_2B); 483 } 484 } 485 486 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) 487 * If the @imm is small enough encode it directly in operand and return 488 * otherwise load @imm to a spare register and return its encoding. 489 */ 490 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 491 { 492 if (FIELD_FIT(UR_REG_IMM_MAX, imm)) 493 return reg_imm(imm); 494 495 wrp_immed(nfp_prog, tmp_reg, imm); 496 return tmp_reg; 497 } 498 499 /* re_load_imm_any() - encode immediate or use tmp register (restricted) 500 * If the @imm is small enough encode it directly in operand and return 501 * otherwise load @imm to a spare register and return its encoding. 502 */ 503 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 504 { 505 if (FIELD_FIT(RE_REG_IMM_MAX, imm)) 506 return reg_imm(imm); 507 508 wrp_immed(nfp_prog, tmp_reg, imm); 509 return tmp_reg; 510 } 511 512 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) 513 { 514 while (count--) 515 emit_nop(nfp_prog); 516 } 517 518 static void 519 wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, 520 enum br_special special) 521 { 522 emit_br(nfp_prog, mask, 0, 0); 523 524 nfp_prog->prog[nfp_prog->prog_len - 1] |= 525 FIELD_PREP(OP_BR_SPECIAL, special); 526 } 527 528 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) 529 { 530 emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); 531 } 532 533 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) 534 { 535 wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); 536 } 537 538 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the 539 * result to @dst from low end. 540 */ 541 static void 542 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, 543 u8 offset) 544 { 545 enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; 546 u8 mask = (1 << field_len) - 1; 547 548 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); 549 } 550 551 /* NFP has Command Push Pull bus which supports bluk memory operations. */ 552 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 553 { 554 bool descending_seq = meta->ldst_gather_len < 0; 555 s16 len = abs(meta->ldst_gather_len); 556 swreg src_base, off; 557 unsigned int i; 558 u8 xfer_num; 559 560 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 561 src_base = reg_a(meta->insn.src_reg * 2); 562 xfer_num = round_up(len, 4) / 4; 563 564 /* Setup PREV_ALU fields to override memory read length. */ 565 if (len > 32) 566 wrp_immed(nfp_prog, reg_none(), 567 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 568 569 /* Memory read from source addr into transfer-in registers. */ 570 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, 571 off, xfer_num - 1, true, len > 32); 572 573 /* Move from transfer-in to transfer-out. */ 574 for (i = 0; i < xfer_num; i++) 575 wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); 576 577 off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); 578 579 if (len <= 8) { 580 /* Use single direct_ref write8. */ 581 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 582 reg_a(meta->paired_st->dst_reg * 2), off, len - 1, 583 true); 584 } else if (len <= 32 && IS_ALIGNED(len, 4)) { 585 /* Use single direct_ref write32. */ 586 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 587 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, 588 true); 589 } else if (len <= 32) { 590 /* Use single indirect_ref write8. */ 591 wrp_immed(nfp_prog, reg_none(), 592 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); 593 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 594 reg_a(meta->paired_st->dst_reg * 2), off, 595 len - 1, true); 596 } else if (IS_ALIGNED(len, 4)) { 597 /* Use single indirect_ref write32. */ 598 wrp_immed(nfp_prog, reg_none(), 599 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 600 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 601 reg_a(meta->paired_st->dst_reg * 2), off, 602 xfer_num - 1, true); 603 } else if (len <= 40) { 604 /* Use one direct_ref write32 to write the first 32-bytes, then 605 * another direct_ref write8 to write the remaining bytes. 606 */ 607 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 608 reg_a(meta->paired_st->dst_reg * 2), off, 7, 609 true); 610 611 off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, 612 imm_b(nfp_prog)); 613 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, 614 reg_a(meta->paired_st->dst_reg * 2), off, len - 33, 615 true); 616 } else { 617 /* Use one indirect_ref write32 to write 4-bytes aligned length, 618 * then another direct_ref write8 to write the remaining bytes. 619 */ 620 u8 new_off; 621 622 wrp_immed(nfp_prog, reg_none(), 623 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); 624 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 625 reg_a(meta->paired_st->dst_reg * 2), off, 626 xfer_num - 2, true); 627 new_off = meta->paired_st->off + (xfer_num - 1) * 4; 628 off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); 629 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 630 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, 631 (len & 0x3) - 1, true); 632 } 633 634 /* TODO: The following extra load is to make sure data flow be identical 635 * before and after we do memory copy optimization. 636 * 637 * The load destination register is not guaranteed to be dead, so we 638 * need to make sure it is loaded with the value the same as before 639 * this transformation. 640 * 641 * These extra loads could be removed once we have accurate register 642 * usage information. 643 */ 644 if (descending_seq) 645 xfer_num = 0; 646 else if (BPF_SIZE(meta->insn.code) != BPF_DW) 647 xfer_num = xfer_num - 1; 648 else 649 xfer_num = xfer_num - 2; 650 651 switch (BPF_SIZE(meta->insn.code)) { 652 case BPF_B: 653 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 654 reg_xfer(xfer_num), 1, 655 IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1); 656 break; 657 case BPF_H: 658 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 659 reg_xfer(xfer_num), 2, (len & 3) ^ 2); 660 break; 661 case BPF_W: 662 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 663 reg_xfer(0)); 664 break; 665 case BPF_DW: 666 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 667 reg_xfer(xfer_num)); 668 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 669 reg_xfer(xfer_num + 1)); 670 break; 671 } 672 673 if (BPF_SIZE(meta->insn.code) != BPF_DW) 674 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 675 676 return 0; 677 } 678 679 static int 680 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) 681 { 682 unsigned int i; 683 u16 shift, sz; 684 685 /* We load the value from the address indicated in @offset and then 686 * shift out the data we don't need. Note: this is big endian! 687 */ 688 sz = max(size, 4); 689 shift = size < 4 ? 4 - size : 0; 690 691 emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, 692 pptr_reg(nfp_prog), offset, sz - 1, true); 693 694 i = 0; 695 if (shift) 696 emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, 697 reg_xfer(0), SHF_SC_R_SHF, shift * 8); 698 else 699 for (; i * 4 < size; i++) 700 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 701 702 if (i < 2) 703 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 704 705 return 0; 706 } 707 708 static int 709 data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 710 u8 dst_gpr, int size) 711 { 712 unsigned int i; 713 u8 mask, sz; 714 715 /* We load the value from the address indicated in @offset and then 716 * mask out the data we don't need. Note: this is little endian! 717 */ 718 sz = max(size, 4); 719 mask = size < 4 ? GENMASK(size - 1, 0) : 0; 720 721 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, 722 reg_a(src_gpr), offset, sz / 4 - 1, true); 723 724 i = 0; 725 if (mask) 726 emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, 727 reg_xfer(0), SHF_SC_NONE, 0, true); 728 else 729 for (; i * 4 < size; i++) 730 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 731 732 if (i < 2) 733 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 734 735 return 0; 736 } 737 738 static int 739 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) 740 { 741 swreg tmp_reg; 742 743 /* Calculate the true offset (src_reg + imm) */ 744 tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 745 emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); 746 747 /* Check packet length (size guaranteed to fit b/c it's u8) */ 748 emit_alu(nfp_prog, imm_a(nfp_prog), 749 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); 750 emit_alu(nfp_prog, reg_none(), 751 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); 752 wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); 753 754 /* Load data */ 755 return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); 756 } 757 758 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) 759 { 760 swreg tmp_reg; 761 762 /* Check packet length */ 763 tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); 764 emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); 765 wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); 766 767 /* Load data */ 768 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 769 return data_ld(nfp_prog, tmp_reg, 0, size); 770 } 771 772 static int 773 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 774 u8 src_gpr, u8 size) 775 { 776 unsigned int i; 777 778 for (i = 0; i * 4 < size; i++) 779 wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); 780 781 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 782 reg_a(dst_gpr), offset, size - 1, true); 783 784 return 0; 785 } 786 787 static int 788 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 789 u64 imm, u8 size) 790 { 791 wrp_immed(nfp_prog, reg_xfer(0), imm); 792 if (size == 8) 793 wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); 794 795 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 796 reg_a(dst_gpr), offset, size - 1, true); 797 798 return 0; 799 } 800 801 typedef int 802 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, 803 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 804 bool needs_inc); 805 806 static int 807 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, 808 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 809 bool needs_inc) 810 { 811 bool should_inc = needs_inc && new_gpr && !last; 812 u32 idx, src_byte; 813 enum shf_sc sc; 814 swreg reg; 815 int shf; 816 u8 mask; 817 818 if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) 819 return -EOPNOTSUPP; 820 821 idx = off / 4; 822 823 /* Move the entire word */ 824 if (size == 4) { 825 wrp_mov(nfp_prog, reg_both(dst), 826 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); 827 return 0; 828 } 829 830 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) 831 return -EOPNOTSUPP; 832 833 src_byte = off % 4; 834 835 mask = (1 << size) - 1; 836 mask <<= dst_byte; 837 838 if (WARN_ON_ONCE(mask > 0xf)) 839 return -EOPNOTSUPP; 840 841 shf = abs(src_byte - dst_byte) * 8; 842 if (src_byte == dst_byte) { 843 sc = SHF_SC_NONE; 844 } else if (src_byte < dst_byte) { 845 shf = 32 - shf; 846 sc = SHF_SC_L_SHF; 847 } else { 848 sc = SHF_SC_R_SHF; 849 } 850 851 /* ld_field can address fewer indexes, if offset too large do RMW. 852 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. 853 */ 854 if (idx <= RE_REG_LM_IDX_MAX) { 855 reg = reg_lm(lm3 ? 3 : 0, idx); 856 } else { 857 reg = imm_a(nfp_prog); 858 /* If it's not the first part of the load and we start a new GPR 859 * that means we are loading a second part of the LMEM word into 860 * a new GPR. IOW we've already looked that LMEM word and 861 * therefore it has been loaded into imm_a(). 862 */ 863 if (first || !new_gpr) 864 wrp_mov(nfp_prog, reg, reg_lm(0, idx)); 865 } 866 867 emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); 868 869 if (should_inc) 870 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); 871 872 return 0; 873 } 874 875 static int 876 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, 877 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 878 bool needs_inc) 879 { 880 bool should_inc = needs_inc && new_gpr && !last; 881 u32 idx, dst_byte; 882 enum shf_sc sc; 883 swreg reg; 884 int shf; 885 u8 mask; 886 887 if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) 888 return -EOPNOTSUPP; 889 890 idx = off / 4; 891 892 /* Move the entire word */ 893 if (size == 4) { 894 wrp_mov(nfp_prog, 895 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), 896 reg_b(src)); 897 return 0; 898 } 899 900 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) 901 return -EOPNOTSUPP; 902 903 dst_byte = off % 4; 904 905 mask = (1 << size) - 1; 906 mask <<= dst_byte; 907 908 if (WARN_ON_ONCE(mask > 0xf)) 909 return -EOPNOTSUPP; 910 911 shf = abs(src_byte - dst_byte) * 8; 912 if (src_byte == dst_byte) { 913 sc = SHF_SC_NONE; 914 } else if (src_byte < dst_byte) { 915 shf = 32 - shf; 916 sc = SHF_SC_L_SHF; 917 } else { 918 sc = SHF_SC_R_SHF; 919 } 920 921 /* ld_field can address fewer indexes, if offset too large do RMW. 922 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. 923 */ 924 if (idx <= RE_REG_LM_IDX_MAX) { 925 reg = reg_lm(lm3 ? 3 : 0, idx); 926 } else { 927 reg = imm_a(nfp_prog); 928 /* Only first and last LMEM locations are going to need RMW, 929 * the middle location will be overwritten fully. 930 */ 931 if (first || last) 932 wrp_mov(nfp_prog, reg, reg_lm(0, idx)); 933 } 934 935 emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); 936 937 if (new_gpr || last) { 938 if (idx > RE_REG_LM_IDX_MAX) 939 wrp_mov(nfp_prog, reg_lm(0, idx), reg); 940 if (should_inc) 941 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); 942 } 943 944 return 0; 945 } 946 947 static int 948 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 949 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, 950 bool clr_gpr, lmem_step step) 951 { 952 s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; 953 bool first = true, last; 954 bool needs_inc = false; 955 swreg stack_off_reg; 956 u8 prev_gpr = 255; 957 u32 gpr_byte = 0; 958 bool lm3 = true; 959 int ret; 960 961 if (meta->ptr_not_const) { 962 /* Use of the last encountered ptr_off is OK, they all have 963 * the same alignment. Depend on low bits of value being 964 * discarded when written to LMaddr register. 965 */ 966 stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, 967 stack_imm(nfp_prog)); 968 969 emit_alu(nfp_prog, imm_b(nfp_prog), 970 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); 971 972 needs_inc = true; 973 } else if (off + size <= 64) { 974 /* We can reach bottom 64B with LMaddr0 */ 975 lm3 = false; 976 } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { 977 /* We have to set up a new pointer. If we know the offset 978 * and the entire access falls into a single 32 byte aligned 979 * window we won't have to increment the LM pointer. 980 * The 32 byte alignment is imporant because offset is ORed in 981 * not added when doing *l$indexN[off]. 982 */ 983 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), 984 stack_imm(nfp_prog)); 985 emit_alu(nfp_prog, imm_b(nfp_prog), 986 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 987 988 off %= 32; 989 } else { 990 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), 991 stack_imm(nfp_prog)); 992 993 emit_alu(nfp_prog, imm_b(nfp_prog), 994 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 995 996 needs_inc = true; 997 } 998 if (lm3) { 999 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); 1000 /* For size < 4 one slot will be filled by zeroing of upper. */ 1001 wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); 1002 } 1003 1004 if (clr_gpr && size < 8) 1005 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1006 1007 while (size) { 1008 u32 slice_end; 1009 u8 slice_size; 1010 1011 slice_size = min(size, 4 - gpr_byte); 1012 slice_end = min(off + slice_size, round_up(off + 1, 4)); 1013 slice_size = slice_end - off; 1014 1015 last = slice_size == size; 1016 1017 if (needs_inc) 1018 off %= 4; 1019 1020 ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, 1021 first, gpr != prev_gpr, last, lm3, needs_inc); 1022 if (ret) 1023 return ret; 1024 1025 prev_gpr = gpr; 1026 first = false; 1027 1028 gpr_byte += slice_size; 1029 if (gpr_byte >= 4) { 1030 gpr_byte -= 4; 1031 gpr++; 1032 } 1033 1034 size -= slice_size; 1035 off += slice_size; 1036 } 1037 1038 return 0; 1039 } 1040 1041 static void 1042 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) 1043 { 1044 swreg tmp_reg; 1045 1046 if (alu_op == ALU_OP_AND) { 1047 if (!imm) 1048 wrp_immed(nfp_prog, reg_both(dst), 0); 1049 if (!imm || !~imm) 1050 return; 1051 } 1052 if (alu_op == ALU_OP_OR) { 1053 if (!~imm) 1054 wrp_immed(nfp_prog, reg_both(dst), ~0U); 1055 if (!imm || !~imm) 1056 return; 1057 } 1058 if (alu_op == ALU_OP_XOR) { 1059 if (!~imm) 1060 emit_alu(nfp_prog, reg_both(dst), reg_none(), 1061 ALU_OP_NOT, reg_b(dst)); 1062 if (!imm || !~imm) 1063 return; 1064 } 1065 1066 tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); 1067 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); 1068 } 1069 1070 static int 1071 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1072 enum alu_op alu_op, bool skip) 1073 { 1074 const struct bpf_insn *insn = &meta->insn; 1075 u64 imm = insn->imm; /* sign extend */ 1076 1077 if (skip) { 1078 meta->skip = true; 1079 return 0; 1080 } 1081 1082 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); 1083 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); 1084 1085 return 0; 1086 } 1087 1088 static int 1089 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1090 enum alu_op alu_op) 1091 { 1092 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1093 1094 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1095 emit_alu(nfp_prog, reg_both(dst + 1), 1096 reg_a(dst + 1), alu_op, reg_b(src + 1)); 1097 1098 return 0; 1099 } 1100 1101 static int 1102 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1103 enum alu_op alu_op, bool skip) 1104 { 1105 const struct bpf_insn *insn = &meta->insn; 1106 1107 if (skip) { 1108 meta->skip = true; 1109 return 0; 1110 } 1111 1112 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); 1113 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1114 1115 return 0; 1116 } 1117 1118 static int 1119 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1120 enum alu_op alu_op) 1121 { 1122 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1123 1124 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1125 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1126 1127 return 0; 1128 } 1129 1130 static void 1131 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, 1132 enum br_mask br_mask, u16 off) 1133 { 1134 emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); 1135 emit_br(nfp_prog, br_mask, off, 0); 1136 } 1137 1138 static int 1139 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1140 enum alu_op alu_op, enum br_mask br_mask) 1141 { 1142 const struct bpf_insn *insn = &meta->insn; 1143 1144 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, 1145 insn->src_reg * 2, br_mask, insn->off); 1146 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, 1147 insn->src_reg * 2 + 1, br_mask, insn->off); 1148 1149 return 0; 1150 } 1151 1152 static int 1153 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1154 enum br_mask br_mask, bool swap) 1155 { 1156 const struct bpf_insn *insn = &meta->insn; 1157 u64 imm = insn->imm; /* sign extend */ 1158 u8 reg = insn->dst_reg * 2; 1159 swreg tmp_reg; 1160 1161 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1162 if (!swap) 1163 emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); 1164 else 1165 emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); 1166 1167 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1168 if (!swap) 1169 emit_alu(nfp_prog, reg_none(), 1170 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); 1171 else 1172 emit_alu(nfp_prog, reg_none(), 1173 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); 1174 1175 emit_br(nfp_prog, br_mask, insn->off, 0); 1176 1177 return 0; 1178 } 1179 1180 static int 1181 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1182 enum br_mask br_mask, bool swap) 1183 { 1184 const struct bpf_insn *insn = &meta->insn; 1185 u8 areg, breg; 1186 1187 areg = insn->dst_reg * 2; 1188 breg = insn->src_reg * 2; 1189 1190 if (swap) { 1191 areg ^= breg; 1192 breg ^= areg; 1193 areg ^= breg; 1194 } 1195 1196 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); 1197 emit_alu(nfp_prog, reg_none(), 1198 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); 1199 emit_br(nfp_prog, br_mask, insn->off, 0); 1200 1201 return 0; 1202 } 1203 1204 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) 1205 { 1206 emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, 1207 SHF_SC_R_ROT, 8); 1208 emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), 1209 SHF_SC_R_ROT, 16); 1210 } 1211 1212 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1213 { 1214 swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog); 1215 struct nfp_bpf_cap_adjust_head *adjust_head; 1216 u32 ret_einval, end; 1217 1218 adjust_head = &nfp_prog->bpf->adjust_head; 1219 1220 ret_einval = nfp_prog_current_offset(nfp_prog) + 14; 1221 end = ret_einval + 2; 1222 1223 /* We need to use a temp because offset is just a part of the pkt ptr */ 1224 emit_alu(nfp_prog, tmp, 1225 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog)); 1226 1227 /* Validate result will fit within FW datapath constraints */ 1228 emit_alu(nfp_prog, reg_none(), 1229 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min)); 1230 emit_br(nfp_prog, BR_BLO, ret_einval, 0); 1231 emit_alu(nfp_prog, reg_none(), 1232 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp); 1233 emit_br(nfp_prog, BR_BLO, ret_einval, 0); 1234 1235 /* Validate the length is at least ETH_HLEN */ 1236 emit_alu(nfp_prog, tmp_len, 1237 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1238 emit_alu(nfp_prog, reg_none(), 1239 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN)); 1240 emit_br(nfp_prog, BR_BMI, ret_einval, 0); 1241 1242 /* Load the ret code */ 1243 wrp_immed(nfp_prog, reg_both(0), 0); 1244 wrp_immed(nfp_prog, reg_both(1), 0); 1245 1246 /* Modify the packet metadata */ 1247 emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); 1248 1249 /* Skip over the -EINVAL ret code (defer 2) */ 1250 emit_br_def(nfp_prog, end, 2); 1251 1252 emit_alu(nfp_prog, plen_reg(nfp_prog), 1253 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1254 emit_alu(nfp_prog, pv_len(nfp_prog), 1255 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); 1256 1257 /* return -EINVAL target */ 1258 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval)) 1259 return -EINVAL; 1260 1261 wrp_immed(nfp_prog, reg_both(0), -22); 1262 wrp_immed(nfp_prog, reg_both(1), ~0); 1263 1264 if (!nfp_prog_confirm_current_offset(nfp_prog, end)) 1265 return -EINVAL; 1266 1267 return 0; 1268 } 1269 1270 /* --- Callbacks --- */ 1271 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1272 { 1273 const struct bpf_insn *insn = &meta->insn; 1274 u8 dst = insn->dst_reg * 2; 1275 u8 src = insn->src_reg * 2; 1276 1277 if (insn->src_reg == BPF_REG_10) { 1278 swreg stack_depth_reg; 1279 1280 stack_depth_reg = ur_load_imm_any(nfp_prog, 1281 nfp_prog->stack_depth, 1282 stack_imm(nfp_prog)); 1283 emit_alu(nfp_prog, reg_both(dst), 1284 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); 1285 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1286 } else { 1287 wrp_reg_mov(nfp_prog, dst, src); 1288 wrp_reg_mov(nfp_prog, dst + 1, src + 1); 1289 } 1290 1291 return 0; 1292 } 1293 1294 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1295 { 1296 u64 imm = meta->insn.imm; /* sign extend */ 1297 1298 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); 1299 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); 1300 1301 return 0; 1302 } 1303 1304 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1305 { 1306 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); 1307 } 1308 1309 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1310 { 1311 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); 1312 } 1313 1314 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1315 { 1316 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); 1317 } 1318 1319 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1320 { 1321 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); 1322 } 1323 1324 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1325 { 1326 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); 1327 } 1328 1329 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1330 { 1331 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); 1332 } 1333 1334 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1335 { 1336 const struct bpf_insn *insn = &meta->insn; 1337 1338 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1339 reg_a(insn->dst_reg * 2), ALU_OP_ADD, 1340 reg_b(insn->src_reg * 2)); 1341 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1342 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, 1343 reg_b(insn->src_reg * 2 + 1)); 1344 1345 return 0; 1346 } 1347 1348 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1349 { 1350 const struct bpf_insn *insn = &meta->insn; 1351 u64 imm = insn->imm; /* sign extend */ 1352 1353 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); 1354 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); 1355 1356 return 0; 1357 } 1358 1359 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1360 { 1361 const struct bpf_insn *insn = &meta->insn; 1362 1363 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1364 reg_a(insn->dst_reg * 2), ALU_OP_SUB, 1365 reg_b(insn->src_reg * 2)); 1366 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1367 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, 1368 reg_b(insn->src_reg * 2 + 1)); 1369 1370 return 0; 1371 } 1372 1373 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1374 { 1375 const struct bpf_insn *insn = &meta->insn; 1376 u64 imm = insn->imm; /* sign extend */ 1377 1378 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); 1379 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); 1380 1381 return 0; 1382 } 1383 1384 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1385 { 1386 const struct bpf_insn *insn = &meta->insn; 1387 1388 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), 1389 ALU_OP_SUB, reg_b(insn->dst_reg * 2)); 1390 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), 1391 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); 1392 1393 return 0; 1394 } 1395 1396 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1397 { 1398 const struct bpf_insn *insn = &meta->insn; 1399 u8 dst = insn->dst_reg * 2; 1400 1401 if (insn->imm < 32) { 1402 emit_shf(nfp_prog, reg_both(dst + 1), 1403 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), 1404 SHF_SC_R_DSHF, 32 - insn->imm); 1405 emit_shf(nfp_prog, reg_both(dst), 1406 reg_none(), SHF_OP_NONE, reg_b(dst), 1407 SHF_SC_L_SHF, insn->imm); 1408 } else if (insn->imm == 32) { 1409 wrp_reg_mov(nfp_prog, dst + 1, dst); 1410 wrp_immed(nfp_prog, reg_both(dst), 0); 1411 } else if (insn->imm > 32) { 1412 emit_shf(nfp_prog, reg_both(dst + 1), 1413 reg_none(), SHF_OP_NONE, reg_b(dst), 1414 SHF_SC_L_SHF, insn->imm - 32); 1415 wrp_immed(nfp_prog, reg_both(dst), 0); 1416 } 1417 1418 return 0; 1419 } 1420 1421 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1422 { 1423 const struct bpf_insn *insn = &meta->insn; 1424 u8 dst = insn->dst_reg * 2; 1425 1426 if (insn->imm < 32) { 1427 emit_shf(nfp_prog, reg_both(dst), 1428 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), 1429 SHF_SC_R_DSHF, insn->imm); 1430 emit_shf(nfp_prog, reg_both(dst + 1), 1431 reg_none(), SHF_OP_NONE, reg_b(dst + 1), 1432 SHF_SC_R_SHF, insn->imm); 1433 } else if (insn->imm == 32) { 1434 wrp_reg_mov(nfp_prog, dst, dst + 1); 1435 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1436 } else if (insn->imm > 32) { 1437 emit_shf(nfp_prog, reg_both(dst), 1438 reg_none(), SHF_OP_NONE, reg_b(dst + 1), 1439 SHF_SC_R_SHF, insn->imm - 32); 1440 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1441 } 1442 1443 return 0; 1444 } 1445 1446 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1447 { 1448 const struct bpf_insn *insn = &meta->insn; 1449 1450 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); 1451 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1452 1453 return 0; 1454 } 1455 1456 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1457 { 1458 const struct bpf_insn *insn = &meta->insn; 1459 1460 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); 1461 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1462 1463 return 0; 1464 } 1465 1466 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1467 { 1468 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); 1469 } 1470 1471 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1472 { 1473 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); 1474 } 1475 1476 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1477 { 1478 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); 1479 } 1480 1481 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1482 { 1483 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); 1484 } 1485 1486 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1487 { 1488 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); 1489 } 1490 1491 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1492 { 1493 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); 1494 } 1495 1496 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1497 { 1498 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); 1499 } 1500 1501 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1502 { 1503 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); 1504 } 1505 1506 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1507 { 1508 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); 1509 } 1510 1511 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1512 { 1513 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); 1514 } 1515 1516 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1517 { 1518 u8 dst = meta->insn.dst_reg * 2; 1519 1520 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); 1521 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1522 1523 return 0; 1524 } 1525 1526 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1527 { 1528 const struct bpf_insn *insn = &meta->insn; 1529 1530 if (!insn->imm) 1531 return 1; /* TODO: zero shift means indirect */ 1532 1533 emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), 1534 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), 1535 SHF_SC_L_SHF, insn->imm); 1536 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1537 1538 return 0; 1539 } 1540 1541 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1542 { 1543 const struct bpf_insn *insn = &meta->insn; 1544 u8 gpr = insn->dst_reg * 2; 1545 1546 switch (insn->imm) { 1547 case 16: 1548 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), 1549 SHF_SC_R_ROT, 8); 1550 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), 1551 SHF_SC_R_SHF, 16); 1552 1553 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1554 break; 1555 case 32: 1556 wrp_end32(nfp_prog, reg_a(gpr), gpr); 1557 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1558 break; 1559 case 64: 1560 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); 1561 1562 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); 1563 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); 1564 break; 1565 } 1566 1567 return 0; 1568 } 1569 1570 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1571 { 1572 struct nfp_insn_meta *prev = nfp_meta_prev(meta); 1573 u32 imm_lo, imm_hi; 1574 u8 dst; 1575 1576 dst = prev->insn.dst_reg * 2; 1577 imm_lo = prev->insn.imm; 1578 imm_hi = meta->insn.imm; 1579 1580 wrp_immed(nfp_prog, reg_both(dst), imm_lo); 1581 1582 /* mov is always 1 insn, load imm may be two, so try to use mov */ 1583 if (imm_hi == imm_lo) 1584 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); 1585 else 1586 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); 1587 1588 return 0; 1589 } 1590 1591 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1592 { 1593 meta->double_cb = imm_ld8_part2; 1594 return 0; 1595 } 1596 1597 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1598 { 1599 return construct_data_ld(nfp_prog, meta->insn.imm, 1); 1600 } 1601 1602 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1603 { 1604 return construct_data_ld(nfp_prog, meta->insn.imm, 2); 1605 } 1606 1607 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1608 { 1609 return construct_data_ld(nfp_prog, meta->insn.imm, 4); 1610 } 1611 1612 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1613 { 1614 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1615 meta->insn.src_reg * 2, 1); 1616 } 1617 1618 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1619 { 1620 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1621 meta->insn.src_reg * 2, 2); 1622 } 1623 1624 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1625 { 1626 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1627 meta->insn.src_reg * 2, 4); 1628 } 1629 1630 static int 1631 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1632 unsigned int size, unsigned int ptr_off) 1633 { 1634 return mem_op_stack(nfp_prog, meta, size, ptr_off, 1635 meta->insn.dst_reg * 2, meta->insn.src_reg * 2, 1636 true, wrp_lmem_load); 1637 } 1638 1639 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1640 u8 size) 1641 { 1642 swreg dst = reg_both(meta->insn.dst_reg * 2); 1643 1644 switch (meta->insn.off) { 1645 case offsetof(struct __sk_buff, len): 1646 if (size != FIELD_SIZEOF(struct __sk_buff, len)) 1647 return -EOPNOTSUPP; 1648 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); 1649 break; 1650 case offsetof(struct __sk_buff, data): 1651 if (size != FIELD_SIZEOF(struct __sk_buff, data)) 1652 return -EOPNOTSUPP; 1653 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 1654 break; 1655 case offsetof(struct __sk_buff, data_end): 1656 if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) 1657 return -EOPNOTSUPP; 1658 emit_alu(nfp_prog, dst, 1659 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 1660 break; 1661 default: 1662 return -EOPNOTSUPP; 1663 } 1664 1665 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1666 1667 return 0; 1668 } 1669 1670 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1671 u8 size) 1672 { 1673 swreg dst = reg_both(meta->insn.dst_reg * 2); 1674 1675 switch (meta->insn.off) { 1676 case offsetof(struct xdp_md, data): 1677 if (size != FIELD_SIZEOF(struct xdp_md, data)) 1678 return -EOPNOTSUPP; 1679 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 1680 break; 1681 case offsetof(struct xdp_md, data_end): 1682 if (size != FIELD_SIZEOF(struct xdp_md, data_end)) 1683 return -EOPNOTSUPP; 1684 emit_alu(nfp_prog, dst, 1685 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 1686 break; 1687 default: 1688 return -EOPNOTSUPP; 1689 } 1690 1691 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1692 1693 return 0; 1694 } 1695 1696 static int 1697 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1698 unsigned int size) 1699 { 1700 swreg tmp_reg; 1701 1702 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1703 1704 return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg, 1705 meta->insn.dst_reg * 2, size); 1706 } 1707 1708 static int 1709 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1710 unsigned int size) 1711 { 1712 if (meta->ldst_gather_len) 1713 return nfp_cpp_memcpy(nfp_prog, meta); 1714 1715 if (meta->ptr.type == PTR_TO_CTX) { 1716 if (nfp_prog->type == BPF_PROG_TYPE_XDP) 1717 return mem_ldx_xdp(nfp_prog, meta, size); 1718 else 1719 return mem_ldx_skb(nfp_prog, meta, size); 1720 } 1721 1722 if (meta->ptr.type == PTR_TO_PACKET) 1723 return mem_ldx_data(nfp_prog, meta, size); 1724 1725 if (meta->ptr.type == PTR_TO_STACK) 1726 return mem_ldx_stack(nfp_prog, meta, size, 1727 meta->ptr.off + meta->ptr.var_off.value); 1728 1729 return -EOPNOTSUPP; 1730 } 1731 1732 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1733 { 1734 return mem_ldx(nfp_prog, meta, 1); 1735 } 1736 1737 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1738 { 1739 return mem_ldx(nfp_prog, meta, 2); 1740 } 1741 1742 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1743 { 1744 return mem_ldx(nfp_prog, meta, 4); 1745 } 1746 1747 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1748 { 1749 return mem_ldx(nfp_prog, meta, 8); 1750 } 1751 1752 static int 1753 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1754 unsigned int size) 1755 { 1756 u64 imm = meta->insn.imm; /* sign extend */ 1757 swreg off_reg; 1758 1759 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1760 1761 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 1762 imm, size); 1763 } 1764 1765 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1766 unsigned int size) 1767 { 1768 if (meta->ptr.type == PTR_TO_PACKET) 1769 return mem_st_data(nfp_prog, meta, size); 1770 1771 return -EOPNOTSUPP; 1772 } 1773 1774 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1775 { 1776 return mem_st(nfp_prog, meta, 1); 1777 } 1778 1779 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1780 { 1781 return mem_st(nfp_prog, meta, 2); 1782 } 1783 1784 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1785 { 1786 return mem_st(nfp_prog, meta, 4); 1787 } 1788 1789 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1790 { 1791 return mem_st(nfp_prog, meta, 8); 1792 } 1793 1794 static int 1795 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1796 unsigned int size) 1797 { 1798 swreg off_reg; 1799 1800 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1801 1802 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 1803 meta->insn.src_reg * 2, size); 1804 } 1805 1806 static int 1807 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1808 unsigned int size, unsigned int ptr_off) 1809 { 1810 return mem_op_stack(nfp_prog, meta, size, ptr_off, 1811 meta->insn.src_reg * 2, meta->insn.dst_reg * 2, 1812 false, wrp_lmem_store); 1813 } 1814 1815 static int 1816 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1817 unsigned int size) 1818 { 1819 if (meta->ptr.type == PTR_TO_PACKET) 1820 return mem_stx_data(nfp_prog, meta, size); 1821 1822 if (meta->ptr.type == PTR_TO_STACK) 1823 return mem_stx_stack(nfp_prog, meta, size, 1824 meta->ptr.off + meta->ptr.var_off.value); 1825 1826 return -EOPNOTSUPP; 1827 } 1828 1829 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1830 { 1831 return mem_stx(nfp_prog, meta, 1); 1832 } 1833 1834 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1835 { 1836 return mem_stx(nfp_prog, meta, 2); 1837 } 1838 1839 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1840 { 1841 return mem_stx(nfp_prog, meta, 4); 1842 } 1843 1844 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1845 { 1846 return mem_stx(nfp_prog, meta, 8); 1847 } 1848 1849 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1850 { 1851 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); 1852 1853 return 0; 1854 } 1855 1856 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1857 { 1858 const struct bpf_insn *insn = &meta->insn; 1859 u64 imm = insn->imm; /* sign extend */ 1860 swreg or1, or2, tmp_reg; 1861 1862 or1 = reg_a(insn->dst_reg * 2); 1863 or2 = reg_b(insn->dst_reg * 2 + 1); 1864 1865 if (imm & ~0U) { 1866 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1867 emit_alu(nfp_prog, imm_a(nfp_prog), 1868 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 1869 or1 = imm_a(nfp_prog); 1870 } 1871 1872 if (imm >> 32) { 1873 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1874 emit_alu(nfp_prog, imm_b(nfp_prog), 1875 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 1876 or2 = imm_b(nfp_prog); 1877 } 1878 1879 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); 1880 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 1881 1882 return 0; 1883 } 1884 1885 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1886 { 1887 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); 1888 } 1889 1890 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1891 { 1892 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); 1893 } 1894 1895 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1896 { 1897 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); 1898 } 1899 1900 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1901 { 1902 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); 1903 } 1904 1905 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1906 { 1907 const struct bpf_insn *insn = &meta->insn; 1908 u64 imm = insn->imm; /* sign extend */ 1909 swreg tmp_reg; 1910 1911 if (!imm) { 1912 meta->skip = true; 1913 return 0; 1914 } 1915 1916 if (imm & ~0U) { 1917 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1918 emit_alu(nfp_prog, reg_none(), 1919 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); 1920 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1921 } 1922 1923 if (imm >> 32) { 1924 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1925 emit_alu(nfp_prog, reg_none(), 1926 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); 1927 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1928 } 1929 1930 return 0; 1931 } 1932 1933 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1934 { 1935 const struct bpf_insn *insn = &meta->insn; 1936 u64 imm = insn->imm; /* sign extend */ 1937 swreg tmp_reg; 1938 1939 if (!imm) { 1940 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), 1941 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); 1942 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1943 return 0; 1944 } 1945 1946 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1947 emit_alu(nfp_prog, reg_none(), 1948 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 1949 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1950 1951 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1952 emit_alu(nfp_prog, reg_none(), 1953 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 1954 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1955 1956 return 0; 1957 } 1958 1959 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1960 { 1961 const struct bpf_insn *insn = &meta->insn; 1962 1963 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), 1964 ALU_OP_XOR, reg_b(insn->src_reg * 2)); 1965 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), 1966 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); 1967 emit_alu(nfp_prog, reg_none(), 1968 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); 1969 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 1970 1971 return 0; 1972 } 1973 1974 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1975 { 1976 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); 1977 } 1978 1979 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1980 { 1981 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); 1982 } 1983 1984 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1985 { 1986 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); 1987 } 1988 1989 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1990 { 1991 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); 1992 } 1993 1994 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1995 { 1996 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); 1997 } 1998 1999 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2000 { 2001 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); 2002 } 2003 2004 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2005 { 2006 switch (meta->insn.imm) { 2007 case BPF_FUNC_xdp_adjust_head: 2008 return adjust_head(nfp_prog, meta); 2009 default: 2010 WARN_ONCE(1, "verifier allowed unsupported function\n"); 2011 return -EOPNOTSUPP; 2012 } 2013 } 2014 2015 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2016 { 2017 wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); 2018 2019 return 0; 2020 } 2021 2022 static const instr_cb_t instr_cb[256] = { 2023 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, 2024 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, 2025 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, 2026 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, 2027 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, 2028 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, 2029 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, 2030 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, 2031 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, 2032 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, 2033 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, 2034 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, 2035 [BPF_ALU64 | BPF_NEG] = neg_reg64, 2036 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, 2037 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, 2038 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, 2039 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, 2040 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, 2041 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, 2042 [BPF_ALU | BPF_AND | BPF_X] = and_reg, 2043 [BPF_ALU | BPF_AND | BPF_K] = and_imm, 2044 [BPF_ALU | BPF_OR | BPF_X] = or_reg, 2045 [BPF_ALU | BPF_OR | BPF_K] = or_imm, 2046 [BPF_ALU | BPF_ADD | BPF_X] = add_reg, 2047 [BPF_ALU | BPF_ADD | BPF_K] = add_imm, 2048 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, 2049 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, 2050 [BPF_ALU | BPF_NEG] = neg_reg, 2051 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, 2052 [BPF_ALU | BPF_END | BPF_X] = end_reg32, 2053 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, 2054 [BPF_LD | BPF_ABS | BPF_B] = data_ld1, 2055 [BPF_LD | BPF_ABS | BPF_H] = data_ld2, 2056 [BPF_LD | BPF_ABS | BPF_W] = data_ld4, 2057 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, 2058 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, 2059 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, 2060 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, 2061 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, 2062 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, 2063 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, 2064 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, 2065 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, 2066 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, 2067 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, 2068 [BPF_ST | BPF_MEM | BPF_B] = mem_st1, 2069 [BPF_ST | BPF_MEM | BPF_H] = mem_st2, 2070 [BPF_ST | BPF_MEM | BPF_W] = mem_st4, 2071 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, 2072 [BPF_JMP | BPF_JA | BPF_K] = jump, 2073 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, 2074 [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, 2075 [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, 2076 [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, 2077 [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, 2078 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, 2079 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, 2080 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, 2081 [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, 2082 [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, 2083 [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, 2084 [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, 2085 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, 2086 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, 2087 [BPF_JMP | BPF_CALL] = call, 2088 [BPF_JMP | BPF_EXIT] = goto_out, 2089 }; 2090 2091 /* --- Misc code --- */ 2092 static void br_set_offset(u64 *instr, u16 offset) 2093 { 2094 u16 addr_lo, addr_hi; 2095 2096 addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); 2097 addr_hi = offset != addr_lo; 2098 *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); 2099 *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); 2100 *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); 2101 } 2102 2103 /* --- Assembler logic --- */ 2104 static int nfp_fixup_branches(struct nfp_prog *nfp_prog) 2105 { 2106 struct nfp_insn_meta *meta, *jmp_dst; 2107 u32 idx, br_idx; 2108 2109 list_for_each_entry(meta, &nfp_prog->insns, l) { 2110 if (meta->skip) 2111 continue; 2112 if (meta->insn.code == (BPF_JMP | BPF_CALL)) 2113 continue; 2114 if (BPF_CLASS(meta->insn.code) != BPF_JMP) 2115 continue; 2116 2117 if (list_is_last(&meta->l, &nfp_prog->insns)) 2118 idx = nfp_prog->last_bpf_off; 2119 else 2120 idx = list_next_entry(meta, l)->off - 1; 2121 2122 br_idx = nfp_prog_offset_to_index(nfp_prog, idx); 2123 2124 if (!nfp_is_br(nfp_prog->prog[br_idx])) { 2125 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", 2126 br_idx, meta->insn.code, nfp_prog->prog[br_idx]); 2127 return -ELOOP; 2128 } 2129 /* Leave special branches for later */ 2130 if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) 2131 continue; 2132 2133 if (!meta->jmp_dst) { 2134 pr_err("Non-exit jump doesn't have destination info recorded!!\n"); 2135 return -ELOOP; 2136 } 2137 2138 jmp_dst = meta->jmp_dst; 2139 2140 if (jmp_dst->skip) { 2141 pr_err("Branch landing on removed instruction!!\n"); 2142 return -ELOOP; 2143 } 2144 2145 for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); 2146 idx <= br_idx; idx++) { 2147 if (!nfp_is_br(nfp_prog->prog[idx])) 2148 continue; 2149 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); 2150 } 2151 } 2152 2153 /* Fixup 'goto out's separately, they can be scattered around */ 2154 for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { 2155 enum br_special special; 2156 2157 if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) 2158 continue; 2159 2160 special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); 2161 switch (special) { 2162 case OP_BR_NORMAL: 2163 break; 2164 case OP_BR_GO_OUT: 2165 br_set_offset(&nfp_prog->prog[br_idx], 2166 nfp_prog->tgt_out); 2167 break; 2168 case OP_BR_GO_ABORT: 2169 br_set_offset(&nfp_prog->prog[br_idx], 2170 nfp_prog->tgt_abort); 2171 break; 2172 } 2173 2174 nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; 2175 } 2176 2177 return 0; 2178 } 2179 2180 static void nfp_intro(struct nfp_prog *nfp_prog) 2181 { 2182 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); 2183 emit_alu(nfp_prog, plen_reg(nfp_prog), 2184 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); 2185 } 2186 2187 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) 2188 { 2189 /* TC direct-action mode: 2190 * 0,1 ok NOT SUPPORTED[1] 2191 * 2 drop 0x22 -> drop, count as stat1 2192 * 4,5 nuke 0x02 -> drop 2193 * 7 redir 0x44 -> redir, count as stat2 2194 * * unspec 0x11 -> pass, count as stat0 2195 * 2196 * [1] We can't support OK and RECLASSIFY because we can't tell TC 2197 * the exact decision made. We are forced to support UNSPEC 2198 * to handle aborts so that's the only one we handle for passing 2199 * packets up the stack. 2200 */ 2201 /* Target for aborts */ 2202 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2203 2204 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2205 2206 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2207 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); 2208 2209 /* Target for normal exits */ 2210 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2211 2212 /* if R0 > 7 jump to abort */ 2213 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); 2214 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2215 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2216 2217 wrp_immed(nfp_prog, reg_b(2), 0x41221211); 2218 wrp_immed(nfp_prog, reg_b(3), 0x41001211); 2219 2220 emit_shf(nfp_prog, reg_a(1), 2221 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); 2222 2223 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2224 emit_shf(nfp_prog, reg_a(2), 2225 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2226 2227 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2228 emit_shf(nfp_prog, reg_b(2), 2229 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); 2230 2231 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2232 2233 emit_shf(nfp_prog, reg_b(2), 2234 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); 2235 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2236 } 2237 2238 static void nfp_outro_xdp(struct nfp_prog *nfp_prog) 2239 { 2240 /* XDP return codes: 2241 * 0 aborted 0x82 -> drop, count as stat3 2242 * 1 drop 0x22 -> drop, count as stat1 2243 * 2 pass 0x11 -> pass, count as stat0 2244 * 3 tx 0x44 -> redir, count as stat2 2245 * * unknown 0x82 -> drop, count as stat3 2246 */ 2247 /* Target for aborts */ 2248 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2249 2250 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2251 2252 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2253 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); 2254 2255 /* Target for normal exits */ 2256 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2257 2258 /* if R0 > 3 jump to abort */ 2259 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); 2260 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2261 2262 wrp_immed(nfp_prog, reg_b(2), 0x44112282); 2263 2264 emit_shf(nfp_prog, reg_a(1), 2265 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); 2266 2267 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2268 emit_shf(nfp_prog, reg_b(2), 2269 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2270 2271 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2272 2273 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2274 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2275 } 2276 2277 static void nfp_outro(struct nfp_prog *nfp_prog) 2278 { 2279 switch (nfp_prog->type) { 2280 case BPF_PROG_TYPE_SCHED_CLS: 2281 nfp_outro_tc_da(nfp_prog); 2282 break; 2283 case BPF_PROG_TYPE_XDP: 2284 nfp_outro_xdp(nfp_prog); 2285 break; 2286 default: 2287 WARN_ON(1); 2288 } 2289 } 2290 2291 static int nfp_translate(struct nfp_prog *nfp_prog) 2292 { 2293 struct nfp_insn_meta *meta; 2294 int err; 2295 2296 nfp_intro(nfp_prog); 2297 if (nfp_prog->error) 2298 return nfp_prog->error; 2299 2300 list_for_each_entry(meta, &nfp_prog->insns, l) { 2301 instr_cb_t cb = instr_cb[meta->insn.code]; 2302 2303 meta->off = nfp_prog_current_offset(nfp_prog); 2304 2305 if (meta->skip) { 2306 nfp_prog->n_translated++; 2307 continue; 2308 } 2309 2310 if (nfp_meta_has_prev(nfp_prog, meta) && 2311 nfp_meta_prev(meta)->double_cb) 2312 cb = nfp_meta_prev(meta)->double_cb; 2313 if (!cb) 2314 return -ENOENT; 2315 err = cb(nfp_prog, meta); 2316 if (err) 2317 return err; 2318 2319 nfp_prog->n_translated++; 2320 } 2321 2322 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; 2323 2324 nfp_outro(nfp_prog); 2325 if (nfp_prog->error) 2326 return nfp_prog->error; 2327 2328 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); 2329 if (nfp_prog->error) 2330 return nfp_prog->error; 2331 2332 return nfp_fixup_branches(nfp_prog); 2333 } 2334 2335 /* --- Optimizations --- */ 2336 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) 2337 { 2338 struct nfp_insn_meta *meta; 2339 2340 list_for_each_entry(meta, &nfp_prog->insns, l) { 2341 struct bpf_insn insn = meta->insn; 2342 2343 /* Programs converted from cBPF start with register xoring */ 2344 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && 2345 insn.src_reg == insn.dst_reg) 2346 continue; 2347 2348 /* Programs start with R6 = R1 but we ignore the skb pointer */ 2349 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && 2350 insn.src_reg == 1 && insn.dst_reg == 6) 2351 meta->skip = true; 2352 2353 /* Return as soon as something doesn't match */ 2354 if (!meta->skip) 2355 return; 2356 } 2357 } 2358 2359 /* Remove masking after load since our load guarantees this is not needed */ 2360 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) 2361 { 2362 struct nfp_insn_meta *meta1, *meta2; 2363 const s32 exp_mask[] = { 2364 [BPF_B] = 0x000000ffU, 2365 [BPF_H] = 0x0000ffffU, 2366 [BPF_W] = 0xffffffffU, 2367 }; 2368 2369 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2370 struct bpf_insn insn, next; 2371 2372 insn = meta1->insn; 2373 next = meta2->insn; 2374 2375 if (BPF_CLASS(insn.code) != BPF_LD) 2376 continue; 2377 if (BPF_MODE(insn.code) != BPF_ABS && 2378 BPF_MODE(insn.code) != BPF_IND) 2379 continue; 2380 2381 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) 2382 continue; 2383 2384 if (!exp_mask[BPF_SIZE(insn.code)]) 2385 continue; 2386 if (exp_mask[BPF_SIZE(insn.code)] != next.imm) 2387 continue; 2388 2389 if (next.src_reg || next.dst_reg) 2390 continue; 2391 2392 if (meta2->flags & FLAG_INSN_IS_JUMP_DST) 2393 continue; 2394 2395 meta2->skip = true; 2396 } 2397 } 2398 2399 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) 2400 { 2401 struct nfp_insn_meta *meta1, *meta2, *meta3; 2402 2403 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { 2404 struct bpf_insn insn, next1, next2; 2405 2406 insn = meta1->insn; 2407 next1 = meta2->insn; 2408 next2 = meta3->insn; 2409 2410 if (BPF_CLASS(insn.code) != BPF_LD) 2411 continue; 2412 if (BPF_MODE(insn.code) != BPF_ABS && 2413 BPF_MODE(insn.code) != BPF_IND) 2414 continue; 2415 if (BPF_SIZE(insn.code) != BPF_W) 2416 continue; 2417 2418 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && 2419 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && 2420 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && 2421 next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) 2422 continue; 2423 2424 if (next1.src_reg || next1.dst_reg || 2425 next2.src_reg || next2.dst_reg) 2426 continue; 2427 2428 if (next1.imm != 0x20 || next2.imm != 0x20) 2429 continue; 2430 2431 if (meta2->flags & FLAG_INSN_IS_JUMP_DST || 2432 meta3->flags & FLAG_INSN_IS_JUMP_DST) 2433 continue; 2434 2435 meta2->skip = true; 2436 meta3->skip = true; 2437 } 2438 } 2439 2440 /* load/store pair that forms memory copy sould look like the following: 2441 * 2442 * ld_width R, [addr_src + offset_src] 2443 * st_width [addr_dest + offset_dest], R 2444 * 2445 * The destination register of load and source register of store should 2446 * be the same, load and store should also perform at the same width. 2447 * If either of addr_src or addr_dest is stack pointer, we don't do the 2448 * CPP optimization as stack is modelled by registers on NFP. 2449 */ 2450 static bool 2451 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, 2452 struct nfp_insn_meta *st_meta) 2453 { 2454 struct bpf_insn *ld = &ld_meta->insn; 2455 struct bpf_insn *st = &st_meta->insn; 2456 2457 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) 2458 return false; 2459 2460 if (ld_meta->ptr.type != PTR_TO_PACKET) 2461 return false; 2462 2463 if (st_meta->ptr.type != PTR_TO_PACKET) 2464 return false; 2465 2466 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) 2467 return false; 2468 2469 if (ld->dst_reg != st->src_reg) 2470 return false; 2471 2472 /* There is jump to the store insn in this pair. */ 2473 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) 2474 return false; 2475 2476 return true; 2477 } 2478 2479 /* Currently, we only support chaining load/store pairs if: 2480 * 2481 * - Their address base registers are the same. 2482 * - Their address offsets are in the same order. 2483 * - They operate at the same memory width. 2484 * - There is no jump into the middle of them. 2485 */ 2486 static bool 2487 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, 2488 struct nfp_insn_meta *st_meta, 2489 struct bpf_insn *prev_ld, 2490 struct bpf_insn *prev_st) 2491 { 2492 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; 2493 struct bpf_insn *ld = &ld_meta->insn; 2494 struct bpf_insn *st = &st_meta->insn; 2495 s16 prev_ld_off, prev_st_off; 2496 2497 /* This pair is the start pair. */ 2498 if (!prev_ld) 2499 return true; 2500 2501 prev_size = BPF_LDST_BYTES(prev_ld); 2502 curr_size = BPF_LDST_BYTES(ld); 2503 prev_ld_base = prev_ld->src_reg; 2504 prev_st_base = prev_st->dst_reg; 2505 prev_ld_dst = prev_ld->dst_reg; 2506 prev_ld_off = prev_ld->off; 2507 prev_st_off = prev_st->off; 2508 2509 if (ld->dst_reg != prev_ld_dst) 2510 return false; 2511 2512 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) 2513 return false; 2514 2515 if (curr_size != prev_size) 2516 return false; 2517 2518 /* There is jump to the head of this pair. */ 2519 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) 2520 return false; 2521 2522 /* Both in ascending order. */ 2523 if (prev_ld_off + prev_size == ld->off && 2524 prev_st_off + prev_size == st->off) 2525 return true; 2526 2527 /* Both in descending order. */ 2528 if (ld->off + curr_size == prev_ld_off && 2529 st->off + curr_size == prev_st_off) 2530 return true; 2531 2532 return false; 2533 } 2534 2535 /* Return TRUE if cross memory access happens. Cross memory access means 2536 * store area is overlapping with load area that a later load might load 2537 * the value from previous store, for this case we can't treat the sequence 2538 * as an memory copy. 2539 */ 2540 static bool 2541 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, 2542 struct nfp_insn_meta *head_st_meta) 2543 { 2544 s16 head_ld_off, head_st_off, ld_off; 2545 2546 /* Different pointer types does not overlap. */ 2547 if (head_ld_meta->ptr.type != head_st_meta->ptr.type) 2548 return false; 2549 2550 /* load and store are both PTR_TO_PACKET, check ID info. */ 2551 if (head_ld_meta->ptr.id != head_st_meta->ptr.id) 2552 return true; 2553 2554 /* Canonicalize the offsets. Turn all of them against the original 2555 * base register. 2556 */ 2557 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; 2558 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; 2559 ld_off = ld->off + head_ld_meta->ptr.off; 2560 2561 /* Ascending order cross. */ 2562 if (ld_off > head_ld_off && 2563 head_ld_off < head_st_off && ld_off >= head_st_off) 2564 return true; 2565 2566 /* Descending order cross. */ 2567 if (ld_off < head_ld_off && 2568 head_ld_off > head_st_off && ld_off <= head_st_off) 2569 return true; 2570 2571 return false; 2572 } 2573 2574 /* This pass try to identify the following instructoin sequences. 2575 * 2576 * load R, [regA + offA] 2577 * store [regB + offB], R 2578 * load R, [regA + offA + const_imm_A] 2579 * store [regB + offB + const_imm_A], R 2580 * load R, [regA + offA + 2 * const_imm_A] 2581 * store [regB + offB + 2 * const_imm_A], R 2582 * ... 2583 * 2584 * Above sequence is typically generated by compiler when lowering 2585 * memcpy. NFP prefer using CPP instructions to accelerate it. 2586 */ 2587 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) 2588 { 2589 struct nfp_insn_meta *head_ld_meta = NULL; 2590 struct nfp_insn_meta *head_st_meta = NULL; 2591 struct nfp_insn_meta *meta1, *meta2; 2592 struct bpf_insn *prev_ld = NULL; 2593 struct bpf_insn *prev_st = NULL; 2594 u8 count = 0; 2595 2596 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2597 struct bpf_insn *ld = &meta1->insn; 2598 struct bpf_insn *st = &meta2->insn; 2599 2600 /* Reset record status if any of the following if true: 2601 * - The current insn pair is not load/store. 2602 * - The load/store pair doesn't chain with previous one. 2603 * - The chained load/store pair crossed with previous pair. 2604 * - The chained load/store pair has a total size of memory 2605 * copy beyond 128 bytes which is the maximum length a 2606 * single NFP CPP command can transfer. 2607 */ 2608 if (!curr_pair_is_memcpy(meta1, meta2) || 2609 !curr_pair_chain_with_previous(meta1, meta2, prev_ld, 2610 prev_st) || 2611 (head_ld_meta && (cross_mem_access(ld, head_ld_meta, 2612 head_st_meta) || 2613 head_ld_meta->ldst_gather_len >= 128))) { 2614 if (!count) 2615 continue; 2616 2617 if (count > 1) { 2618 s16 prev_ld_off = prev_ld->off; 2619 s16 prev_st_off = prev_st->off; 2620 s16 head_ld_off = head_ld_meta->insn.off; 2621 2622 if (prev_ld_off < head_ld_off) { 2623 head_ld_meta->insn.off = prev_ld_off; 2624 head_st_meta->insn.off = prev_st_off; 2625 head_ld_meta->ldst_gather_len = 2626 -head_ld_meta->ldst_gather_len; 2627 } 2628 2629 head_ld_meta->paired_st = &head_st_meta->insn; 2630 head_st_meta->skip = true; 2631 } else { 2632 head_ld_meta->ldst_gather_len = 0; 2633 } 2634 2635 /* If the chain is ended by an load/store pair then this 2636 * could serve as the new head of the the next chain. 2637 */ 2638 if (curr_pair_is_memcpy(meta1, meta2)) { 2639 head_ld_meta = meta1; 2640 head_st_meta = meta2; 2641 head_ld_meta->ldst_gather_len = 2642 BPF_LDST_BYTES(ld); 2643 meta1 = nfp_meta_next(meta1); 2644 meta2 = nfp_meta_next(meta2); 2645 prev_ld = ld; 2646 prev_st = st; 2647 count = 1; 2648 } else { 2649 head_ld_meta = NULL; 2650 head_st_meta = NULL; 2651 prev_ld = NULL; 2652 prev_st = NULL; 2653 count = 0; 2654 } 2655 2656 continue; 2657 } 2658 2659 if (!head_ld_meta) { 2660 head_ld_meta = meta1; 2661 head_st_meta = meta2; 2662 } else { 2663 meta1->skip = true; 2664 meta2->skip = true; 2665 } 2666 2667 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); 2668 meta1 = nfp_meta_next(meta1); 2669 meta2 = nfp_meta_next(meta2); 2670 prev_ld = ld; 2671 prev_st = st; 2672 count++; 2673 } 2674 } 2675 2676 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) 2677 { 2678 nfp_bpf_opt_reg_init(nfp_prog); 2679 2680 nfp_bpf_opt_ld_mask(nfp_prog); 2681 nfp_bpf_opt_ld_shift(nfp_prog); 2682 nfp_bpf_opt_ldst_gather(nfp_prog); 2683 2684 return 0; 2685 } 2686 2687 static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) 2688 { 2689 int i; 2690 2691 for (i = 0; i < nfp_prog->prog_len; i++) { 2692 int err; 2693 2694 err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); 2695 if (err) 2696 return err; 2697 2698 nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); 2699 2700 ustore[i] = cpu_to_le64(nfp_prog->prog[i]); 2701 } 2702 2703 return 0; 2704 } 2705 2706 int nfp_bpf_jit(struct nfp_prog *nfp_prog) 2707 { 2708 int ret; 2709 2710 ret = nfp_bpf_optimize(nfp_prog); 2711 if (ret) 2712 return ret; 2713 2714 ret = nfp_translate(nfp_prog); 2715 if (ret) { 2716 pr_err("Translation failed with error %d (translated: %u)\n", 2717 ret, nfp_prog->n_translated); 2718 return -EINVAL; 2719 } 2720 2721 return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); 2722 } 2723