1 /* 2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 3 * 4 * This software is dual licensed under the GNU General License Version 2, 5 * June 1991 as shown in the file COPYING in the top-level directory of this 6 * source tree or the BSD 2-Clause License provided below. You have the 7 * option to license this software under the complete terms of either license. 8 * 9 * The BSD 2-Clause License: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * 1. Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #define pr_fmt(fmt) "NFP net bpf: " fmt 35 36 #include <linux/bug.h> 37 #include <linux/kernel.h> 38 #include <linux/bpf.h> 39 #include <linux/filter.h> 40 #include <linux/pkt_cls.h> 41 #include <linux/unistd.h> 42 43 #include "main.h" 44 #include "../nfp_asm.h" 45 46 /* --- NFP prog --- */ 47 /* Foreach "multiple" entries macros provide pos and next<n> pointers. 48 * It's safe to modify the next pointers (but not pos). 
49 */ 50 #define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ 51 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ 52 next = list_next_entry(pos, l); \ 53 &(nfp_prog)->insns != &pos->l && \ 54 &(nfp_prog)->insns != &next->l; \ 55 pos = nfp_meta_next(pos), \ 56 next = nfp_meta_next(pos)) 57 58 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ 59 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ 60 next = list_next_entry(pos, l), \ 61 next2 = list_next_entry(next, l); \ 62 &(nfp_prog)->insns != &pos->l && \ 63 &(nfp_prog)->insns != &next->l && \ 64 &(nfp_prog)->insns != &next2->l; \ 65 pos = nfp_meta_next(pos), \ 66 next = nfp_meta_next(pos), \ 67 next2 = nfp_meta_next(next)) 68 69 static bool 70 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 71 { 72 return meta->l.prev != &nfp_prog->insns; 73 } 74 75 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) 76 { 77 if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { 78 nfp_prog->error = -ENOSPC; 79 return; 80 } 81 82 nfp_prog->prog[nfp_prog->prog_len] = insn; 83 nfp_prog->prog_len++; 84 } 85 86 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) 87 { 88 return nfp_prog->start_off + nfp_prog->prog_len; 89 } 90 91 static bool 92 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) 93 { 94 /* If there is a recorded error we may have dropped instructions; 95 * that doesn't have to be due to translator bug, and the translation 96 * will fail anyway, so just return OK. 
97 */ 98 if (nfp_prog->error) 99 return true; 100 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); 101 } 102 103 static unsigned int 104 nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) 105 { 106 return offset - nfp_prog->start_off; 107 } 108 109 /* --- Emitters --- */ 110 static void 111 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, 112 u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir) 113 { 114 enum cmd_ctx_swap ctx; 115 u64 insn; 116 117 if (sync) 118 ctx = CMD_CTX_SWAP; 119 else 120 ctx = CMD_CTX_NO_SWAP; 121 122 insn = FIELD_PREP(OP_CMD_A_SRC, areg) | 123 FIELD_PREP(OP_CMD_CTX, ctx) | 124 FIELD_PREP(OP_CMD_B_SRC, breg) | 125 FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | 126 FIELD_PREP(OP_CMD_XFER, xfer) | 127 FIELD_PREP(OP_CMD_CNT, size) | 128 FIELD_PREP(OP_CMD_SIG, sync) | 129 FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | 130 FIELD_PREP(OP_CMD_INDIR, indir) | 131 FIELD_PREP(OP_CMD_MODE, mode); 132 133 nfp_prog_push(nfp_prog, insn); 134 } 135 136 static void 137 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 138 swreg lreg, swreg rreg, u8 size, bool sync, bool indir) 139 { 140 struct nfp_insn_re_regs reg; 141 int err; 142 143 err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); 144 if (err) { 145 nfp_prog->error = err; 146 return; 147 } 148 if (reg.swap) { 149 pr_err("cmd can't swap arguments\n"); 150 nfp_prog->error = -EFAULT; 151 return; 152 } 153 if (reg.dst_lmextn || reg.src_lmextn) { 154 pr_err("cmd can't use LMextn\n"); 155 nfp_prog->error = -EFAULT; 156 return; 157 } 158 159 __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync, 160 indir); 161 } 162 163 static void 164 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 165 swreg lreg, swreg rreg, u8 size, bool sync) 166 { 167 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false); 168 } 169 170 static void 171 
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, 172 swreg lreg, swreg rreg, u8 size, bool sync) 173 { 174 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true); 175 } 176 177 static void 178 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, 179 enum br_ctx_signal_state css, u16 addr, u8 defer) 180 { 181 u16 addr_lo, addr_hi; 182 u64 insn; 183 184 addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); 185 addr_hi = addr != addr_lo; 186 187 insn = OP_BR_BASE | 188 FIELD_PREP(OP_BR_MASK, mask) | 189 FIELD_PREP(OP_BR_EV_PIP, ev_pip) | 190 FIELD_PREP(OP_BR_CSS, css) | 191 FIELD_PREP(OP_BR_DEFBR, defer) | 192 FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | 193 FIELD_PREP(OP_BR_ADDR_HI, addr_hi); 194 195 nfp_prog_push(nfp_prog, insn); 196 } 197 198 static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) 199 { 200 if (defer > 2) { 201 pr_err("BUG: branch defer out of bounds %d\n", defer); 202 nfp_prog->error = -EFAULT; 203 return; 204 } 205 __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); 206 } 207 208 static void 209 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) 210 { 211 __emit_br(nfp_prog, mask, 212 mask != BR_UNC ? 
BR_EV_PIP_COND : BR_EV_PIP_UNCOND, 213 BR_CSS_NONE, addr, defer); 214 } 215 216 static void 217 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, 218 enum immed_width width, bool invert, 219 enum immed_shift shift, bool wr_both, 220 bool dst_lmextn, bool src_lmextn) 221 { 222 u64 insn; 223 224 insn = OP_IMMED_BASE | 225 FIELD_PREP(OP_IMMED_A_SRC, areg) | 226 FIELD_PREP(OP_IMMED_B_SRC, breg) | 227 FIELD_PREP(OP_IMMED_IMM, imm_hi) | 228 FIELD_PREP(OP_IMMED_WIDTH, width) | 229 FIELD_PREP(OP_IMMED_INV, invert) | 230 FIELD_PREP(OP_IMMED_SHIFT, shift) | 231 FIELD_PREP(OP_IMMED_WR_AB, wr_both) | 232 FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | 233 FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); 234 235 nfp_prog_push(nfp_prog, insn); 236 } 237 238 static void 239 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, 240 enum immed_width width, bool invert, enum immed_shift shift) 241 { 242 struct nfp_insn_ur_regs reg; 243 int err; 244 245 if (swreg_type(dst) == NN_REG_IMM) { 246 nfp_prog->error = -EFAULT; 247 return; 248 } 249 250 err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); 251 if (err) { 252 nfp_prog->error = err; 253 return; 254 } 255 256 /* Use reg.dst when destination is No-Dest. */ 257 __emit_immed(nfp_prog, 258 swreg_type(dst) == NN_REG_NONE ? 
reg.dst : reg.areg, 259 reg.breg, imm >> 8, width, invert, shift, 260 reg.wr_both, reg.dst_lmextn, reg.src_lmextn); 261 } 262 263 static void 264 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, 265 enum shf_sc sc, u8 shift, 266 u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, 267 bool dst_lmextn, bool src_lmextn) 268 { 269 u64 insn; 270 271 if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { 272 nfp_prog->error = -EFAULT; 273 return; 274 } 275 276 if (sc == SHF_SC_L_SHF) 277 shift = 32 - shift; 278 279 insn = OP_SHF_BASE | 280 FIELD_PREP(OP_SHF_A_SRC, areg) | 281 FIELD_PREP(OP_SHF_SC, sc) | 282 FIELD_PREP(OP_SHF_B_SRC, breg) | 283 FIELD_PREP(OP_SHF_I8, i8) | 284 FIELD_PREP(OP_SHF_SW, sw) | 285 FIELD_PREP(OP_SHF_DST, dst) | 286 FIELD_PREP(OP_SHF_SHIFT, shift) | 287 FIELD_PREP(OP_SHF_OP, op) | 288 FIELD_PREP(OP_SHF_DST_AB, dst_ab) | 289 FIELD_PREP(OP_SHF_WR_AB, wr_both) | 290 FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | 291 FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); 292 293 nfp_prog_push(nfp_prog, insn); 294 } 295 296 static void 297 emit_shf(struct nfp_prog *nfp_prog, swreg dst, 298 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) 299 { 300 struct nfp_insn_re_regs reg; 301 int err; 302 303 err = swreg_to_restricted(dst, lreg, rreg, ®, true); 304 if (err) { 305 nfp_prog->error = err; 306 return; 307 } 308 309 __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, 310 reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, 311 reg.dst_lmextn, reg.src_lmextn); 312 } 313 314 static void 315 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, 316 u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, 317 bool dst_lmextn, bool src_lmextn) 318 { 319 u64 insn; 320 321 insn = OP_ALU_BASE | 322 FIELD_PREP(OP_ALU_A_SRC, areg) | 323 FIELD_PREP(OP_ALU_B_SRC, breg) | 324 FIELD_PREP(OP_ALU_DST, dst) | 325 FIELD_PREP(OP_ALU_SW, swap) | 326 FIELD_PREP(OP_ALU_OP, op) | 327 FIELD_PREP(OP_ALU_DST_AB, dst_ab) | 328 
FIELD_PREP(OP_ALU_WR_AB, wr_both) | 329 FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | 330 FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); 331 332 nfp_prog_push(nfp_prog, insn); 333 } 334 335 static void 336 emit_alu(struct nfp_prog *nfp_prog, swreg dst, 337 swreg lreg, enum alu_op op, swreg rreg) 338 { 339 struct nfp_insn_ur_regs reg; 340 int err; 341 342 err = swreg_to_unrestricted(dst, lreg, rreg, ®); 343 if (err) { 344 nfp_prog->error = err; 345 return; 346 } 347 348 __emit_alu(nfp_prog, reg.dst, reg.dst_ab, 349 reg.areg, op, reg.breg, reg.swap, reg.wr_both, 350 reg.dst_lmextn, reg.src_lmextn); 351 } 352 353 static void 354 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, 355 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, 356 bool zero, bool swap, bool wr_both, 357 bool dst_lmextn, bool src_lmextn) 358 { 359 u64 insn; 360 361 insn = OP_LDF_BASE | 362 FIELD_PREP(OP_LDF_A_SRC, areg) | 363 FIELD_PREP(OP_LDF_SC, sc) | 364 FIELD_PREP(OP_LDF_B_SRC, breg) | 365 FIELD_PREP(OP_LDF_I8, imm8) | 366 FIELD_PREP(OP_LDF_SW, swap) | 367 FIELD_PREP(OP_LDF_ZF, zero) | 368 FIELD_PREP(OP_LDF_BMASK, bmask) | 369 FIELD_PREP(OP_LDF_SHF, shift) | 370 FIELD_PREP(OP_LDF_WR_AB, wr_both) | 371 FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | 372 FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); 373 374 nfp_prog_push(nfp_prog, insn); 375 } 376 377 static void 378 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 379 enum shf_sc sc, u8 shift, bool zero) 380 { 381 struct nfp_insn_re_regs reg; 382 int err; 383 384 /* Note: ld_field is special as it uses one of the src regs as dst */ 385 err = swreg_to_restricted(dst, dst, src, ®, true); 386 if (err) { 387 nfp_prog->error = err; 388 return; 389 } 390 391 __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, 392 reg.i8, zero, reg.swap, reg.wr_both, 393 reg.dst_lmextn, reg.src_lmextn); 394 } 395 396 static void 397 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, 398 enum shf_sc sc, u8 shift) 
399 { 400 emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); 401 } 402 403 static void 404 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, 405 bool dst_lmextn, bool src_lmextn) 406 { 407 u64 insn; 408 409 insn = OP_LCSR_BASE | 410 FIELD_PREP(OP_LCSR_A_SRC, areg) | 411 FIELD_PREP(OP_LCSR_B_SRC, breg) | 412 FIELD_PREP(OP_LCSR_WRITE, wr) | 413 FIELD_PREP(OP_LCSR_ADDR, addr) | 414 FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | 415 FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); 416 417 nfp_prog_push(nfp_prog, insn); 418 } 419 420 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) 421 { 422 struct nfp_insn_ur_regs reg; 423 int err; 424 425 /* This instruction takes immeds instead of reg_none() for the ignored 426 * operand, but we can't encode 2 immeds in one instr with our normal 427 * swreg infra so if param is an immed, we encode as reg_none() and 428 * copy the immed to both operands. 429 */ 430 if (swreg_type(src) == NN_REG_IMM) { 431 err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); 432 reg.breg = reg.areg; 433 } else { 434 err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); 435 } 436 if (err) { 437 nfp_prog->error = err; 438 return; 439 } 440 441 __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, 442 false, reg.src_lmextn); 443 } 444 445 static void emit_nop(struct nfp_prog *nfp_prog) 446 { 447 __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); 448 } 449 450 /* --- Wrappers --- */ 451 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) 452 { 453 if (!(imm & 0xffff0000)) { 454 *val = imm; 455 *shift = IMMED_SHIFT_0B; 456 } else if (!(imm & 0xff0000ff)) { 457 *val = imm >> 8; 458 *shift = IMMED_SHIFT_1B; 459 } else if (!(imm & 0x0000ffff)) { 460 *val = imm >> 16; 461 *shift = IMMED_SHIFT_2B; 462 } else { 463 return false; 464 } 465 466 return true; 467 } 468 469 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) 470 { 471 enum 
immed_shift shift; 472 u16 val; 473 474 if (pack_immed(imm, &val, &shift)) { 475 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); 476 } else if (pack_immed(~imm, &val, &shift)) { 477 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); 478 } else { 479 emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, 480 false, IMMED_SHIFT_0B); 481 emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, 482 false, IMMED_SHIFT_2B); 483 } 484 } 485 486 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) 487 * If the @imm is small enough encode it directly in operand and return 488 * otherwise load @imm to a spare register and return its encoding. 489 */ 490 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 491 { 492 if (FIELD_FIT(UR_REG_IMM_MAX, imm)) 493 return reg_imm(imm); 494 495 wrp_immed(nfp_prog, tmp_reg, imm); 496 return tmp_reg; 497 } 498 499 /* re_load_imm_any() - encode immediate or use tmp register (restricted) 500 * If the @imm is small enough encode it directly in operand and return 501 * otherwise load @imm to a spare register and return its encoding. 
502 */ 503 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) 504 { 505 if (FIELD_FIT(RE_REG_IMM_MAX, imm)) 506 return reg_imm(imm); 507 508 wrp_immed(nfp_prog, tmp_reg, imm); 509 return tmp_reg; 510 } 511 512 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) 513 { 514 while (count--) 515 emit_nop(nfp_prog); 516 } 517 518 static void 519 wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, 520 enum br_special special) 521 { 522 emit_br(nfp_prog, mask, 0, 0); 523 524 nfp_prog->prog[nfp_prog->prog_len - 1] |= 525 FIELD_PREP(OP_BR_SPECIAL, special); 526 } 527 528 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) 529 { 530 emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); 531 } 532 533 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) 534 { 535 wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); 536 } 537 538 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the 539 * result to @dst from low end. 540 */ 541 static void 542 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, 543 u8 offset) 544 { 545 enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; 546 u8 mask = (1 << field_len) - 1; 547 548 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); 549 } 550 551 /* NFP has Command Push Pull bus which supports bluk memory operations. */ 552 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 553 { 554 bool descending_seq = meta->ldst_gather_len < 0; 555 s16 len = abs(meta->ldst_gather_len); 556 swreg src_base, off; 557 unsigned int i; 558 u8 xfer_num; 559 560 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 561 src_base = reg_a(meta->insn.src_reg * 2); 562 xfer_num = round_up(len, 4) / 4; 563 564 /* Setup PREV_ALU fields to override memory read length. 
*/ 565 if (len > 32) 566 wrp_immed(nfp_prog, reg_none(), 567 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 568 569 /* Memory read from source addr into transfer-in registers. */ 570 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, 571 off, xfer_num - 1, true, len > 32); 572 573 /* Move from transfer-in to transfer-out. */ 574 for (i = 0; i < xfer_num; i++) 575 wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); 576 577 off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); 578 579 if (len <= 8) { 580 /* Use single direct_ref write8. */ 581 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 582 reg_a(meta->paired_st->dst_reg * 2), off, len - 1, 583 true); 584 } else if (len <= 32 && IS_ALIGNED(len, 4)) { 585 /* Use single direct_ref write32. */ 586 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 587 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, 588 true); 589 } else if (len <= 32) { 590 /* Use single indirect_ref write8. */ 591 wrp_immed(nfp_prog, reg_none(), 592 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); 593 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 594 reg_a(meta->paired_st->dst_reg * 2), off, 595 len - 1, true); 596 } else if (IS_ALIGNED(len, 4)) { 597 /* Use single indirect_ref write32. */ 598 wrp_immed(nfp_prog, reg_none(), 599 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); 600 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 601 reg_a(meta->paired_st->dst_reg * 2), off, 602 xfer_num - 1, true); 603 } else if (len <= 40) { 604 /* Use one direct_ref write32 to write the first 32-bytes, then 605 * another direct_ref write8 to write the remaining bytes. 
606 */ 607 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 608 reg_a(meta->paired_st->dst_reg * 2), off, 7, 609 true); 610 611 off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, 612 imm_b(nfp_prog)); 613 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, 614 reg_a(meta->paired_st->dst_reg * 2), off, len - 33, 615 true); 616 } else { 617 /* Use one indirect_ref write32 to write 4-bytes aligned length, 618 * then another direct_ref write8 to write the remaining bytes. 619 */ 620 u8 new_off; 621 622 wrp_immed(nfp_prog, reg_none(), 623 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); 624 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, 625 reg_a(meta->paired_st->dst_reg * 2), off, 626 xfer_num - 2, true); 627 new_off = meta->paired_st->off + (xfer_num - 1) * 4; 628 off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); 629 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 630 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, 631 (len & 0x3) - 1, true); 632 } 633 634 /* TODO: The following extra load is to make sure data flow be identical 635 * before and after we do memory copy optimization. 636 * 637 * The load destination register is not guaranteed to be dead, so we 638 * need to make sure it is loaded with the value the same as before 639 * this transformation. 640 * 641 * These extra loads could be removed once we have accurate register 642 * usage information. 643 */ 644 if (descending_seq) 645 xfer_num = 0; 646 else if (BPF_SIZE(meta->insn.code) != BPF_DW) 647 xfer_num = xfer_num - 1; 648 else 649 xfer_num = xfer_num - 2; 650 651 switch (BPF_SIZE(meta->insn.code)) { 652 case BPF_B: 653 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 654 reg_xfer(xfer_num), 1, 655 IS_ALIGNED(len, 4) ? 
3 : (len & 3) - 1); 656 break; 657 case BPF_H: 658 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), 659 reg_xfer(xfer_num), 2, (len & 3) ^ 2); 660 break; 661 case BPF_W: 662 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 663 reg_xfer(0)); 664 break; 665 case BPF_DW: 666 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), 667 reg_xfer(xfer_num)); 668 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 669 reg_xfer(xfer_num + 1)); 670 break; 671 } 672 673 if (BPF_SIZE(meta->insn.code) != BPF_DW) 674 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 675 676 return 0; 677 } 678 679 static int 680 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) 681 { 682 unsigned int i; 683 u16 shift, sz; 684 685 /* We load the value from the address indicated in @offset and then 686 * shift out the data we don't need. Note: this is big endian! 687 */ 688 sz = max(size, 4); 689 shift = size < 4 ? 4 - size : 0; 690 691 emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, 692 pptr_reg(nfp_prog), offset, sz - 1, true); 693 694 i = 0; 695 if (shift) 696 emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, 697 reg_xfer(0), SHF_SC_R_SHF, shift * 8); 698 else 699 for (; i * 4 < size; i++) 700 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 701 702 if (i < 2) 703 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 704 705 return 0; 706 } 707 708 static int 709 data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, 710 u8 dst_gpr, int size) 711 { 712 unsigned int i; 713 u8 mask, sz; 714 715 /* We load the value from the address indicated in @offset and then 716 * mask out the data we don't need. Note: this is little endian! 717 */ 718 sz = max(size, 4); 719 mask = size < 4 ? 
GENMASK(size - 1, 0) : 0; 720 721 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, 722 reg_a(src_gpr), offset, sz / 4 - 1, true); 723 724 i = 0; 725 if (mask) 726 emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, 727 reg_xfer(0), SHF_SC_NONE, 0, true); 728 else 729 for (; i * 4 < size; i++) 730 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); 731 732 if (i < 2) 733 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); 734 735 return 0; 736 } 737 738 static int 739 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) 740 { 741 swreg tmp_reg; 742 743 /* Calculate the true offset (src_reg + imm) */ 744 tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 745 emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); 746 747 /* Check packet length (size guaranteed to fit b/c it's u8) */ 748 emit_alu(nfp_prog, imm_a(nfp_prog), 749 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); 750 emit_alu(nfp_prog, reg_none(), 751 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); 752 wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); 753 754 /* Load data */ 755 return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); 756 } 757 758 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) 759 { 760 swreg tmp_reg; 761 762 /* Check packet length */ 763 tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); 764 emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); 765 wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); 766 767 /* Load data */ 768 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); 769 return data_ld(nfp_prog, tmp_reg, 0, size); 770 } 771 772 static int 773 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 774 u8 src_gpr, u8 size) 775 { 776 unsigned int i; 777 778 for (i = 0; i * 4 < size; i++) 779 wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); 780 781 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 782 reg_a(dst_gpr), offset, 
size - 1, true); 783 784 return 0; 785 } 786 787 static int 788 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, 789 u64 imm, u8 size) 790 { 791 wrp_immed(nfp_prog, reg_xfer(0), imm); 792 if (size == 8) 793 wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); 794 795 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, 796 reg_a(dst_gpr), offset, size - 1, true); 797 798 return 0; 799 } 800 801 typedef int 802 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, 803 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 804 bool needs_inc); 805 806 static int 807 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, 808 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 809 bool needs_inc) 810 { 811 bool should_inc = needs_inc && new_gpr && !last; 812 u32 idx, src_byte; 813 enum shf_sc sc; 814 swreg reg; 815 int shf; 816 u8 mask; 817 818 if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) 819 return -EOPNOTSUPP; 820 821 idx = off / 4; 822 823 /* Move the entire word */ 824 if (size == 4) { 825 wrp_mov(nfp_prog, reg_both(dst), 826 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); 827 return 0; 828 } 829 830 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) 831 return -EOPNOTSUPP; 832 833 src_byte = off % 4; 834 835 mask = (1 << size) - 1; 836 mask <<= dst_byte; 837 838 if (WARN_ON_ONCE(mask > 0xf)) 839 return -EOPNOTSUPP; 840 841 shf = abs(src_byte - dst_byte) * 8; 842 if (src_byte == dst_byte) { 843 sc = SHF_SC_NONE; 844 } else if (src_byte < dst_byte) { 845 shf = 32 - shf; 846 sc = SHF_SC_L_SHF; 847 } else { 848 sc = SHF_SC_R_SHF; 849 } 850 851 /* ld_field can address fewer indexes, if offset too large do RMW. 852 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. 853 */ 854 if (idx <= RE_REG_LM_IDX_MAX) { 855 reg = reg_lm(lm3 ? 
3 : 0, idx); 856 } else { 857 reg = imm_a(nfp_prog); 858 /* If it's not the first part of the load and we start a new GPR 859 * that means we are loading a second part of the LMEM word into 860 * a new GPR. IOW we've already looked that LMEM word and 861 * therefore it has been loaded into imm_a(). 862 */ 863 if (first || !new_gpr) 864 wrp_mov(nfp_prog, reg, reg_lm(0, idx)); 865 } 866 867 emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); 868 869 if (should_inc) 870 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); 871 872 return 0; 873 } 874 875 static int 876 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, 877 unsigned int size, bool first, bool new_gpr, bool last, bool lm3, 878 bool needs_inc) 879 { 880 bool should_inc = needs_inc && new_gpr && !last; 881 u32 idx, dst_byte; 882 enum shf_sc sc; 883 swreg reg; 884 int shf; 885 u8 mask; 886 887 if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) 888 return -EOPNOTSUPP; 889 890 idx = off / 4; 891 892 /* Move the entire word */ 893 if (size == 4) { 894 wrp_mov(nfp_prog, 895 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), 896 reg_b(src)); 897 return 0; 898 } 899 900 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) 901 return -EOPNOTSUPP; 902 903 dst_byte = off % 4; 904 905 mask = (1 << size) - 1; 906 mask <<= dst_byte; 907 908 if (WARN_ON_ONCE(mask > 0xf)) 909 return -EOPNOTSUPP; 910 911 shf = abs(src_byte - dst_byte) * 8; 912 if (src_byte == dst_byte) { 913 sc = SHF_SC_NONE; 914 } else if (src_byte < dst_byte) { 915 shf = 32 - shf; 916 sc = SHF_SC_L_SHF; 917 } else { 918 sc = SHF_SC_R_SHF; 919 } 920 921 /* ld_field can address fewer indexes, if offset too large do RMW. 922 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. 923 */ 924 if (idx <= RE_REG_LM_IDX_MAX) { 925 reg = reg_lm(lm3 ? 
3 : 0, idx); 926 } else { 927 reg = imm_a(nfp_prog); 928 /* Only first and last LMEM locations are going to need RMW, 929 * the middle location will be overwritten fully. 930 */ 931 if (first || last) 932 wrp_mov(nfp_prog, reg, reg_lm(0, idx)); 933 } 934 935 emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); 936 937 if (new_gpr || last) { 938 if (idx > RE_REG_LM_IDX_MAX) 939 wrp_mov(nfp_prog, reg_lm(0, idx), reg); 940 if (should_inc) 941 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); 942 } 943 944 return 0; 945 } 946 947 static int 948 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 949 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, 950 bool clr_gpr, lmem_step step) 951 { 952 s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; 953 bool first = true, last; 954 bool needs_inc = false; 955 swreg stack_off_reg; 956 u8 prev_gpr = 255; 957 u32 gpr_byte = 0; 958 bool lm3 = true; 959 int ret; 960 961 if (meta->ptr_not_const) { 962 /* Use of the last encountered ptr_off is OK, they all have 963 * the same alignment. Depend on low bits of value being 964 * discarded when written to LMaddr register. 965 */ 966 stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, 967 stack_imm(nfp_prog)); 968 969 emit_alu(nfp_prog, imm_b(nfp_prog), 970 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); 971 972 needs_inc = true; 973 } else if (off + size <= 64) { 974 /* We can reach bottom 64B with LMaddr0 */ 975 lm3 = false; 976 } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { 977 /* We have to set up a new pointer. If we know the offset 978 * and the entire access falls into a single 32 byte aligned 979 * window we won't have to increment the LM pointer. 980 * The 32 byte alignment is imporant because offset is ORed in 981 * not added when doing *l$indexN[off]. 
982 */ 983 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), 984 stack_imm(nfp_prog)); 985 emit_alu(nfp_prog, imm_b(nfp_prog), 986 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 987 988 off %= 32; 989 } else { 990 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), 991 stack_imm(nfp_prog)); 992 993 emit_alu(nfp_prog, imm_b(nfp_prog), 994 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); 995 996 needs_inc = true; 997 } 998 if (lm3) { 999 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); 1000 /* For size < 4 one slot will be filled by zeroing of upper. */ 1001 wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); 1002 } 1003 1004 if (clr_gpr && size < 8) 1005 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1006 1007 while (size) { 1008 u32 slice_end; 1009 u8 slice_size; 1010 1011 slice_size = min(size, 4 - gpr_byte); 1012 slice_end = min(off + slice_size, round_up(off + 1, 4)); 1013 slice_size = slice_end - off; 1014 1015 last = slice_size == size; 1016 1017 if (needs_inc) 1018 off %= 4; 1019 1020 ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, 1021 first, gpr != prev_gpr, last, lm3, needs_inc); 1022 if (ret) 1023 return ret; 1024 1025 prev_gpr = gpr; 1026 first = false; 1027 1028 gpr_byte += slice_size; 1029 if (gpr_byte >= 4) { 1030 gpr_byte -= 4; 1031 gpr++; 1032 } 1033 1034 size -= slice_size; 1035 off += slice_size; 1036 } 1037 1038 return 0; 1039 } 1040 1041 static void 1042 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) 1043 { 1044 swreg tmp_reg; 1045 1046 if (alu_op == ALU_OP_AND) { 1047 if (!imm) 1048 wrp_immed(nfp_prog, reg_both(dst), 0); 1049 if (!imm || !~imm) 1050 return; 1051 } 1052 if (alu_op == ALU_OP_OR) { 1053 if (!~imm) 1054 wrp_immed(nfp_prog, reg_both(dst), ~0U); 1055 if (!imm || !~imm) 1056 return; 1057 } 1058 if (alu_op == ALU_OP_XOR) { 1059 if (!~imm) 1060 emit_alu(nfp_prog, reg_both(dst), reg_none(), 1061 ALU_OP_NOT, reg_b(dst)); 1062 if (!imm || !~imm) 1063 return; 1064 } 1065 1066 
tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); 1067 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); 1068 } 1069 1070 static int 1071 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1072 enum alu_op alu_op, bool skip) 1073 { 1074 const struct bpf_insn *insn = &meta->insn; 1075 u64 imm = insn->imm; /* sign extend */ 1076 1077 if (skip) { 1078 meta->skip = true; 1079 return 0; 1080 } 1081 1082 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); 1083 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); 1084 1085 return 0; 1086 } 1087 1088 static int 1089 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1090 enum alu_op alu_op) 1091 { 1092 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1093 1094 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1095 emit_alu(nfp_prog, reg_both(dst + 1), 1096 reg_a(dst + 1), alu_op, reg_b(src + 1)); 1097 1098 return 0; 1099 } 1100 1101 static int 1102 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1103 enum alu_op alu_op, bool skip) 1104 { 1105 const struct bpf_insn *insn = &meta->insn; 1106 1107 if (skip) { 1108 meta->skip = true; 1109 return 0; 1110 } 1111 1112 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); 1113 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1114 1115 return 0; 1116 } 1117 1118 static int 1119 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1120 enum alu_op alu_op) 1121 { 1122 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; 1123 1124 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); 1125 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1126 1127 return 0; 1128 } 1129 1130 static void 1131 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, 1132 enum br_mask br_mask, u16 off) 1133 { 1134 emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); 1135 
emit_br(nfp_prog, br_mask, off, 0); 1136 } 1137 1138 static int 1139 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1140 enum alu_op alu_op, enum br_mask br_mask) 1141 { 1142 const struct bpf_insn *insn = &meta->insn; 1143 1144 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, 1145 insn->src_reg * 2, br_mask, insn->off); 1146 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, 1147 insn->src_reg * 2 + 1, br_mask, insn->off); 1148 1149 return 0; 1150 } 1151 1152 static int 1153 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1154 enum br_mask br_mask, bool swap) 1155 { 1156 const struct bpf_insn *insn = &meta->insn; 1157 u64 imm = insn->imm; /* sign extend */ 1158 u8 reg = insn->dst_reg * 2; 1159 swreg tmp_reg; 1160 1161 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1162 if (!swap) 1163 emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); 1164 else 1165 emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); 1166 1167 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1168 if (!swap) 1169 emit_alu(nfp_prog, reg_none(), 1170 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); 1171 else 1172 emit_alu(nfp_prog, reg_none(), 1173 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); 1174 1175 emit_br(nfp_prog, br_mask, insn->off, 0); 1176 1177 return 0; 1178 } 1179 1180 static int 1181 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1182 enum br_mask br_mask, bool swap) 1183 { 1184 const struct bpf_insn *insn = &meta->insn; 1185 u8 areg, breg; 1186 1187 areg = insn->dst_reg * 2; 1188 breg = insn->src_reg * 2; 1189 1190 if (swap) { 1191 areg ^= breg; 1192 breg ^= areg; 1193 areg ^= breg; 1194 } 1195 1196 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); 1197 emit_alu(nfp_prog, reg_none(), 1198 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); 1199 emit_br(nfp_prog, br_mask, insn->off, 0); 1200 1201 return 0; 1202 } 1203 1204 static void wrp_end32(struct 
nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	/* Two ld_field steps: first load @reg_in rotated right by 8 into all
	 * four bytes of @gpr_out, then reload the bytes selected by mask 0x5
	 * from @gpr_out rotated right by 16.
	 * NOTE(review): assuming the ld_field mask selects bytes, the net
	 * effect is a 32-bit byte swap - confirm against the NFP ISA.
	 */
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}

/* Emit code for the BPF_FUNC_xdp_adjust_head helper call.
 *
 * The adjustment is read from GPR 4 (reg_a(2 * 2)).  Two variants exist: a
 * short one, used when nfp_prog->adjust_head_location is set, and a full one
 * that validates the new offset and length at run time.
 *
 * Returns 0 on success, -EINVAL when the recorded location or the emitted
 * instruction count does not match expectations.
 */
static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
	struct nfp_bpf_cap_adjust_head *adjust_head;
	u32 ret_einval, end;

	adjust_head = &nfp_prog->bpf->adjust_head;

	/* Optimized version - 5 vs 14 cycles */
	if (nfp_prog->adjust_head_location != UINT_MAX) {
		/* The optimized form is only valid for the one call site
		 * recorded in adjust_head_location.
		 */
		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
			return -EINVAL;

		/* Move the packet pointer and shrink both length registers */
		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

		/* Return value 0 in GPRs 0 and 1 */
		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);

		/* TODO: when adjust head is guaranteed to succeed we can
		 * also eliminate the following if (r0 == 0) branch.
*/

		return 0;
	}

	/* The code below is expected to emit exactly 14 instructions before
	 * the -EINVAL return sequence, which itself is 2 instructions long;
	 * both assumptions are checked with
	 * nfp_prog_confirm_current_offset() further down.
	 */
	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
	end = ret_einval + 2;

	/* We need to use a temp because offset is just a part of the pkt ptr */
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);

	/* Validate the length is at least ETH_HLEN */
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);

	/* Skip over the -EINVAL ret code (defer 2) */
	emit_br_def(nfp_prog, end, 2);

	/* These two instructions follow the deferred branch (defer count 2) */
	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	/* -22 is -EINVAL; the high word is all ones for the sign extension */
	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}

/* --- Callbacks --- */
/* BPF_ALU64 | BPF_MOV | BPF_X: 64-bit register move.  A move from BPF_REG_10
 * is handled specially (see below).  Always returns 0.
 */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;
	u8 src = insn->src_reg * 2;

1299 if (insn->src_reg == BPF_REG_10) { 1300 swreg stack_depth_reg; 1301 1302 stack_depth_reg = ur_load_imm_any(nfp_prog, 1303 nfp_prog->stack_depth, 1304 stack_imm(nfp_prog)); 1305 emit_alu(nfp_prog, reg_both(dst), 1306 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); 1307 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1308 } else { 1309 wrp_reg_mov(nfp_prog, dst, src); 1310 wrp_reg_mov(nfp_prog, dst + 1, src + 1); 1311 } 1312 1313 return 0; 1314 } 1315 1316 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1317 { 1318 u64 imm = meta->insn.imm; /* sign extend */ 1319 1320 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); 1321 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); 1322 1323 return 0; 1324 } 1325 1326 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1327 { 1328 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); 1329 } 1330 1331 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1332 { 1333 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); 1334 } 1335 1336 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1337 { 1338 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); 1339 } 1340 1341 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1342 { 1343 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); 1344 } 1345 1346 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1347 { 1348 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); 1349 } 1350 1351 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1352 { 1353 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); 1354 } 1355 1356 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1357 { 1358 const struct bpf_insn *insn = &meta->insn; 1359 1360 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1361 reg_a(insn->dst_reg * 2), ALU_OP_ADD, 1362 
reg_b(insn->src_reg * 2)); 1363 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1364 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, 1365 reg_b(insn->src_reg * 2 + 1)); 1366 1367 return 0; 1368 } 1369 1370 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1371 { 1372 const struct bpf_insn *insn = &meta->insn; 1373 u64 imm = insn->imm; /* sign extend */ 1374 1375 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); 1376 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); 1377 1378 return 0; 1379 } 1380 1381 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1382 { 1383 const struct bpf_insn *insn = &meta->insn; 1384 1385 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), 1386 reg_a(insn->dst_reg * 2), ALU_OP_SUB, 1387 reg_b(insn->src_reg * 2)); 1388 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 1389 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, 1390 reg_b(insn->src_reg * 2 + 1)); 1391 1392 return 0; 1393 } 1394 1395 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1396 { 1397 const struct bpf_insn *insn = &meta->insn; 1398 u64 imm = insn->imm; /* sign extend */ 1399 1400 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); 1401 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); 1402 1403 return 0; 1404 } 1405 1406 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1407 { 1408 const struct bpf_insn *insn = &meta->insn; 1409 1410 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), 1411 ALU_OP_SUB, reg_b(insn->dst_reg * 2)); 1412 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), 1413 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); 1414 1415 return 0; 1416 } 1417 1418 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1419 { 1420 const struct bpf_insn *insn = &meta->insn; 1421 u8 dst = insn->dst_reg * 2; 1422 1423 if (insn->imm < 32) { 1424 emit_shf(nfp_prog, 
reg_both(dst + 1), 1425 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), 1426 SHF_SC_R_DSHF, 32 - insn->imm); 1427 emit_shf(nfp_prog, reg_both(dst), 1428 reg_none(), SHF_OP_NONE, reg_b(dst), 1429 SHF_SC_L_SHF, insn->imm); 1430 } else if (insn->imm == 32) { 1431 wrp_reg_mov(nfp_prog, dst + 1, dst); 1432 wrp_immed(nfp_prog, reg_both(dst), 0); 1433 } else if (insn->imm > 32) { 1434 emit_shf(nfp_prog, reg_both(dst + 1), 1435 reg_none(), SHF_OP_NONE, reg_b(dst), 1436 SHF_SC_L_SHF, insn->imm - 32); 1437 wrp_immed(nfp_prog, reg_both(dst), 0); 1438 } 1439 1440 return 0; 1441 } 1442 1443 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1444 { 1445 const struct bpf_insn *insn = &meta->insn; 1446 u8 dst = insn->dst_reg * 2; 1447 1448 if (insn->imm < 32) { 1449 emit_shf(nfp_prog, reg_both(dst), 1450 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), 1451 SHF_SC_R_DSHF, insn->imm); 1452 emit_shf(nfp_prog, reg_both(dst + 1), 1453 reg_none(), SHF_OP_NONE, reg_b(dst + 1), 1454 SHF_SC_R_SHF, insn->imm); 1455 } else if (insn->imm == 32) { 1456 wrp_reg_mov(nfp_prog, dst, dst + 1); 1457 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1458 } else if (insn->imm > 32) { 1459 emit_shf(nfp_prog, reg_both(dst), 1460 reg_none(), SHF_OP_NONE, reg_b(dst + 1), 1461 SHF_SC_R_SHF, insn->imm - 32); 1462 wrp_immed(nfp_prog, reg_both(dst + 1), 0); 1463 } 1464 1465 return 0; 1466 } 1467 1468 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1469 { 1470 const struct bpf_insn *insn = &meta->insn; 1471 1472 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); 1473 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1474 1475 return 0; 1476 } 1477 1478 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1479 { 1480 const struct bpf_insn *insn = &meta->insn; 1481 1482 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); 1483 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1484 1485 return 0; 1486 } 1487 1488 static int 
xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1489 { 1490 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); 1491 } 1492 1493 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1494 { 1495 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); 1496 } 1497 1498 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1499 { 1500 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); 1501 } 1502 1503 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1504 { 1505 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); 1506 } 1507 1508 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1509 { 1510 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); 1511 } 1512 1513 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1514 { 1515 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); 1516 } 1517 1518 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1519 { 1520 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); 1521 } 1522 1523 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1524 { 1525 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); 1526 } 1527 1528 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1529 { 1530 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); 1531 } 1532 1533 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1534 { 1535 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); 1536 } 1537 1538 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1539 { 1540 u8 dst = meta->insn.dst_reg * 2; 1541 1542 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); 1543 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1544 1545 return 0; 1546 } 1547 1548 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1549 { 1550 const 
struct bpf_insn *insn = &meta->insn; 1551 1552 if (!insn->imm) 1553 return 1; /* TODO: zero shift means indirect */ 1554 1555 emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), 1556 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), 1557 SHF_SC_L_SHF, insn->imm); 1558 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); 1559 1560 return 0; 1561 } 1562 1563 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1564 { 1565 const struct bpf_insn *insn = &meta->insn; 1566 u8 gpr = insn->dst_reg * 2; 1567 1568 switch (insn->imm) { 1569 case 16: 1570 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), 1571 SHF_SC_R_ROT, 8); 1572 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), 1573 SHF_SC_R_SHF, 16); 1574 1575 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1576 break; 1577 case 32: 1578 wrp_end32(nfp_prog, reg_a(gpr), gpr); 1579 wrp_immed(nfp_prog, reg_both(gpr + 1), 0); 1580 break; 1581 case 64: 1582 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); 1583 1584 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); 1585 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); 1586 break; 1587 } 1588 1589 return 0; 1590 } 1591 1592 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1593 { 1594 struct nfp_insn_meta *prev = nfp_meta_prev(meta); 1595 u32 imm_lo, imm_hi; 1596 u8 dst; 1597 1598 dst = prev->insn.dst_reg * 2; 1599 imm_lo = prev->insn.imm; 1600 imm_hi = meta->insn.imm; 1601 1602 wrp_immed(nfp_prog, reg_both(dst), imm_lo); 1603 1604 /* mov is always 1 insn, load imm may be two, so try to use mov */ 1605 if (imm_hi == imm_lo) 1606 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); 1607 else 1608 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); 1609 1610 return 0; 1611 } 1612 1613 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1614 { 1615 meta->double_cb = imm_ld8_part2; 1616 return 0; 1617 } 1618 1619 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1620 { 1621 return 
construct_data_ld(nfp_prog, meta->insn.imm, 1); 1622 } 1623 1624 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1625 { 1626 return construct_data_ld(nfp_prog, meta->insn.imm, 2); 1627 } 1628 1629 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1630 { 1631 return construct_data_ld(nfp_prog, meta->insn.imm, 4); 1632 } 1633 1634 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1635 { 1636 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1637 meta->insn.src_reg * 2, 1); 1638 } 1639 1640 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1641 { 1642 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1643 meta->insn.src_reg * 2, 2); 1644 } 1645 1646 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1647 { 1648 return construct_data_ind_ld(nfp_prog, meta->insn.imm, 1649 meta->insn.src_reg * 2, 4); 1650 } 1651 1652 static int 1653 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1654 unsigned int size, unsigned int ptr_off) 1655 { 1656 return mem_op_stack(nfp_prog, meta, size, ptr_off, 1657 meta->insn.dst_reg * 2, meta->insn.src_reg * 2, 1658 true, wrp_lmem_load); 1659 } 1660 1661 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1662 u8 size) 1663 { 1664 swreg dst = reg_both(meta->insn.dst_reg * 2); 1665 1666 switch (meta->insn.off) { 1667 case offsetof(struct __sk_buff, len): 1668 if (size != FIELD_SIZEOF(struct __sk_buff, len)) 1669 return -EOPNOTSUPP; 1670 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); 1671 break; 1672 case offsetof(struct __sk_buff, data): 1673 if (size != FIELD_SIZEOF(struct __sk_buff, data)) 1674 return -EOPNOTSUPP; 1675 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 1676 break; 1677 case offsetof(struct __sk_buff, data_end): 1678 if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) 1679 return -EOPNOTSUPP; 1680 emit_alu(nfp_prog, dst, 1681 
plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 1682 break; 1683 default: 1684 return -EOPNOTSUPP; 1685 } 1686 1687 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1688 1689 return 0; 1690 } 1691 1692 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1693 u8 size) 1694 { 1695 swreg dst = reg_both(meta->insn.dst_reg * 2); 1696 1697 switch (meta->insn.off) { 1698 case offsetof(struct xdp_md, data): 1699 if (size != FIELD_SIZEOF(struct xdp_md, data)) 1700 return -EOPNOTSUPP; 1701 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); 1702 break; 1703 case offsetof(struct xdp_md, data_end): 1704 if (size != FIELD_SIZEOF(struct xdp_md, data_end)) 1705 return -EOPNOTSUPP; 1706 emit_alu(nfp_prog, dst, 1707 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); 1708 break; 1709 default: 1710 return -EOPNOTSUPP; 1711 } 1712 1713 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); 1714 1715 return 0; 1716 } 1717 1718 static int 1719 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1720 unsigned int size) 1721 { 1722 swreg tmp_reg; 1723 1724 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1725 1726 return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg, 1727 meta->insn.dst_reg * 2, size); 1728 } 1729 1730 static int 1731 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1732 unsigned int size) 1733 { 1734 if (meta->ldst_gather_len) 1735 return nfp_cpp_memcpy(nfp_prog, meta); 1736 1737 if (meta->ptr.type == PTR_TO_CTX) { 1738 if (nfp_prog->type == BPF_PROG_TYPE_XDP) 1739 return mem_ldx_xdp(nfp_prog, meta, size); 1740 else 1741 return mem_ldx_skb(nfp_prog, meta, size); 1742 } 1743 1744 if (meta->ptr.type == PTR_TO_PACKET) 1745 return mem_ldx_data(nfp_prog, meta, size); 1746 1747 if (meta->ptr.type == PTR_TO_STACK) 1748 return mem_ldx_stack(nfp_prog, meta, size, 1749 meta->ptr.off + meta->ptr.var_off.value); 1750 1751 return -EOPNOTSUPP; 1752 } 1753 1754 static int 
mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1755 { 1756 return mem_ldx(nfp_prog, meta, 1); 1757 } 1758 1759 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1760 { 1761 return mem_ldx(nfp_prog, meta, 2); 1762 } 1763 1764 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1765 { 1766 return mem_ldx(nfp_prog, meta, 4); 1767 } 1768 1769 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1770 { 1771 return mem_ldx(nfp_prog, meta, 8); 1772 } 1773 1774 static int 1775 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1776 unsigned int size) 1777 { 1778 u64 imm = meta->insn.imm; /* sign extend */ 1779 swreg off_reg; 1780 1781 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1782 1783 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 1784 imm, size); 1785 } 1786 1787 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1788 unsigned int size) 1789 { 1790 if (meta->ptr.type == PTR_TO_PACKET) 1791 return mem_st_data(nfp_prog, meta, size); 1792 1793 return -EOPNOTSUPP; 1794 } 1795 1796 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1797 { 1798 return mem_st(nfp_prog, meta, 1); 1799 } 1800 1801 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1802 { 1803 return mem_st(nfp_prog, meta, 2); 1804 } 1805 1806 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1807 { 1808 return mem_st(nfp_prog, meta, 4); 1809 } 1810 1811 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1812 { 1813 return mem_st(nfp_prog, meta, 8); 1814 } 1815 1816 static int 1817 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1818 unsigned int size) 1819 { 1820 swreg off_reg; 1821 1822 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 1823 1824 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, 
off_reg, 1825 meta->insn.src_reg * 2, size); 1826 } 1827 1828 static int 1829 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1830 unsigned int size, unsigned int ptr_off) 1831 { 1832 return mem_op_stack(nfp_prog, meta, size, ptr_off, 1833 meta->insn.src_reg * 2, meta->insn.dst_reg * 2, 1834 false, wrp_lmem_store); 1835 } 1836 1837 static int 1838 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1839 unsigned int size) 1840 { 1841 if (meta->ptr.type == PTR_TO_PACKET) 1842 return mem_stx_data(nfp_prog, meta, size); 1843 1844 if (meta->ptr.type == PTR_TO_STACK) 1845 return mem_stx_stack(nfp_prog, meta, size, 1846 meta->ptr.off + meta->ptr.var_off.value); 1847 1848 return -EOPNOTSUPP; 1849 } 1850 1851 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1852 { 1853 return mem_stx(nfp_prog, meta, 1); 1854 } 1855 1856 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1857 { 1858 return mem_stx(nfp_prog, meta, 2); 1859 } 1860 1861 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1862 { 1863 return mem_stx(nfp_prog, meta, 4); 1864 } 1865 1866 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1867 { 1868 return mem_stx(nfp_prog, meta, 8); 1869 } 1870 1871 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1872 { 1873 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); 1874 1875 return 0; 1876 } 1877 1878 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1879 { 1880 const struct bpf_insn *insn = &meta->insn; 1881 u64 imm = insn->imm; /* sign extend */ 1882 swreg or1, or2, tmp_reg; 1883 1884 or1 = reg_a(insn->dst_reg * 2); 1885 or2 = reg_b(insn->dst_reg * 2 + 1); 1886 1887 if (imm & ~0U) { 1888 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1889 emit_alu(nfp_prog, imm_a(nfp_prog), 1890 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 1891 or1 = imm_a(nfp_prog); 1892 } 1893 1894 if (imm >> 
32) { 1895 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1896 emit_alu(nfp_prog, imm_b(nfp_prog), 1897 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 1898 or2 = imm_b(nfp_prog); 1899 } 1900 1901 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); 1902 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 1903 1904 return 0; 1905 } 1906 1907 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1908 { 1909 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); 1910 } 1911 1912 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1913 { 1914 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); 1915 } 1916 1917 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1918 { 1919 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); 1920 } 1921 1922 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1923 { 1924 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); 1925 } 1926 1927 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1928 { 1929 const struct bpf_insn *insn = &meta->insn; 1930 u64 imm = insn->imm; /* sign extend */ 1931 swreg tmp_reg; 1932 1933 if (!imm) { 1934 meta->skip = true; 1935 return 0; 1936 } 1937 1938 if (imm & ~0U) { 1939 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1940 emit_alu(nfp_prog, reg_none(), 1941 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); 1942 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1943 } 1944 1945 if (imm >> 32) { 1946 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1947 emit_alu(nfp_prog, reg_none(), 1948 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); 1949 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1950 } 1951 1952 return 0; 1953 } 1954 1955 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1956 { 1957 const struct bpf_insn *insn = &meta->insn; 1958 u64 imm = insn->imm; /* sign extend */ 1959 swreg tmp_reg; 1960 1961 if (!imm) { 1962 emit_alu(nfp_prog, 
reg_none(), reg_a(insn->dst_reg * 2), 1963 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); 1964 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1965 return 0; 1966 } 1967 1968 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 1969 emit_alu(nfp_prog, reg_none(), 1970 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 1971 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1972 1973 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 1974 emit_alu(nfp_prog, reg_none(), 1975 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 1976 emit_br(nfp_prog, BR_BNE, insn->off, 0); 1977 1978 return 0; 1979 } 1980 1981 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1982 { 1983 const struct bpf_insn *insn = &meta->insn; 1984 1985 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), 1986 ALU_OP_XOR, reg_b(insn->src_reg * 2)); 1987 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), 1988 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); 1989 emit_alu(nfp_prog, reg_none(), 1990 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); 1991 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 1992 1993 return 0; 1994 } 1995 1996 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1997 { 1998 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); 1999 } 2000 2001 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2002 { 2003 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); 2004 } 2005 2006 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2007 { 2008 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); 2009 } 2010 2011 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2012 { 2013 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); 2014 } 2015 2016 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2017 { 2018 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); 2019 } 2020 2021 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 
2022 { 2023 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); 2024 } 2025 2026 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2027 { 2028 switch (meta->insn.imm) { 2029 case BPF_FUNC_xdp_adjust_head: 2030 return adjust_head(nfp_prog, meta); 2031 default: 2032 WARN_ONCE(1, "verifier allowed unsupported function\n"); 2033 return -EOPNOTSUPP; 2034 } 2035 } 2036 2037 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2038 { 2039 wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); 2040 2041 return 0; 2042 } 2043 2044 static const instr_cb_t instr_cb[256] = { 2045 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, 2046 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, 2047 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, 2048 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, 2049 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, 2050 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, 2051 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, 2052 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, 2053 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, 2054 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, 2055 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, 2056 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, 2057 [BPF_ALU64 | BPF_NEG] = neg_reg64, 2058 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, 2059 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, 2060 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, 2061 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, 2062 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, 2063 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, 2064 [BPF_ALU | BPF_AND | BPF_X] = and_reg, 2065 [BPF_ALU | BPF_AND | BPF_K] = and_imm, 2066 [BPF_ALU | BPF_OR | BPF_X] = or_reg, 2067 [BPF_ALU | BPF_OR | BPF_K] = or_imm, 2068 [BPF_ALU | BPF_ADD | BPF_X] = add_reg, 2069 [BPF_ALU | BPF_ADD | BPF_K] = add_imm, 2070 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, 2071 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, 2072 [BPF_ALU | BPF_NEG] = neg_reg, 2073 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, 2074 [BPF_ALU | BPF_END | BPF_X] = end_reg32, 2075 [BPF_LD | BPF_IMM | 
BPF_DW] = imm_ld8, 2076 [BPF_LD | BPF_ABS | BPF_B] = data_ld1, 2077 [BPF_LD | BPF_ABS | BPF_H] = data_ld2, 2078 [BPF_LD | BPF_ABS | BPF_W] = data_ld4, 2079 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, 2080 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, 2081 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, 2082 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, 2083 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, 2084 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, 2085 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, 2086 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, 2087 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, 2088 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, 2089 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, 2090 [BPF_ST | BPF_MEM | BPF_B] = mem_st1, 2091 [BPF_ST | BPF_MEM | BPF_H] = mem_st2, 2092 [BPF_ST | BPF_MEM | BPF_W] = mem_st4, 2093 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, 2094 [BPF_JMP | BPF_JA | BPF_K] = jump, 2095 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, 2096 [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, 2097 [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, 2098 [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, 2099 [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, 2100 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, 2101 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, 2102 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, 2103 [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, 2104 [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, 2105 [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, 2106 [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, 2107 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, 2108 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, 2109 [BPF_JMP | BPF_CALL] = call, 2110 [BPF_JMP | BPF_EXIT] = goto_out, 2111 }; 2112 2113 /* --- Misc code --- */ 2114 static void br_set_offset(u64 *instr, u16 offset) 2115 { 2116 u16 addr_lo, addr_hi; 2117 2118 addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); 2119 addr_hi = offset != addr_lo; 2120 *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); 2121 *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); 2122 *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); 2123 } 2124 2125 /* --- Assembler logic --- */ 
/* Resolve the targets of all branch instructions in the translated program.
 *
 * First pass: for every non-skipped BPF jump other than a call, locate the
 * translated instructions belonging to it and point each branch among them at
 * the translated offset of the BPF jump destination.  Branches carrying an
 * OP_BR_SPECIAL tag are left for the second pass.
 *
 * Second pass: walk the whole translated program and point the tagged
 * branches at the common exit (tgt_out) or abort (tgt_abort) target, then
 * clear the tag.
 *
 * Returns 0 on success, -ELOOP when the translated code does not have the
 * expected shape (details are logged with pr_err()).
 */
static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *jmp_dst;
	u32 idx, br_idx;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->skip)
			continue;
		if (meta->insn.code == (BPF_JMP | BPF_CALL))
			continue;
		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
			continue;

		/* Offset of the last instruction emitted for this BPF
		 * instruction: one before the next instruction's start, or
		 * last_bpf_off for the final instruction.
		 */
		if (list_is_last(&meta->l, &nfp_prog->insns))
			idx = nfp_prog->last_bpf_off;
		else
			idx = list_next_entry(meta, l)->off - 1;

		br_idx = nfp_prog_offset_to_index(nfp_prog, idx);

		/* The code emitted for a jump must end in a branch */
		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
			return -ELOOP;
		}
		/* Leave special branches for later */
		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
			continue;

		if (!meta->jmp_dst) {
			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
			return -ELOOP;
		}

		jmp_dst = meta->jmp_dst;

		if (jmp_dst->skip) {
			pr_err("Branch landing on removed instruction!!\n");
			return -ELOOP;
		}

		/* A single BPF jump may have been translated into several
		 * branches; retarget every one of them.
		 */
		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
		     idx <= br_idx; idx++) {
			if (!nfp_is_br(nfp_prog->prog[idx]))
				continue;
			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
		}
	}

	/* Fixup 'goto out's separately, they can be scattered around */
	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
		enum br_special special;

		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
			continue;

		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
		switch (special) {
		case OP_BR_NORMAL:
			break;
		case OP_BR_GO_OUT:
			br_set_offset(&nfp_prog->prog[br_idx],
				      nfp_prog->tgt_out);
			break;
		case OP_BR_GO_ABORT:
			br_set_offset(&nfp_prog->prog[br_idx],
				      nfp_prog->tgt_abort);
			break;
2194 } 2195 2196 nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; 2197 } 2198 2199 return 0; 2200 } 2201 2202 static void nfp_intro(struct nfp_prog *nfp_prog) 2203 { 2204 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); 2205 emit_alu(nfp_prog, plen_reg(nfp_prog), 2206 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); 2207 } 2208 2209 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) 2210 { 2211 /* TC direct-action mode: 2212 * 0,1 ok NOT SUPPORTED[1] 2213 * 2 drop 0x22 -> drop, count as stat1 2214 * 4,5 nuke 0x02 -> drop 2215 * 7 redir 0x44 -> redir, count as stat2 2216 * * unspec 0x11 -> pass, count as stat0 2217 * 2218 * [1] We can't support OK and RECLASSIFY because we can't tell TC 2219 * the exact decision made. We are forced to support UNSPEC 2220 * to handle aborts so that's the only one we handle for passing 2221 * packets up the stack. 2222 */ 2223 /* Target for aborts */ 2224 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2225 2226 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2227 2228 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2229 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); 2230 2231 /* Target for normal exits */ 2232 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2233 2234 /* if R0 > 7 jump to abort */ 2235 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); 2236 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2237 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2238 2239 wrp_immed(nfp_prog, reg_b(2), 0x41221211); 2240 wrp_immed(nfp_prog, reg_b(3), 0x41001211); 2241 2242 emit_shf(nfp_prog, reg_a(1), 2243 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); 2244 2245 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2246 emit_shf(nfp_prog, reg_a(2), 2247 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2248 2249 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2250 emit_shf(nfp_prog, reg_b(2), 2251 reg_imm(0xf), SHF_OP_AND, reg_b(3), 
SHF_SC_R_SHF, 0); 2252 2253 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2254 2255 emit_shf(nfp_prog, reg_b(2), 2256 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); 2257 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2258 } 2259 2260 static void nfp_outro_xdp(struct nfp_prog *nfp_prog) 2261 { 2262 /* XDP return codes: 2263 * 0 aborted 0x82 -> drop, count as stat3 2264 * 1 drop 0x22 -> drop, count as stat1 2265 * 2 pass 0x11 -> pass, count as stat0 2266 * 3 tx 0x44 -> redir, count as stat2 2267 * * unknown 0x82 -> drop, count as stat3 2268 */ 2269 /* Target for aborts */ 2270 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2271 2272 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2273 2274 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2275 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); 2276 2277 /* Target for normal exits */ 2278 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2279 2280 /* if R0 > 3 jump to abort */ 2281 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); 2282 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2283 2284 wrp_immed(nfp_prog, reg_b(2), 0x44112282); 2285 2286 emit_shf(nfp_prog, reg_a(1), 2287 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); 2288 2289 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2290 emit_shf(nfp_prog, reg_b(2), 2291 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2292 2293 emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); 2294 2295 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2296 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2297 } 2298 2299 static void nfp_outro(struct nfp_prog *nfp_prog) 2300 { 2301 switch (nfp_prog->type) { 2302 case BPF_PROG_TYPE_SCHED_CLS: 2303 nfp_outro_tc_da(nfp_prog); 2304 break; 2305 case BPF_PROG_TYPE_XDP: 2306 nfp_outro_xdp(nfp_prog); 2307 break; 2308 default: 2309 WARN_ON(1); 2310 } 2311 } 2312 2313 static int nfp_translate(struct nfp_prog *nfp_prog) 2314 { 2315 
struct nfp_insn_meta *meta; 2316 int err; 2317 2318 nfp_intro(nfp_prog); 2319 if (nfp_prog->error) 2320 return nfp_prog->error; 2321 2322 list_for_each_entry(meta, &nfp_prog->insns, l) { 2323 instr_cb_t cb = instr_cb[meta->insn.code]; 2324 2325 meta->off = nfp_prog_current_offset(nfp_prog); 2326 2327 if (meta->skip) { 2328 nfp_prog->n_translated++; 2329 continue; 2330 } 2331 2332 if (nfp_meta_has_prev(nfp_prog, meta) && 2333 nfp_meta_prev(meta)->double_cb) 2334 cb = nfp_meta_prev(meta)->double_cb; 2335 if (!cb) 2336 return -ENOENT; 2337 err = cb(nfp_prog, meta); 2338 if (err) 2339 return err; 2340 2341 nfp_prog->n_translated++; 2342 } 2343 2344 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; 2345 2346 nfp_outro(nfp_prog); 2347 if (nfp_prog->error) 2348 return nfp_prog->error; 2349 2350 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); 2351 if (nfp_prog->error) 2352 return nfp_prog->error; 2353 2354 return nfp_fixup_branches(nfp_prog); 2355 } 2356 2357 /* --- Optimizations --- */ 2358 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) 2359 { 2360 struct nfp_insn_meta *meta; 2361 2362 list_for_each_entry(meta, &nfp_prog->insns, l) { 2363 struct bpf_insn insn = meta->insn; 2364 2365 /* Programs converted from cBPF start with register xoring */ 2366 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && 2367 insn.src_reg == insn.dst_reg) 2368 continue; 2369 2370 /* Programs start with R6 = R1 but we ignore the skb pointer */ 2371 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && 2372 insn.src_reg == 1 && insn.dst_reg == 6) 2373 meta->skip = true; 2374 2375 /* Return as soon as something doesn't match */ 2376 if (!meta->skip) 2377 return; 2378 } 2379 } 2380 2381 /* Remove masking after load since our load guarantees this is not needed */ 2382 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) 2383 { 2384 struct nfp_insn_meta *meta1, *meta2; 2385 const s32 exp_mask[] = { 2386 [BPF_B] = 0x000000ffU, 2387 [BPF_H] = 0x0000ffffU, 2388 [BPF_W] = 
0xffffffffU, 2389 }; 2390 2391 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2392 struct bpf_insn insn, next; 2393 2394 insn = meta1->insn; 2395 next = meta2->insn; 2396 2397 if (BPF_CLASS(insn.code) != BPF_LD) 2398 continue; 2399 if (BPF_MODE(insn.code) != BPF_ABS && 2400 BPF_MODE(insn.code) != BPF_IND) 2401 continue; 2402 2403 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) 2404 continue; 2405 2406 if (!exp_mask[BPF_SIZE(insn.code)]) 2407 continue; 2408 if (exp_mask[BPF_SIZE(insn.code)] != next.imm) 2409 continue; 2410 2411 if (next.src_reg || next.dst_reg) 2412 continue; 2413 2414 if (meta2->flags & FLAG_INSN_IS_JUMP_DST) 2415 continue; 2416 2417 meta2->skip = true; 2418 } 2419 } 2420 2421 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) 2422 { 2423 struct nfp_insn_meta *meta1, *meta2, *meta3; 2424 2425 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { 2426 struct bpf_insn insn, next1, next2; 2427 2428 insn = meta1->insn; 2429 next1 = meta2->insn; 2430 next2 = meta3->insn; 2431 2432 if (BPF_CLASS(insn.code) != BPF_LD) 2433 continue; 2434 if (BPF_MODE(insn.code) != BPF_ABS && 2435 BPF_MODE(insn.code) != BPF_IND) 2436 continue; 2437 if (BPF_SIZE(insn.code) != BPF_W) 2438 continue; 2439 2440 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && 2441 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && 2442 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && 2443 next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) 2444 continue; 2445 2446 if (next1.src_reg || next1.dst_reg || 2447 next2.src_reg || next2.dst_reg) 2448 continue; 2449 2450 if (next1.imm != 0x20 || next2.imm != 0x20) 2451 continue; 2452 2453 if (meta2->flags & FLAG_INSN_IS_JUMP_DST || 2454 meta3->flags & FLAG_INSN_IS_JUMP_DST) 2455 continue; 2456 2457 meta2->skip = true; 2458 meta3->skip = true; 2459 } 2460 } 2461 2462 /* load/store pair that forms memory copy sould look like the following: 2463 * 2464 * ld_width R, [addr_src + offset_src] 2465 * st_width [addr_dest + offset_dest], R 2466 * 2467 
* The destination register of load and source register of store should 2468 * be the same, load and store should also perform at the same width. 2469 * If either of addr_src or addr_dest is stack pointer, we don't do the 2470 * CPP optimization as stack is modelled by registers on NFP. 2471 */ 2472 static bool 2473 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, 2474 struct nfp_insn_meta *st_meta) 2475 { 2476 struct bpf_insn *ld = &ld_meta->insn; 2477 struct bpf_insn *st = &st_meta->insn; 2478 2479 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) 2480 return false; 2481 2482 if (ld_meta->ptr.type != PTR_TO_PACKET) 2483 return false; 2484 2485 if (st_meta->ptr.type != PTR_TO_PACKET) 2486 return false; 2487 2488 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) 2489 return false; 2490 2491 if (ld->dst_reg != st->src_reg) 2492 return false; 2493 2494 /* There is jump to the store insn in this pair. */ 2495 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) 2496 return false; 2497 2498 return true; 2499 } 2500 2501 /* Currently, we only support chaining load/store pairs if: 2502 * 2503 * - Their address base registers are the same. 2504 * - Their address offsets are in the same order. 2505 * - They operate at the same memory width. 2506 * - There is no jump into the middle of them. 2507 */ 2508 static bool 2509 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, 2510 struct nfp_insn_meta *st_meta, 2511 struct bpf_insn *prev_ld, 2512 struct bpf_insn *prev_st) 2513 { 2514 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; 2515 struct bpf_insn *ld = &ld_meta->insn; 2516 struct bpf_insn *st = &st_meta->insn; 2517 s16 prev_ld_off, prev_st_off; 2518 2519 /* This pair is the start pair. 
*/ 2520 if (!prev_ld) 2521 return true; 2522 2523 prev_size = BPF_LDST_BYTES(prev_ld); 2524 curr_size = BPF_LDST_BYTES(ld); 2525 prev_ld_base = prev_ld->src_reg; 2526 prev_st_base = prev_st->dst_reg; 2527 prev_ld_dst = prev_ld->dst_reg; 2528 prev_ld_off = prev_ld->off; 2529 prev_st_off = prev_st->off; 2530 2531 if (ld->dst_reg != prev_ld_dst) 2532 return false; 2533 2534 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) 2535 return false; 2536 2537 if (curr_size != prev_size) 2538 return false; 2539 2540 /* There is jump to the head of this pair. */ 2541 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) 2542 return false; 2543 2544 /* Both in ascending order. */ 2545 if (prev_ld_off + prev_size == ld->off && 2546 prev_st_off + prev_size == st->off) 2547 return true; 2548 2549 /* Both in descending order. */ 2550 if (ld->off + curr_size == prev_ld_off && 2551 st->off + curr_size == prev_st_off) 2552 return true; 2553 2554 return false; 2555 } 2556 2557 /* Return TRUE if cross memory access happens. Cross memory access means 2558 * store area is overlapping with load area that a later load might load 2559 * the value from previous store, for this case we can't treat the sequence 2560 * as an memory copy. 2561 */ 2562 static bool 2563 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, 2564 struct nfp_insn_meta *head_st_meta) 2565 { 2566 s16 head_ld_off, head_st_off, ld_off; 2567 2568 /* Different pointer types does not overlap. */ 2569 if (head_ld_meta->ptr.type != head_st_meta->ptr.type) 2570 return false; 2571 2572 /* load and store are both PTR_TO_PACKET, check ID info. */ 2573 if (head_ld_meta->ptr.id != head_st_meta->ptr.id) 2574 return true; 2575 2576 /* Canonicalize the offsets. Turn all of them against the original 2577 * base register. 
2578 */ 2579 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; 2580 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; 2581 ld_off = ld->off + head_ld_meta->ptr.off; 2582 2583 /* Ascending order cross. */ 2584 if (ld_off > head_ld_off && 2585 head_ld_off < head_st_off && ld_off >= head_st_off) 2586 return true; 2587 2588 /* Descending order cross. */ 2589 if (ld_off < head_ld_off && 2590 head_ld_off > head_st_off && ld_off <= head_st_off) 2591 return true; 2592 2593 return false; 2594 } 2595 2596 /* This pass try to identify the following instructoin sequences. 2597 * 2598 * load R, [regA + offA] 2599 * store [regB + offB], R 2600 * load R, [regA + offA + const_imm_A] 2601 * store [regB + offB + const_imm_A], R 2602 * load R, [regA + offA + 2 * const_imm_A] 2603 * store [regB + offB + 2 * const_imm_A], R 2604 * ... 2605 * 2606 * Above sequence is typically generated by compiler when lowering 2607 * memcpy. NFP prefer using CPP instructions to accelerate it. 2608 */ 2609 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) 2610 { 2611 struct nfp_insn_meta *head_ld_meta = NULL; 2612 struct nfp_insn_meta *head_st_meta = NULL; 2613 struct nfp_insn_meta *meta1, *meta2; 2614 struct bpf_insn *prev_ld = NULL; 2615 struct bpf_insn *prev_st = NULL; 2616 u8 count = 0; 2617 2618 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2619 struct bpf_insn *ld = &meta1->insn; 2620 struct bpf_insn *st = &meta2->insn; 2621 2622 /* Reset record status if any of the following if true: 2623 * - The current insn pair is not load/store. 2624 * - The load/store pair doesn't chain with previous one. 2625 * - The chained load/store pair crossed with previous pair. 2626 * - The chained load/store pair has a total size of memory 2627 * copy beyond 128 bytes which is the maximum length a 2628 * single NFP CPP command can transfer. 
2629 */ 2630 if (!curr_pair_is_memcpy(meta1, meta2) || 2631 !curr_pair_chain_with_previous(meta1, meta2, prev_ld, 2632 prev_st) || 2633 (head_ld_meta && (cross_mem_access(ld, head_ld_meta, 2634 head_st_meta) || 2635 head_ld_meta->ldst_gather_len >= 128))) { 2636 if (!count) 2637 continue; 2638 2639 if (count > 1) { 2640 s16 prev_ld_off = prev_ld->off; 2641 s16 prev_st_off = prev_st->off; 2642 s16 head_ld_off = head_ld_meta->insn.off; 2643 2644 if (prev_ld_off < head_ld_off) { 2645 head_ld_meta->insn.off = prev_ld_off; 2646 head_st_meta->insn.off = prev_st_off; 2647 head_ld_meta->ldst_gather_len = 2648 -head_ld_meta->ldst_gather_len; 2649 } 2650 2651 head_ld_meta->paired_st = &head_st_meta->insn; 2652 head_st_meta->skip = true; 2653 } else { 2654 head_ld_meta->ldst_gather_len = 0; 2655 } 2656 2657 /* If the chain is ended by an load/store pair then this 2658 * could serve as the new head of the the next chain. 2659 */ 2660 if (curr_pair_is_memcpy(meta1, meta2)) { 2661 head_ld_meta = meta1; 2662 head_st_meta = meta2; 2663 head_ld_meta->ldst_gather_len = 2664 BPF_LDST_BYTES(ld); 2665 meta1 = nfp_meta_next(meta1); 2666 meta2 = nfp_meta_next(meta2); 2667 prev_ld = ld; 2668 prev_st = st; 2669 count = 1; 2670 } else { 2671 head_ld_meta = NULL; 2672 head_st_meta = NULL; 2673 prev_ld = NULL; 2674 prev_st = NULL; 2675 count = 0; 2676 } 2677 2678 continue; 2679 } 2680 2681 if (!head_ld_meta) { 2682 head_ld_meta = meta1; 2683 head_st_meta = meta2; 2684 } else { 2685 meta1->skip = true; 2686 meta2->skip = true; 2687 } 2688 2689 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); 2690 meta1 = nfp_meta_next(meta1); 2691 meta2 = nfp_meta_next(meta2); 2692 prev_ld = ld; 2693 prev_st = st; 2694 count++; 2695 } 2696 } 2697 2698 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) 2699 { 2700 nfp_bpf_opt_reg_init(nfp_prog); 2701 2702 nfp_bpf_opt_ld_mask(nfp_prog); 2703 nfp_bpf_opt_ld_shift(nfp_prog); 2704 nfp_bpf_opt_ldst_gather(nfp_prog); 2705 2706 return 0; 2707 } 2708 2709 
static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) 2710 { 2711 int i; 2712 2713 for (i = 0; i < nfp_prog->prog_len; i++) { 2714 int err; 2715 2716 err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); 2717 if (err) 2718 return err; 2719 2720 nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); 2721 2722 ustore[i] = cpu_to_le64(nfp_prog->prog[i]); 2723 } 2724 2725 return 0; 2726 } 2727 2728 int nfp_bpf_jit(struct nfp_prog *nfp_prog) 2729 { 2730 int ret; 2731 2732 ret = nfp_bpf_optimize(nfp_prog); 2733 if (ret) 2734 return ret; 2735 2736 ret = nfp_translate(nfp_prog); 2737 if (ret) { 2738 pr_err("Translation failed with error %d (translated: %u)\n", 2739 ret, nfp_prog->n_translated); 2740 return -EINVAL; 2741 } 2742 2743 return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); 2744 } 2745