/*
 * Copyright (C) 2016-2017 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General Public License
 * Version 2, June 1991 as shown in the file COPYING in the top-level
 * directory of this source tree or the BSD 2-Clause License provided
 * below.  You have the option to license this software under the
 * complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  1. Redistributions of source code must retain the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/pkt_cls.h>
#include <linux/unistd.h>

#include "main.h"
#include "../nfp_asm.h"

/* --- NFP prog --- */
/* The for-each macros which walk "multiple" entries provide pos and next<n>
 * pointers.  It's safe to modify the next pointers (but not pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))

static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}

static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the
	 * translation will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}
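/* None of the emitters report failure directly: the first error is latched
 * in nfp_prog->error (e.g. -ENOSPC when the instruction buffer fills up in
 * nfp_prog_push()) and translation simply keeps going, so callers only have
 * to check nfp_prog->error once after they are done emitting.
 */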
/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	u64 insn;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}

static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}
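/* Branches whose real target isn't known at emission time (program exits,
 * helper calls, jumps that may still move) carry an nfp_relo_type stamped
 * into the otherwise unused top bits of the instruction word
 * (OP_RELO_TYPE); the address is fixed up later, when the program is
 * relocated for loading.
 */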
static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}

static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	if (sc == SHF_SC_L_SHF)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}
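/* Operand encoding comes in two flavours: ALU and immed instructions take
 * "unrestricted" operands, while shift, ld_field and cmd instructions take
 * "restricted" ones.  The swreg_to_unrestricted()/swreg_to_restricted()
 * helpers (from the nfp_asm infrastructure included above) pick register
 * banks, encode small immediates and report whether the operands had to
 * be swapped.
 */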
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}
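/* ld_field does a byte-wise merge: the bytes of the (shifted) source
 * selected by the 4-bit byte mask are written into the destination, and
 * the remaining destination bytes are either preserved or zeroed (the
 * "zero" flag).  The sub-word load/store wrappers further down build on
 * this.
 */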
static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra so if param is an immed, we encode as reg_none() and
	 * copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
		    false, reg.src_lmextn);
}

static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}

/* --- Wrappers --- */
static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
{
	if (!(imm & 0xffff0000)) {
		*val = imm;
		*shift = IMMED_SHIFT_0B;
	} else if (!(imm & 0xff0000ff)) {
		*val = imm >> 8;
		*shift = IMMED_SHIFT_1B;
	} else if (!(imm & 0x0000ffff)) {
		*val = imm >> 16;
		*shift = IMMED_SHIFT_2B;
	} else {
		return false;
	}

	return true;
}
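/* Examples: 0x0000beef packs as (0xbeef, IMMED_SHIFT_0B), 0x00beef00 as
 * (0xbeef, IMMED_SHIFT_1B) and 0xbeef0000 as (0xbeef, IMMED_SHIFT_2B).
 * Something like 0x12345678 doesn't pack, and wrp_immed() below has to
 * fall back to two immed instructions (or one with the invert flag, if
 * ~imm packs).
 */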
static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}

static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return,
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return,
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}

static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}

static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}

/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from the low end.
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}

/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src, OR
 * the result into @dst at @offset; the other bits of @dst are not changed.
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}

static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));
	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}
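/* nfp_cpp_memcpy() implements a load whose value is consumed only by a
 * paired store (meta->paired_st), with the combined length recorded by the
 * memory-copy optimization pass in meta->ldst_gather_len (negative for a
 * descending sequence).  The data is bounced through the transfer
 * registers rather than GPRs.
 */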
/* The NFP has a Command Push Pull bus which supports bulk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
			      &off);

	/* Set up PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write the 4-byte aligned
		 * part of the length, then another direct_ref write8 to write
		 * the remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure the data flow is
	 * identical before and after we do the memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the same value it held before
	 * this transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
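/* Two flavours of loads follow: data_ld() uses CMD_TGT_READ8, which
 * returns data in big endian and needs a shift to drop the extra bytes,
 * while data_ld_host_order() uses CMD_TGT_READ32_SWAP and masks the extra
 * bytes off instead.
 */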
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need.  Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}

static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need.  Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}

static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
				  size, CMD_MODE_32b);
}

static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}

static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
}

static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
{
	swreg tmp_reg;

	/* Check packet length */
	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	return data_ld(nfp_prog, tmp_reg, 0, size);
}
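/* The store-side counterparts follow the same shape: the data is staged in
 * the transfer-out registers (from GPRs or an immediate) and pushed out
 * with a single write8 command covering @size bytes.
 */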
static int
data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		    u8 src_gpr, u8 size)
{
	unsigned int i;

	for (i = 0; i * 4 < size; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);

static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new
		 * GPR that means we are loading a second part of the LMEM word
		 * into a new GPR.  IOW we've already looked at that LMEM word
		 * and therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
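/* wrp_lmem_store() below mirrors wrp_lmem_load() in the other direction;
 * unlike the load it may also need a final write-back of the merged word
 * to LMEM when the index is out of ld_field's reach.
 */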
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only the first and last LMEM locations are going to need
		 * RMW, the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}
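/* mem_op_stack() drives the lmem_step callbacks: it picks which LM pointer
 * to use (LMaddr0 covers the bottom 64B of the stack, LMaddr3 is set up
 * for everything else, with post-increment mode when the pointer isn't
 * constant), then slices the access into pieces which never cross a 4-byte
 * boundary on either the GPR or the LMEM side and calls @step for each
 * piece with the first/last/new_gpr flags describing where in the access
 * the slice falls.
 */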
static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
	bool first = true, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment.  Depend on low bits of value being
		 * discarded when written to LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because offset is ORed
		 * in, not added, when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}
	if (lm3) {
		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper. */
		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
	}

	if (clr_gpr && size < 8)
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);

	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}
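/* From here on the wrappers deal with whole eBPF registers.  Each 64-bit
 * eBPF register N lives in a pair of 32-bit GPRs: 2 * N holds the low word
 * and 2 * N + 1 the high word, which is why the 32-bit ALU ops end with an
 * explicit zeroing of register 2 * N + 1.
 */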
static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
	swreg tmp_reg;

	if (alu_op == ALU_OP_AND) {
		if (!imm)
			wrp_immed(nfp_prog, reg_both(dst), 0);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_OR) {
		if (!~imm)
			wrp_immed(nfp_prog, reg_both(dst), ~0U);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_XOR) {
		if (!~imm)
			emit_alu(nfp_prog, reg_both(dst), reg_none(),
				 ALU_OP_NOT, reg_b(dst));
		if (!imm || !~imm)
			return;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
}

static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}

static int
wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	emit_alu(nfp_prog, reg_both(dst + 1),
		 reg_a(dst + 1), alu_op, reg_b(src + 1));

	return 0;
}

static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int
wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static void
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);
}

static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
			 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}

static int
wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	    enum br_mask br_mask, bool swap)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	if (!swap)
		emit_alu(nfp_prog, reg_none(),
			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(),
			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));

	emit_br(nfp_prog, br_mask, insn->off, 0);

	return 0;
}

static int
wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	    enum br_mask br_mask, bool swap)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 areg, breg;

	areg = insn->dst_reg * 2;
	breg = insn->src_reg * 2;

	if (swap) {
		areg ^= breg;
		breg ^= areg;
		areg ^= breg;
	}

	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
	emit_br(nfp_prog, br_mask, insn->off, 0);

	return 0;
}

static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}
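/* wrp_end32() above byte-swaps a 32-bit value with two ld_field rotates:
 * the first writes all four bytes of the source rotated right by 8, the
 * second patches bytes 0 and 2 (mask 0x5) with the intermediate value
 * rotated right by 16, which together reverse the byte order.
 */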
static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
	struct nfp_bpf_cap_adjust_head *adjust_head;
	u32 ret_einval, end;

	adjust_head = &nfp_prog->bpf->adjust_head;

	/* Optimized version - 5 vs 14 cycles */
	if (nfp_prog->adjust_head_location != UINT_MAX) {
		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
			return -EINVAL;

		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);

		/* TODO: when adjust head is guaranteed to succeed we can
		 * also eliminate the following if (r0 == 0) branch.
		 */

		return 0;
	}

	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
	end = ret_einval + 2;

	/* We need to use a temp because offset is just a part of the pkt ptr */
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);

	/* Validate the length is at least ETH_HLEN */
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);

	/* Skip over the -EINVAL ret code (defer 2) */
	emit_br(nfp_prog, BR_UNC, end, 2);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
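/* Map helpers are implemented on the device: the code below branches to
 * the helper with RELO_BR_HELPER (the helper id is encoded in the branch
 * address and resolved at relocation time), and uses the two defer slots
 * to pass the map ID in A0 and the return address in B0 (RELO_IMMED_REL).
 */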
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct bpf_offloaded_map *offmap;
	struct nfp_bpf_map *nfp_map;
	bool load_lm_ptr;
	u32 ret_tgt;
	s64 lm_off;
	swreg tid;

	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
	nfp_map = offmap->dev_priv;

	/* We only have to reload LM0 if the key is not at start of stack */
	lm_off = nfp_prog->stack_depth;
	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
	load_lm_ptr = meta->arg2.var_off || lm_off;

	/* Set LM0 to start of key */
	if (load_lm_ptr)
		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
	if (meta->func_id == BPF_FUNC_map_update_elem)
		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);

	/* Load map ID into a register, it should actually fit as an immediate
	 * but in case it doesn't, deal with it here, not in the delay slots.
	 */
	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;

	/* Load map ID into A0 */
	wrp_mov(nfp_prog, reg_a(0), tid);

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	/* Reset the LM0 pointer */
	if (!load_lm_ptr)
		return 0;

	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
	wrp_nops(nfp_prog, 3);

	return 0;
}

/* --- Callbacks --- */
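/* Each callback below translates a single eBPF instruction described by
 * meta->insn; it returns 0 on success or a negative errno when the
 * instruction cannot be offloaded.
 */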
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;
	u8 src = insn->src_reg * 2;

	if (insn->src_reg == BPF_REG_10) {
		swreg stack_depth_reg;

		stack_depth_reg = ur_load_imm_any(nfp_prog,
						  nfp_prog->stack_depth,
						  stack_imm(nfp_prog));
		emit_alu(nfp_prog, reg_both(dst),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else {
		wrp_reg_mov(nfp_prog, dst, src);
		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
	}

	return 0;
}

static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u64 imm = meta->insn.imm; /* sign extend */

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);

	return 0;
}

static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}

static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);

	return 0;
}

static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);

	return 0;
}

static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));

	return 0;
}

static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	if (insn->imm < 32) {
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_R_DSHF, 32 - insn->imm);
		emit_shf(nfp_prog, reg_both(dst),
			 reg_none(), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_L_SHF, insn->imm);
	} else if (insn->imm == 32) {
		wrp_reg_mov(nfp_prog, dst + 1, dst);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	} else if (insn->imm > 32) {
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_none(), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_L_SHF, insn->imm - 32);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	}

	return 0;
}

static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	if (insn->imm < 32) {
		emit_shf(nfp_prog, reg_both(dst),
			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_R_DSHF, insn->imm);
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
			 SHF_SC_R_SHF, insn->imm);
	} else if (insn->imm == 32) {
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else if (insn->imm > 32) {
		emit_shf(nfp_prog, reg_both(dst),
			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
			 SHF_SC_R_SHF, insn->imm - 32);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	}

	return 0;
}
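/* The 64-bit shifts above lean on the double-shift source class:
 * SHF_SC_R_DSHF shifts the 64-bit concatenation of the A and B operands
 * right, which is how bits are funnelled between the low and high words
 * for shift amounts below 32.
 */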
static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}

static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
}

static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
}

static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
}

static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
}

static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u8 dst = meta->insn.dst_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	if (!insn->imm)
		return 1; /* TODO: zero shift means indirect */

	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
		 SHF_SC_L_SHF, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 gpr = insn->dst_reg * 2;

	switch (insn->imm) {
	case 16:
		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
			      SHF_SC_R_ROT, 8);
		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
			      SHF_SC_R_SHF, 16);

		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 32:
		wrp_end32(nfp_prog, reg_a(gpr), gpr);
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 64:
		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));

		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
		break;
	}

	return 0;
}
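/* BPF_LD | BPF_IMM | BPF_DW occupies two eBPF instructions: the first half
 * only arms meta->double_cb and the actual move is emitted by
 * imm_ld8_part2() when the second half is processed.
 */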
static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
	u32 imm_lo, imm_hi;
	u8 dst;

	dst = prev->insn.dst_reg * 2;
	imm_lo = prev->insn.imm;
	imm_hi = meta->insn.imm;

	wrp_immed(nfp_prog, reg_both(dst), imm_lo);

	/* mov is always 1 insn, load imm may be two, so try to use mov */
	if (imm_hi == imm_lo)
		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
	else
		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);

	return 0;
}

static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	meta->double_cb = imm_ld8_part2;
	return 0;
}

static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
}

static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
}

static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
}

static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 1);
}

static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 2);
}

static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 4);
}

static int
mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      unsigned int size, unsigned int ptr_off)
{
	return mem_op_stack(nfp_prog, meta, size, ptr_off,
			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
			    true, wrp_lmem_load);
}

static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct __sk_buff, len):
		if (size != FIELD_SIZEOF(struct __sk_buff, len))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data):
		if (size != FIELD_SIZEOF(struct __sk_buff, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data_end):
		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
			return -EOPNOTSUPP;
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct xdp_md, data):
		if (size != FIELD_SIZEOF(struct xdp_md, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct xdp_md, data_end):
		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
			return -EOPNOTSUPP;
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static int
mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size)
{
	swreg tmp_reg;

	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
}
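/* Map values live in the NFP's external memory (emem), which needs 40-bit
 * addressing - hence the addr40 variant here, with the base coming from a
 * pair of GPRs (see addr40_offset() above).
 */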
static int
mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size)
{
	swreg tmp_reg;

	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
}

static void
mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
			   struct nfp_insn_meta *meta)
{
	s16 range_start = meta->pkt_cache.range_start;
	s16 range_end = meta->pkt_cache.range_end;
	swreg src_base, off;
	u8 xfer_num, len;
	bool indir;

	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
	src_base = reg_a(meta->insn.src_reg * 2);
	len = range_end - range_start;
	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;

	indir = len > 8 * REG_WIDTH;
	/* Set up PREV_ALU for indirect mode. */
	if (indir)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Cache memory into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
}
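/* A packet cache covers a group of adjacent packet reads: the byte range
 * [range_start, range_end) is fetched into the transfer registers once by
 * mem_ldx_data_init_pktcache() above (on the load marked do_init) and the
 * individual loads are then served straight from those registers by the
 * two helpers below, for unaligned and aligned offsets respectively.
 */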
*/ 1901 if (size > 2 * REG_WIDTH - off) 1902 len_mid = REG_WIDTH; 1903 } 1904 1905 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); 1906 1907 if (!len_mid) { 1908 wrp_immed(nfp_prog, dst_hi, 0); 1909 return 0; 1910 } 1911 1912 src_mid = reg_xfer(idx + 1); 1913 1914 if (size <= REG_WIDTH) { 1915 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); 1916 wrp_immed(nfp_prog, dst_hi, 0); 1917 } else { 1918 swreg src_hi = reg_xfer(idx + 2); 1919 1920 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, 1921 REG_WIDTH - len_lo, len_lo); 1922 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo, 1923 REG_WIDTH - len_lo); 1924 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo, 1925 len_lo); 1926 } 1927 1928 return 0; 1929 } 1930 1931 static int 1932 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, 1933 struct nfp_insn_meta *meta, 1934 unsigned int size) 1935 { 1936 swreg dst_lo, dst_hi, src_lo; 1937 u8 dst_gpr, idx; 1938 1939 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; 1940 dst_gpr = meta->insn.dst_reg * 2; 1941 dst_hi = reg_both(dst_gpr + 1); 1942 dst_lo = reg_both(dst_gpr); 1943 src_lo = reg_xfer(idx); 1944 1945 if (size < REG_WIDTH) { 1946 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); 1947 wrp_immed(nfp_prog, dst_hi, 0); 1948 } else if (size == REG_WIDTH) { 1949 wrp_mov(nfp_prog, dst_lo, src_lo); 1950 wrp_immed(nfp_prog, dst_hi, 0); 1951 } else { 1952 swreg src_hi = reg_xfer(idx + 1); 1953 1954 wrp_mov(nfp_prog, dst_lo, src_lo); 1955 wrp_mov(nfp_prog, dst_hi, src_hi); 1956 } 1957 1958 return 0; 1959 } 1960 1961 static int 1962 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, 1963 struct nfp_insn_meta *meta, unsigned int size) 1964 { 1965 u8 off = meta->insn.off - meta->pkt_cache.range_start; 1966 1967 if (IS_ALIGNED(off, REG_WIDTH)) 1968 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); 1969 1970 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); 1971 } 1972 1973 static int 1974 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 1975 unsigned int size) 1976 { 1977 if (meta->ldst_gather_len) 1978 return nfp_cpp_memcpy(nfp_prog, meta); 1979 1980 if (meta->ptr.type == PTR_TO_CTX) { 1981 if (nfp_prog->type == BPF_PROG_TYPE_XDP) 1982 return mem_ldx_xdp(nfp_prog, meta, size); 1983 else 1984 return mem_ldx_skb(nfp_prog, meta, size); 1985 } 1986 1987 if (meta->ptr.type == PTR_TO_PACKET) { 1988 if (meta->pkt_cache.range_end) { 1989 if (meta->pkt_cache.do_init) 1990 mem_ldx_data_init_pktcache(nfp_prog, meta); 1991 1992 return mem_ldx_data_from_pktcache(nfp_prog, meta, size); 1993 } else { 1994 return mem_ldx_data(nfp_prog, meta, size); 1995 } 1996 } 1997 1998 if (meta->ptr.type == PTR_TO_STACK) 1999 return mem_ldx_stack(nfp_prog, meta, size, 2000 meta->ptr.off + meta->ptr.var_off.value); 2001 2002 if (meta->ptr.type == PTR_TO_MAP_VALUE) 2003 return mem_ldx_emem(nfp_prog, meta, size); 2004 2005 return -EOPNOTSUPP; 2006 } 2007 2008 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2009 { 2010 return mem_ldx(nfp_prog, meta, 1); 2011 } 2012 2013 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2014 { 2015 return mem_ldx(nfp_prog, meta, 2); 2016 } 2017 2018 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2019 { 2020 return mem_ldx(nfp_prog, meta, 4); 2021 } 2022 2023 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2024 { 2025 return mem_ldx(nfp_prog, meta, 8); 2026 } 2027 2028 static int 2029 
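/* Worked example for mem_ldx_data_from_pktcache_unaligned() above, assuming REG_WIDTH == 4 (32-bit transfer registers): an 8 byte load at offset 2 into the cached range gives idx == 0, off == 2, len_lo == 2 and len_mid == REG_WIDTH. dst_lo is then assembled from bytes 2-3 of xfer0 and bytes 0-1 of xfer1, dst_hi from bytes 2-3 of xfer1 and bytes 0-1 of xfer2 - three transfer registers in total, as the comment in that function says. */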
2028 static int 2029 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2030 unsigned int size) 2031 { 2032 u64 imm = meta->insn.imm; /* sign extend */ 2033 swreg off_reg; 2034 2035 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2036 2037 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 2038 imm, size); 2039 } 2040 2041 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2042 unsigned int size) 2043 { 2044 if (meta->ptr.type == PTR_TO_PACKET) 2045 return mem_st_data(nfp_prog, meta, size); 2046 2047 return -EOPNOTSUPP; 2048 } 2049 2050 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2051 { 2052 return mem_st(nfp_prog, meta, 1); 2053 } 2054 2055 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2056 { 2057 return mem_st(nfp_prog, meta, 2); 2058 } 2059 2060 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2061 { 2062 return mem_st(nfp_prog, meta, 4); 2063 } 2064 2065 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2066 { 2067 return mem_st(nfp_prog, meta, 8); 2068 } 2069 2070 static int 2071 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2072 unsigned int size) 2073 { 2074 swreg off_reg; 2075 2076 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2077 2078 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, 2079 meta->insn.src_reg * 2, size); 2080 } 2081 2082 static int 2083 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2084 unsigned int size, unsigned int ptr_off) 2085 { 2086 return mem_op_stack(nfp_prog, meta, size, ptr_off, 2087 meta->insn.src_reg * 2, meta->insn.dst_reg * 2, 2088 false, wrp_lmem_store); 2089 } 2090 2091 static int 2092 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, 2093 unsigned int size) 2094 { 2095 if (meta->ptr.type == PTR_TO_PACKET) 2096 return mem_stx_data(nfp_prog, meta, size); 2097 2098 if (meta->ptr.type == PTR_TO_STACK) 2099 return mem_stx_stack(nfp_prog, meta, size, 2100 meta->ptr.off + meta->ptr.var_off.value); 2101 2102 return -EOPNOTSUPP; 2103 } 2104 2105 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2106 { 2107 return mem_stx(nfp_prog, meta, 1); 2108 } 2109 2110 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2111 { 2112 return mem_stx(nfp_prog, meta, 2); 2113 } 2114 2115 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2116 { 2117 return mem_stx(nfp_prog, meta, 4); 2118 } 2119 2120 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2121 { 2122 return mem_stx(nfp_prog, meta, 8); 2123 } 2124 2125 static int 2126 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) 2127 { 2128 swreg addra, addrb, off, prev_alu = imm_a(nfp_prog); 2129 u8 dst_gpr = meta->insn.dst_reg * 2; 2130 u8 src_gpr = meta->insn.src_reg * 2; 2131 2132 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); 2133 2134 /* If the insn has an offset, add it to the address */ 2135 if (!meta->insn.off) { 2136 addra = reg_a(dst_gpr); 2137 addrb = reg_b(dst_gpr + 1); 2138 } else { 2139 emit_alu(nfp_prog, imma_a(nfp_prog), 2140 reg_a(dst_gpr), ALU_OP_ADD, off); 2141 emit_alu(nfp_prog, imma_b(nfp_prog), 2142 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0)); 2143 addra = imma_a(nfp_prog); 2144 addrb = imma_b(nfp_prog); 2145 } 2146 2147 wrp_immed(nfp_prog, prev_alu, 2148 FIELD_PREP(CMD_OVE_DATA, 2) | 2149 CMD_OVE_LEN | 2150
FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); 2151 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); 2152 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, 2153 addra, addrb, 0, CMD_CTX_NO_SWAP); 2154 2155 return 0; 2156 } 2157 2158 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2159 { 2160 return mem_xadd(nfp_prog, meta, false); 2161 } 2162 2163 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2164 { 2165 return mem_xadd(nfp_prog, meta, true); 2166 } 2167 2168 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2169 { 2170 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); 2171 2172 return 0; 2173 } 2174 2175 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2176 { 2177 const struct bpf_insn *insn = &meta->insn; 2178 u64 imm = insn->imm; /* sign extend */ 2179 swreg or1, or2, tmp_reg; 2180 2181 or1 = reg_a(insn->dst_reg * 2); 2182 or2 = reg_b(insn->dst_reg * 2 + 1); 2183 2184 if (imm & ~0U) { 2185 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 2186 emit_alu(nfp_prog, imm_a(nfp_prog), 2187 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 2188 or1 = imm_a(nfp_prog); 2189 } 2190 2191 if (imm >> 32) { 2192 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 2193 emit_alu(nfp_prog, imm_b(nfp_prog), 2194 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 2195 or2 = imm_b(nfp_prog); 2196 } 2197 2198 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); 2199 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 2200 2201 return 0; 2202 } 2203 2204 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2205 { 2206 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); 2207 } 2208 2209 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2210 { 2211 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); 2212 } 2213 2214 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2215 { 2216 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); 2217 } 2218 2219 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2220 { 2221 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); 2222 } 2223 2224 static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2225 { 2226 return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true); 2227 } 2228 2229 static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2230 { 2231 return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false); 2232 } 2233 2234 static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2235 { 2236 return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false); 2237 } 2238 2239 static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2240 { 2241 return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true); 2242 } 2243 2244 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2245 { 2246 const struct bpf_insn *insn = &meta->insn; 2247 u64 imm = insn->imm; /* sign extend */ 2248 swreg tmp_reg; 2249 2250 if (!imm) { 2251 meta->skip = true; 2252 return 0; 2253 } 2254 2255 if (imm & ~0U) { 2256 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 2257 emit_alu(nfp_prog, reg_none(), 2258 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); 2259 emit_br(nfp_prog, BR_BNE, insn->off, 0); 2260 } 2261 2262 if (imm >> 32) { 2263 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 2264 emit_alu(nfp_prog, reg_none(), 2265 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); 2266 emit_br(nfp_prog, BR_BNE, 
insn->off, 0); 2267 } 2268 2269 return 0; 2270 } 2271 2272 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2273 { 2274 const struct bpf_insn *insn = &meta->insn; 2275 u64 imm = insn->imm; /* sign extend */ 2276 swreg tmp_reg; 2277 2278 if (!imm) { 2279 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), 2280 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); 2281 emit_br(nfp_prog, BR_BNE, insn->off, 0); 2282 return 0; 2283 } 2284 2285 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); 2286 emit_alu(nfp_prog, reg_none(), 2287 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); 2288 emit_br(nfp_prog, BR_BNE, insn->off, 0); 2289 2290 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); 2291 emit_alu(nfp_prog, reg_none(), 2292 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); 2293 emit_br(nfp_prog, BR_BNE, insn->off, 0); 2294 2295 return 0; 2296 } 2297 2298 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2299 { 2300 const struct bpf_insn *insn = &meta->insn; 2301 2302 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), 2303 ALU_OP_XOR, reg_b(insn->src_reg * 2)); 2304 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), 2305 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); 2306 emit_alu(nfp_prog, reg_none(), 2307 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); 2308 emit_br(nfp_prog, BR_BEQ, insn->off, 0); 2309 2310 return 0; 2311 } 2312 2313 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2314 { 2315 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); 2316 } 2317 2318 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2319 { 2320 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); 2321 } 2322 2323 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2324 { 2325 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); 2326 } 2327 2328 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2329 { 2330 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); 2331 } 2332 2333 static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2334 { 2335 return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true); 2336 } 2337 2338 static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2339 { 2340 return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false); 2341 } 2342 2343 static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2344 { 2345 return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false); 2346 } 2347 2348 static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2349 { 2350 return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true); 2351 } 2352 2353 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2354 { 2355 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); 2356 } 2357 2358 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2359 { 2360 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); 2361 } 2362 2363 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2364 { 2365 switch (meta->insn.imm) { 2366 case BPF_FUNC_xdp_adjust_head: 2367 return adjust_head(nfp_prog, meta); 2368 case BPF_FUNC_map_lookup_elem: 2369 case BPF_FUNC_map_update_elem: 2370 case BPF_FUNC_map_delete_elem: 2371 return map_call_stack_common(nfp_prog, meta); 2372 default: 2373 WARN_ONCE(1, "verifier allowed unsupported function\n"); 2374 return -EOPNOTSUPP; 2375 } 2376 } 2377 2378 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 2379 { 
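/* BPF_EXIT becomes an unconditional branch to the common epilogue. The real target (tgt_out) depends on where the program is placed, so the branch carries RELO_BR_GO_OUT and is resolved in nfp_bpf_relo_for_vnic(). */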
2380 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); 2381 2382 return 0; 2383 } 2384 2385 static const instr_cb_t instr_cb[256] = { 2386 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, 2387 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, 2388 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, 2389 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, 2390 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, 2391 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, 2392 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, 2393 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, 2394 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, 2395 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, 2396 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, 2397 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, 2398 [BPF_ALU64 | BPF_NEG] = neg_reg64, 2399 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, 2400 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, 2401 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, 2402 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, 2403 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, 2404 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, 2405 [BPF_ALU | BPF_AND | BPF_X] = and_reg, 2406 [BPF_ALU | BPF_AND | BPF_K] = and_imm, 2407 [BPF_ALU | BPF_OR | BPF_X] = or_reg, 2408 [BPF_ALU | BPF_OR | BPF_K] = or_imm, 2409 [BPF_ALU | BPF_ADD | BPF_X] = add_reg, 2410 [BPF_ALU | BPF_ADD | BPF_K] = add_imm, 2411 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, 2412 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, 2413 [BPF_ALU | BPF_NEG] = neg_reg, 2414 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, 2415 [BPF_ALU | BPF_END | BPF_X] = end_reg32, 2416 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, 2417 [BPF_LD | BPF_ABS | BPF_B] = data_ld1, 2418 [BPF_LD | BPF_ABS | BPF_H] = data_ld2, 2419 [BPF_LD | BPF_ABS | BPF_W] = data_ld4, 2420 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, 2421 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, 2422 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, 2423 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, 2424 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, 2425 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, 2426 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, 2427 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, 2428 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, 2429 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, 2430 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, 2431 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, 2432 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, 2433 [BPF_ST | BPF_MEM | BPF_B] = mem_st1, 2434 [BPF_ST | BPF_MEM | BPF_H] = mem_st2, 2435 [BPF_ST | BPF_MEM | BPF_W] = mem_st4, 2436 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, 2437 [BPF_JMP | BPF_JA | BPF_K] = jump, 2438 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, 2439 [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, 2440 [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, 2441 [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, 2442 [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, 2443 [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm, 2444 [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm, 2445 [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm, 2446 [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm, 2447 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, 2448 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, 2449 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, 2450 [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, 2451 [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, 2452 [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, 2453 [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, 2454 [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg, 2455 [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg, 2456 [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg, 2457 [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg, 2458 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, 2459 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, 2460 [BPF_JMP | BPF_CALL] = call, 2461 [BPF_JMP | BPF_EXIT] = 
goto_out, 2462 }; 2463 2464 /* --- Assembler logic --- */ 2465 static int nfp_fixup_branches(struct nfp_prog *nfp_prog) 2466 { 2467 struct nfp_insn_meta *meta, *jmp_dst; 2468 u32 idx, br_idx; 2469 2470 list_for_each_entry(meta, &nfp_prog->insns, l) { 2471 if (meta->skip) 2472 continue; 2473 if (meta->insn.code == (BPF_JMP | BPF_CALL)) 2474 continue; 2475 if (BPF_CLASS(meta->insn.code) != BPF_JMP) 2476 continue; 2477 2478 if (list_is_last(&meta->l, &nfp_prog->insns)) 2479 br_idx = nfp_prog->last_bpf_off; 2480 else 2481 br_idx = list_next_entry(meta, l)->off - 1; 2482 2483 if (!nfp_is_br(nfp_prog->prog[br_idx])) { 2484 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", 2485 br_idx, meta->insn.code, nfp_prog->prog[br_idx]); 2486 return -ELOOP; 2487 } 2488 /* Leave special branches for later */ 2489 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != 2490 RELO_BR_REL) 2491 continue; 2492 2493 if (!meta->jmp_dst) { 2494 pr_err("Non-exit jump doesn't have destination info recorded!!\n"); 2495 return -ELOOP; 2496 } 2497 2498 jmp_dst = meta->jmp_dst; 2499 2500 if (jmp_dst->skip) { 2501 pr_err("Branch landing on removed instruction!!\n"); 2502 return -ELOOP; 2503 } 2504 2505 for (idx = meta->off; idx <= br_idx; idx++) { 2506 if (!nfp_is_br(nfp_prog->prog[idx])) 2507 continue; 2508 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); 2509 } 2510 } 2511 2512 return 0; 2513 } 2514 2515 static void nfp_intro(struct nfp_prog *nfp_prog) 2516 { 2517 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); 2518 emit_alu(nfp_prog, plen_reg(nfp_prog), 2519 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); 2520 } 2521 2522 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) 2523 { 2524 /* TC direct-action mode: 2525 * 0,1 ok NOT SUPPORTED[1] 2526 * 2 drop 0x22 -> drop, count as stat1 2527 * 4,5 nuke 0x02 -> drop 2528 * 7 redir 0x44 -> redir, count as stat2 2529 * * unspec 0x11 -> pass, count as stat0 2530 * 2531 * [1] We can't support OK and RECLASSIFY because we can't tell TC 2532 * the exact decision made. We are forced to support UNSPEC 2533 * to handle aborts so that's the only one we handle for passing 2534 * packets up the stack. 
2535 */ 2536 /* Target for aborts */ 2537 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2538 2539 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 2540 2541 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2542 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); 2543 2544 /* Target for normal exits */ 2545 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2546 2547 /* if R0 > 7 jump to abort */ 2548 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); 2549 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2550 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2551 2552 wrp_immed(nfp_prog, reg_b(2), 0x41221211); 2553 wrp_immed(nfp_prog, reg_b(3), 0x41001211); 2554 2555 emit_shf(nfp_prog, reg_a(1), 2556 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); 2557 2558 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2559 emit_shf(nfp_prog, reg_a(2), 2560 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2561 2562 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2563 emit_shf(nfp_prog, reg_b(2), 2564 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); 2565 2566 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 2567 2568 emit_shf(nfp_prog, reg_b(2), 2569 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); 2570 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2571 } 2572 2573 static void nfp_outro_xdp(struct nfp_prog *nfp_prog) 2574 { 2575 /* XDP return codes: 2576 * 0 aborted 0x82 -> drop, count as stat3 2577 * 1 drop 0x22 -> drop, count as stat1 2578 * 2 pass 0x11 -> pass, count as stat0 2579 * 3 tx 0x44 -> redir, count as stat2 2580 * * unknown 0x82 -> drop, count as stat3 2581 */ 2582 /* Target for aborts */ 2583 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); 2584 2585 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 2586 2587 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2588 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); 2589 2590 /* Target for normal exits */ 2591 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); 2592 2593 /* if R0 > 3 jump to abort */ 2594 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0)); 2595 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); 2596 2597 wrp_immed(nfp_prog, reg_b(2), 0x44112282); 2598 2599 emit_shf(nfp_prog, reg_a(1), 2600 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3); 2601 2602 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); 2603 emit_shf(nfp_prog, reg_b(2), 2604 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); 2605 2606 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); 2607 2608 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); 2609 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); 2610 } 2611 2612 static void nfp_outro(struct nfp_prog *nfp_prog) 2613 { 2614 switch (nfp_prog->type) { 2615 case BPF_PROG_TYPE_SCHED_CLS: 2616 nfp_outro_tc_da(nfp_prog); 2617 break; 2618 case BPF_PROG_TYPE_XDP: 2619 nfp_outro_xdp(nfp_prog); 2620 break; 2621 default: 2622 WARN_ON(1); 2623 } 2624 } 2625 2626 static int nfp_translate(struct nfp_prog *nfp_prog) 2627 { 2628 struct nfp_insn_meta *meta; 2629 int err; 2630 2631 nfp_intro(nfp_prog); 2632 if (nfp_prog->error) 2633 return nfp_prog->error; 2634 2635 list_for_each_entry(meta, &nfp_prog->insns, l) { 2636 instr_cb_t cb = instr_cb[meta->insn.code]; 2637 2638 meta->off = nfp_prog_current_offset(nfp_prog); 2639 2640 if (meta->skip) { 2641 
nfp_prog->n_translated++; 2642 continue; 2643 } 2644 2645 if (nfp_meta_has_prev(nfp_prog, meta) && 2646 nfp_meta_prev(meta)->double_cb) 2647 cb = nfp_meta_prev(meta)->double_cb; 2648 if (!cb) 2649 return -ENOENT; 2650 err = cb(nfp_prog, meta); 2651 if (err) 2652 return err; 2653 2654 nfp_prog->n_translated++; 2655 } 2656 2657 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; 2658 2659 nfp_outro(nfp_prog); 2660 if (nfp_prog->error) 2661 return nfp_prog->error; 2662 2663 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); 2664 if (nfp_prog->error) 2665 return nfp_prog->error; 2666 2667 return nfp_fixup_branches(nfp_prog); 2668 } 2669 2670 /* --- Optimizations --- */ 2671 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) 2672 { 2673 struct nfp_insn_meta *meta; 2674 2675 list_for_each_entry(meta, &nfp_prog->insns, l) { 2676 struct bpf_insn insn = meta->insn; 2677 2678 /* Programs converted from cBPF start with register xoring */ 2679 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && 2680 insn.src_reg == insn.dst_reg) 2681 continue; 2682 2683 /* Programs start with R6 = R1 but we ignore the skb pointer */ 2684 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && 2685 insn.src_reg == 1 && insn.dst_reg == 6) 2686 meta->skip = true; 2687 2688 /* Return as soon as something doesn't match */ 2689 if (!meta->skip) 2690 return; 2691 } 2692 } 2693 2694 /* Remove masking after load since our load guarantees this is not needed */ 2695 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) 2696 { 2697 struct nfp_insn_meta *meta1, *meta2; 2698 const s32 exp_mask[] = { 2699 [BPF_B] = 0x000000ffU, 2700 [BPF_H] = 0x0000ffffU, 2701 [BPF_W] = 0xffffffffU, 2702 }; 2703 2704 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2705 struct bpf_insn insn, next; 2706 2707 insn = meta1->insn; 2708 next = meta2->insn; 2709 2710 if (BPF_CLASS(insn.code) != BPF_LD) 2711 continue; 2712 if (BPF_MODE(insn.code) != BPF_ABS && 2713 BPF_MODE(insn.code) != BPF_IND) 2714 continue; 2715 2716 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) 2717 continue; 2718 2719 if (!exp_mask[BPF_SIZE(insn.code)]) 2720 continue; 2721 if (exp_mask[BPF_SIZE(insn.code)] != next.imm) 2722 continue; 2723 2724 if (next.src_reg || next.dst_reg) 2725 continue; 2726 2727 if (meta2->flags & FLAG_INSN_IS_JUMP_DST) 2728 continue; 2729 2730 meta2->skip = true; 2731 } 2732 } 2733 2734 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) 2735 { 2736 struct nfp_insn_meta *meta1, *meta2, *meta3; 2737 2738 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { 2739 struct bpf_insn insn, next1, next2; 2740 2741 insn = meta1->insn; 2742 next1 = meta2->insn; 2743 next2 = meta3->insn; 2744 2745 if (BPF_CLASS(insn.code) != BPF_LD) 2746 continue; 2747 if (BPF_MODE(insn.code) != BPF_ABS && 2748 BPF_MODE(insn.code) != BPF_IND) 2749 continue; 2750 if (BPF_SIZE(insn.code) != BPF_W) 2751 continue; 2752 2753 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && 2754 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && 2755 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && 2756 next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) 2757 continue; 2758 2759 if (next1.src_reg || next1.dst_reg || 2760 next2.src_reg || next2.dst_reg) 2761 continue; 2762 2763 if (next1.imm != 0x20 || next2.imm != 0x20) 2764 continue; 2765 2766 if (meta2->flags & FLAG_INSN_IS_JUMP_DST || 2767 meta3->flags & FLAG_INSN_IS_JUMP_DST) 2768 continue; 2769 2770 meta2->skip = true; 2771 meta3->skip = true; 2772 } 2773 } 2774
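/* For illustration, the kind of sequence nfp_bpf_opt_ld_shift() above matches (the opposite shift order is accepted as well): ld_w R0, [pkt] ; lsh64 R0, 0x20 ; rsh64 R0, 0x20. Both shifts get skipped; as with the masking case above, the load itself already provides what the shift pair is there to guarantee. */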
2775 /* load/store pair that forms a memory copy should look like the following: 2776 * 2777 * ld_width R, [addr_src + offset_src] 2778 * st_width [addr_dest + offset_dest], R 2779 * 2780 * The destination register of the load and the source register of the store 2781 * should be the same, and the load and store should also be of the same width. 2782 * If either addr_src or addr_dest is the stack pointer, we don't do the 2783 * CPP optimization, as the stack is modelled by registers on the NFP. 2784 */ 2785 static bool 2786 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, 2787 struct nfp_insn_meta *st_meta) 2788 { 2789 struct bpf_insn *ld = &ld_meta->insn; 2790 struct bpf_insn *st = &st_meta->insn; 2791 2792 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) 2793 return false; 2794 2795 if (ld_meta->ptr.type != PTR_TO_PACKET) 2796 return false; 2797 2798 if (st_meta->ptr.type != PTR_TO_PACKET) 2799 return false; 2800 2801 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) 2802 return false; 2803 2804 if (ld->dst_reg != st->src_reg) 2805 return false; 2806 2807 /* There is a jump to the store insn in this pair. */ 2808 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) 2809 return false; 2810 2811 return true; 2812 } 2813 2814 /* Currently, we only support chaining load/store pairs if: 2815 * 2816 * - Their address base registers are the same. 2817 * - Their address offsets are in the same order. 2818 * - They operate at the same memory width. 2819 * - There is no jump into the middle of them. 2820 */ 2821 static bool 2822 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, 2823 struct nfp_insn_meta *st_meta, 2824 struct bpf_insn *prev_ld, 2825 struct bpf_insn *prev_st) 2826 { 2827 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; 2828 struct bpf_insn *ld = &ld_meta->insn; 2829 struct bpf_insn *st = &st_meta->insn; 2830 s16 prev_ld_off, prev_st_off; 2831 2832 /* This pair is the start pair. */ 2833 if (!prev_ld) 2834 return true; 2835 2836 prev_size = BPF_LDST_BYTES(prev_ld); 2837 curr_size = BPF_LDST_BYTES(ld); 2838 prev_ld_base = prev_ld->src_reg; 2839 prev_st_base = prev_st->dst_reg; 2840 prev_ld_dst = prev_ld->dst_reg; 2841 prev_ld_off = prev_ld->off; 2842 prev_st_off = prev_st->off; 2843 2844 if (ld->dst_reg != prev_ld_dst) 2845 return false; 2846 2847 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) 2848 return false; 2849 2850 if (curr_size != prev_size) 2851 return false; 2852 2853 /* There is a jump to the head of this pair. */ 2854 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) 2855 return false; 2856 2857 /* Both in ascending order. */ 2858 if (prev_ld_off + prev_size == ld->off && 2859 prev_st_off + prev_size == st->off) 2860 return true; 2861 2862 /* Both in descending order. */ 2863 if (ld->off + curr_size == prev_ld_off && 2864 st->off + curr_size == prev_st_off) 2865 return true; 2866 2867 return false; 2868 } 2869 2870 /* Return TRUE if a cross memory access happens. A cross memory access means 2871 * the store area overlaps with the load area, so that a later load might read 2872 * the value from a previous store; in this case we can't treat the sequence 2873 * as a memory copy. 2874 */ 2875 static bool 2876 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, 2877 struct nfp_insn_meta *head_st_meta) 2878 { 2879 s16 head_ld_off, head_st_off, ld_off; 2880 2881 /* Different pointer types do not overlap. */ 2882 if (head_ld_meta->ptr.type != head_st_meta->ptr.type) 2883 return false; 2884
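/* Example of the overlap the offset checks below catch: loads at pkt+0, pkt+4, pkt+8 paired with stores at pkt+4, pkt+8, pkt+12 - the second load would read bytes the first store has just written, so the sequence is not a plain copy. */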
2885 /* load and store are both PTR_TO_PACKET, check ID info. */ 2886 if (head_ld_meta->ptr.id != head_st_meta->ptr.id) 2887 return true; 2888 2889 /* Canonicalize the offsets. Turn all of them against the original 2890 * base register. 2891 */ 2892 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; 2893 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; 2894 ld_off = ld->off + head_ld_meta->ptr.off; 2895 2896 /* Ascending order cross. */ 2897 if (ld_off > head_ld_off && 2898 head_ld_off < head_st_off && ld_off >= head_st_off) 2899 return true; 2900 2901 /* Descending order cross. */ 2902 if (ld_off < head_ld_off && 2903 head_ld_off > head_st_off && ld_off <= head_st_off) 2904 return true; 2905 2906 return false; 2907 } 2908 2909 /* This pass tries to identify the following instruction sequences. 2910 * 2911 * load R, [regA + offA] 2912 * store [regB + offB], R 2913 * load R, [regA + offA + const_imm_A] 2914 * store [regB + offB + const_imm_A], R 2915 * load R, [regA + offA + 2 * const_imm_A] 2916 * store [regB + offB + 2 * const_imm_A], R 2917 * ... 2918 * 2919 * The above sequence is typically generated by the compiler when lowering 2920 * memcpy(). The NFP prefers using CPP instructions to accelerate it. 2921 */ 2922 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) 2923 { 2924 struct nfp_insn_meta *head_ld_meta = NULL; 2925 struct nfp_insn_meta *head_st_meta = NULL; 2926 struct nfp_insn_meta *meta1, *meta2; 2927 struct bpf_insn *prev_ld = NULL; 2928 struct bpf_insn *prev_st = NULL; 2929 u8 count = 0; 2930 2931 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { 2932 struct bpf_insn *ld = &meta1->insn; 2933 struct bpf_insn *st = &meta2->insn; 2934 2935 /* Reset record status if any of the following is true: 2936 * - The current insn pair is not load/store. 2937 * - The load/store pair doesn't chain with the previous one. 2938 * - The chained load/store pair crosses with the previous pair. 2939 * - The chained load/store pair has a total size of memory 2940 * copy beyond 128 bytes, which is the maximum length a 2941 * single NFP CPP command can transfer. 2942 */ 2943 if (!curr_pair_is_memcpy(meta1, meta2) || 2944 !curr_pair_chain_with_previous(meta1, meta2, prev_ld, 2945 prev_st) || 2946 (head_ld_meta && (cross_mem_access(ld, head_ld_meta, 2947 head_st_meta) || 2948 head_ld_meta->ldst_gather_len >= 128))) { 2949 if (!count) 2950 continue; 2951 2952 if (count > 1) { 2953 s16 prev_ld_off = prev_ld->off; 2954 s16 prev_st_off = prev_st->off; 2955 s16 head_ld_off = head_ld_meta->insn.off; 2956 2957 if (prev_ld_off < head_ld_off) { 2958 head_ld_meta->insn.off = prev_ld_off; 2959 head_st_meta->insn.off = prev_st_off; 2960 head_ld_meta->ldst_gather_len = 2961 -head_ld_meta->ldst_gather_len; 2962 } 2963 2964 head_ld_meta->paired_st = &head_st_meta->insn; 2965 head_st_meta->skip = true; 2966 } else { 2967 head_ld_meta->ldst_gather_len = 0; 2968 } 2969 2970 /* If the chain is ended by a load/store pair then this 2971 * could serve as the new head of the next chain.
2972 */ 2973 if (curr_pair_is_memcpy(meta1, meta2)) { 2974 head_ld_meta = meta1; 2975 head_st_meta = meta2; 2976 head_ld_meta->ldst_gather_len = 2977 BPF_LDST_BYTES(ld); 2978 meta1 = nfp_meta_next(meta1); 2979 meta2 = nfp_meta_next(meta2); 2980 prev_ld = ld; 2981 prev_st = st; 2982 count = 1; 2983 } else { 2984 head_ld_meta = NULL; 2985 head_st_meta = NULL; 2986 prev_ld = NULL; 2987 prev_st = NULL; 2988 count = 0; 2989 } 2990 2991 continue; 2992 } 2993 2994 if (!head_ld_meta) { 2995 head_ld_meta = meta1; 2996 head_st_meta = meta2; 2997 } else { 2998 meta1->skip = true; 2999 meta2->skip = true; 3000 } 3001 3002 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); 3003 meta1 = nfp_meta_next(meta1); 3004 meta2 = nfp_meta_next(meta2); 3005 prev_ld = ld; 3006 prev_st = st; 3007 count++; 3008 } 3009 } 3010 3011 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) 3012 { 3013 struct nfp_insn_meta *meta, *range_node = NULL; 3014 s16 range_start = 0, range_end = 0; 3015 bool cache_avail = false; 3016 struct bpf_insn *insn; 3017 s32 range_ptr_off = 0; 3018 u32 range_ptr_id = 0; 3019 3020 list_for_each_entry(meta, &nfp_prog->insns, l) { 3021 if (meta->flags & FLAG_INSN_IS_JUMP_DST) 3022 cache_avail = false; 3023 3024 if (meta->skip) 3025 continue; 3026 3027 insn = &meta->insn; 3028 3029 if (is_mbpf_store_pkt(meta) || 3030 insn->code == (BPF_JMP | BPF_CALL) || 3031 is_mbpf_classic_store_pkt(meta) || 3032 is_mbpf_classic_load(meta)) { 3033 cache_avail = false; 3034 continue; 3035 } 3036 3037 if (!is_mbpf_load(meta)) 3038 continue; 3039 3040 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) { 3041 cache_avail = false; 3042 continue; 3043 } 3044 3045 if (!cache_avail) { 3046 cache_avail = true; 3047 if (range_node) 3048 goto end_current_then_start_new; 3049 goto start_new; 3050 } 3051 3052 /* Check ID to make sure two reads share the same 3053 * variable offset against PTR_TO_PACKET, and check OFF 3054 * to make sure they also share the same constant 3055 * offset. 3056 * 3057 * OFFs don't really need to be the same, because they 3058 * are the constant offsets against PTR_TO_PACKET, so 3059 * for different OFFs, we could canonicalize them to 3060 * offsets against original packet pointer. We don't 3061 * support this. 3062 */ 3063 if (meta->ptr.id == range_ptr_id && 3064 meta->ptr.off == range_ptr_off) { 3065 s16 new_start = range_start; 3066 s16 end, off = insn->off; 3067 s16 new_end = range_end; 3068 bool changed = false; 3069 3070 if (off < range_start) { 3071 new_start = off; 3072 changed = true; 3073 } 3074 3075 end = off + BPF_LDST_BYTES(insn); 3076 if (end > range_end) { 3077 new_end = end; 3078 changed = true; 3079 } 3080 3081 if (!changed) 3082 continue; 3083 3084 if (new_end - new_start <= 64) { 3085 /* Install new range. 
*/ 3086 range_start = new_start; 3087 range_end = new_end; 3088 continue; 3089 } 3090 } 3091 3092 end_current_then_start_new: 3093 range_node->pkt_cache.range_start = range_start; 3094 range_node->pkt_cache.range_end = range_end; 3095 start_new: 3096 range_node = meta; 3097 range_node->pkt_cache.do_init = true; 3098 range_ptr_id = range_node->ptr.id; 3099 range_ptr_off = range_node->ptr.off; 3100 range_start = insn->off; 3101 range_end = insn->off + BPF_LDST_BYTES(insn); 3102 } 3103 3104 if (range_node) { 3105 range_node->pkt_cache.range_start = range_start; 3106 range_node->pkt_cache.range_end = range_end; 3107 } 3108 3109 list_for_each_entry(meta, &nfp_prog->insns, l) { 3110 if (meta->skip) 3111 continue; 3112 3113 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { 3114 if (meta->pkt_cache.do_init) { 3115 range_start = meta->pkt_cache.range_start; 3116 range_end = meta->pkt_cache.range_end; 3117 } else { 3118 meta->pkt_cache.range_start = range_start; 3119 meta->pkt_cache.range_end = range_end; 3120 } 3121 } 3122 } 3123 } 3124 3125 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) 3126 { 3127 nfp_bpf_opt_reg_init(nfp_prog); 3128 3129 nfp_bpf_opt_ld_mask(nfp_prog); 3130 nfp_bpf_opt_ld_shift(nfp_prog); 3131 nfp_bpf_opt_ldst_gather(nfp_prog); 3132 nfp_bpf_opt_pkt_cache(nfp_prog); 3133 3134 return 0; 3135 } 3136 3137 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) 3138 { 3139 __le64 *ustore = (__force __le64 *)prog; 3140 int i; 3141 3142 for (i = 0; i < len; i++) { 3143 int err; 3144 3145 err = nfp_ustore_check_valid_no_ecc(prog[i]); 3146 if (err) 3147 return err; 3148 3149 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); 3150 } 3151 3152 return 0; 3153 } 3154 3155 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) 3156 { 3157 void *prog; 3158 3159 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); 3160 if (!prog) 3161 return; 3162 3163 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); 3164 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); 3165 kvfree(nfp_prog->prog); 3166 nfp_prog->prog = prog; 3167 } 3168 3169 int nfp_bpf_jit(struct nfp_prog *nfp_prog) 3170 { 3171 int ret; 3172 3173 ret = nfp_bpf_optimize(nfp_prog); 3174 if (ret) 3175 return ret; 3176 3177 ret = nfp_translate(nfp_prog); 3178 if (ret) { 3179 pr_err("Translation failed with error %d (translated: %u)\n", 3180 ret, nfp_prog->n_translated); 3181 return -EINVAL; 3182 } 3183 3184 nfp_bpf_prog_trim(nfp_prog); 3185 3186 return ret; 3187 } 3188 3189 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) 3190 { 3191 struct nfp_insn_meta *meta; 3192 3193 /* Another pass to record jump information. 
*/ 3194 list_for_each_entry(meta, &nfp_prog->insns, l) { 3195 u64 code = meta->insn.code; 3196 3197 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && 3198 BPF_OP(code) != BPF_CALL) { 3199 struct nfp_insn_meta *dst_meta; 3200 unsigned short dst_indx; 3201 3202 dst_indx = meta->n + 1 + meta->insn.off; 3203 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, 3204 cnt); 3205 3206 meta->jmp_dst = dst_meta; 3207 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; 3208 } 3209 } 3210 } 3211 3212 bool nfp_bpf_supported_opcode(u8 code) 3213 { 3214 return !!instr_cb[code]; 3215 } 3216 3217 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) 3218 { 3219 unsigned int i; 3220 u64 *prog; 3221 int err; 3222 3223 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), 3224 GFP_KERNEL); 3225 if (!prog) 3226 return ERR_PTR(-ENOMEM); 3227 3228 for (i = 0; i < nfp_prog->prog_len; i++) { 3229 enum nfp_relo_type special; 3230 u32 val; 3231 3232 special = FIELD_GET(OP_RELO_TYPE, prog[i]); 3233 switch (special) { 3234 case RELO_NONE: 3235 continue; 3236 case RELO_BR_REL: 3237 br_add_offset(&prog[i], bv->start_off); 3238 break; 3239 case RELO_BR_GO_OUT: 3240 br_set_offset(&prog[i], 3241 nfp_prog->tgt_out + bv->start_off); 3242 break; 3243 case RELO_BR_GO_ABORT: 3244 br_set_offset(&prog[i], 3245 nfp_prog->tgt_abort + bv->start_off); 3246 break; 3247 case RELO_BR_NEXT_PKT: 3248 br_set_offset(&prog[i], bv->tgt_done); 3249 break; 3250 case RELO_BR_HELPER: 3251 val = br_get_offset(prog[i]); 3252 val -= BR_OFF_RELO; 3253 switch (val) { 3254 case BPF_FUNC_map_lookup_elem: 3255 val = nfp_prog->bpf->helpers.map_lookup; 3256 break; 3257 case BPF_FUNC_map_update_elem: 3258 val = nfp_prog->bpf->helpers.map_update; 3259 break; 3260 case BPF_FUNC_map_delete_elem: 3261 val = nfp_prog->bpf->helpers.map_delete; 3262 break; 3263 default: 3264 pr_err("relocation of unknown helper %d\n", 3265 val); 3266 err = -EINVAL; 3267 goto err_free_prog; 3268 } 3269 br_set_offset(&prog[i], val); 3270 break; 3271 case RELO_IMMED_REL: 3272 immed_add_value(&prog[i], bv->start_off); 3273 break; 3274 } 3275 3276 prog[i] &= ~OP_RELO_TYPE; 3277 } 3278 3279 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); 3280 if (err) 3281 goto err_free_prog; 3282 3283 return prog; 3284 3285 err_free_prog: 3286 kfree(prog); 3287 return ERR_PTR(err); 3288 } 3289
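/* The buffer returned by nfp_bpf_relo_for_vnic() above is a per-vNIC copy: relative branches and immediates are rebased by bv->start_off, the GO_OUT/GO_ABORT branches are pointed at the epilogue targets recorded during translation, helper calls are pointed at the helper entry points from nfp_prog->bpf->helpers, and nfp_bpf_ustore_calc() finally converts each word to little-endian with its ECC bits filled in. */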