1 /*
2  * Copyright (C) 2016-2018 Netronome Systems, Inc.
3  *
 * This software is dual licensed under the GNU General Public License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree or the BSD 2-Clause License provided below.  You have the
7  * option to license this software under the complete terms of either license.
8  *
9  * The BSD 2-Clause License:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      1. Redistributions of source code must retain the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer.
18  *
19  *      2. Redistributions in binary form must reproduce the above
20  *         copyright notice, this list of conditions and the following
21  *         disclaimer in the documentation and/or other materials
22  *         provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt)	"NFP net bpf: " fmt
35 
36 #include <linux/bug.h>
37 #include <linux/kernel.h>
38 #include <linux/bpf.h>
39 #include <linux/filter.h>
40 #include <linux/pkt_cls.h>
41 #include <linux/unistd.h>
42 
43 #include "main.h"
44 #include "../nfp_asm.h"
45 
46 /* --- NFP prog --- */
/* The for-each "multiple entries" macros provide pos and next<n> pointers.
 * It's safe to modify the next pointers (but not pos).
49  */
50 #define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
51 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
52 	     next = list_next_entry(pos, l);			\
53 	     &(nfp_prog)->insns != &pos->l &&			\
54 	     &(nfp_prog)->insns != &next->l;			\
55 	     pos = nfp_meta_next(pos),				\
56 	     next = nfp_meta_next(pos))
57 
58 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
59 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
60 	     next = list_next_entry(pos, l),			\
61 	     next2 = list_next_entry(next, l);			\
62 	     &(nfp_prog)->insns != &pos->l &&			\
63 	     &(nfp_prog)->insns != &next->l &&			\
64 	     &(nfp_prog)->insns != &next2->l;			\
65 	     pos = nfp_meta_next(pos),				\
66 	     next = nfp_meta_next(pos),				\
67 	     next2 = nfp_meta_next(next))
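
/* Usage sketch (illustrative, not taken from this file): peephole-style
 * passes walk adjacent instruction pairs, e.g.
 *
 *	struct nfp_insn_meta *meta1, *meta2;
 *
 *	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
 *		...inspect meta1/meta2; per the note above, the next
 *		   pointer (meta2) may be modified, pos (meta1) may not...
 *	}
 */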
68 
69 static bool
70 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
71 {
72 	return meta->l.prev != &nfp_prog->insns;
73 }
74 
75 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
76 {
77 	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
78 		pr_warn("instruction limit reached (%u NFP instructions)\n",
79 			nfp_prog->prog_len);
80 		nfp_prog->error = -ENOSPC;
81 		return;
82 	}
83 
84 	nfp_prog->prog[nfp_prog->prog_len] = insn;
85 	nfp_prog->prog_len++;
86 }
87 
88 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
89 {
90 	return nfp_prog->prog_len;
91 }
92 
93 static bool
94 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
95 {
96 	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the translation
98 	 * will fail anyway, so just return OK.
99 	 */
100 	if (nfp_prog->error)
101 		return true;
102 	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
103 }
104 
105 /* --- Emitters --- */
106 static void
107 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
108 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
109 	   bool indir)
110 {
111 	u64 insn;
112 
113 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
114 		FIELD_PREP(OP_CMD_CTX, ctx) |
115 		FIELD_PREP(OP_CMD_B_SRC, breg) |
116 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
117 		FIELD_PREP(OP_CMD_XFER, xfer) |
118 		FIELD_PREP(OP_CMD_CNT, size) |
119 		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
120 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
121 		FIELD_PREP(OP_CMD_INDIR, indir) |
122 		FIELD_PREP(OP_CMD_MODE, mode);
123 
124 	nfp_prog_push(nfp_prog, insn);
125 }
126 
127 static void
128 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
129 	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
130 {
131 	struct nfp_insn_re_regs reg;
132 	int err;
133 
134 	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
135 	if (err) {
136 		nfp_prog->error = err;
137 		return;
138 	}
139 	if (reg.swap) {
140 		pr_err("cmd can't swap arguments\n");
141 		nfp_prog->error = -EFAULT;
142 		return;
143 	}
144 	if (reg.dst_lmextn || reg.src_lmextn) {
145 		pr_err("cmd can't use LMextn\n");
146 		nfp_prog->error = -EFAULT;
147 		return;
148 	}
149 
150 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
151 		   indir);
152 }
153 
154 static void
155 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
156 	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
157 {
158 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
159 }
160 
161 static void
162 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
163 	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
164 {
165 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
166 }
167 
168 static void
169 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
170 	  enum br_ctx_signal_state css, u16 addr, u8 defer)
171 {
172 	u16 addr_lo, addr_hi;
173 	u64 insn;
174 
175 	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
176 	addr_hi = addr != addr_lo;
177 
178 	insn = OP_BR_BASE |
179 		FIELD_PREP(OP_BR_MASK, mask) |
180 		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
181 		FIELD_PREP(OP_BR_CSS, css) |
182 		FIELD_PREP(OP_BR_DEFBR, defer) |
183 		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
184 		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
185 
186 	nfp_prog_push(nfp_prog, insn);
187 }
188 
189 static void
190 emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
191 	     enum nfp_relo_type relo)
192 {
193 	if (mask == BR_UNC && defer > 2) {
194 		pr_err("BUG: branch defer out of bounds %d\n", defer);
195 		nfp_prog->error = -EFAULT;
196 		return;
197 	}
198 
199 	__emit_br(nfp_prog, mask,
200 		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
201 		  BR_CSS_NONE, addr, defer);
202 
203 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
204 		FIELD_PREP(OP_RELO_TYPE, relo);
205 }
206 
207 static void
208 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
209 {
210 	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
211 }
212 
213 static void
214 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
215 	     enum immed_width width, bool invert,
216 	     enum immed_shift shift, bool wr_both,
217 	     bool dst_lmextn, bool src_lmextn)
218 {
219 	u64 insn;
220 
221 	insn = OP_IMMED_BASE |
222 		FIELD_PREP(OP_IMMED_A_SRC, areg) |
223 		FIELD_PREP(OP_IMMED_B_SRC, breg) |
224 		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
225 		FIELD_PREP(OP_IMMED_WIDTH, width) |
226 		FIELD_PREP(OP_IMMED_INV, invert) |
227 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
228 		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
229 		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
230 		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
231 
232 	nfp_prog_push(nfp_prog, insn);
233 }
234 
235 static void
236 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
237 	   enum immed_width width, bool invert, enum immed_shift shift)
238 {
239 	struct nfp_insn_ur_regs reg;
240 	int err;
241 
242 	if (swreg_type(dst) == NN_REG_IMM) {
243 		nfp_prog->error = -EFAULT;
244 		return;
245 	}
246 
247 	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
248 	if (err) {
249 		nfp_prog->error = err;
250 		return;
251 	}
252 
253 	/* Use reg.dst when destination is No-Dest. */
254 	__emit_immed(nfp_prog,
255 		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
256 		     reg.breg, imm >> 8, width, invert, shift,
257 		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
258 }
259 
260 static void
261 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
262 	   enum shf_sc sc, u8 shift,
263 	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
264 	   bool dst_lmextn, bool src_lmextn)
265 {
266 	u64 insn;
267 
268 	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
269 		nfp_prog->error = -EFAULT;
270 		return;
271 	}
272 
273 	if (sc == SHF_SC_L_SHF)
274 		shift = 32 - shift;
275 
276 	insn = OP_SHF_BASE |
277 		FIELD_PREP(OP_SHF_A_SRC, areg) |
278 		FIELD_PREP(OP_SHF_SC, sc) |
279 		FIELD_PREP(OP_SHF_B_SRC, breg) |
280 		FIELD_PREP(OP_SHF_I8, i8) |
281 		FIELD_PREP(OP_SHF_SW, sw) |
282 		FIELD_PREP(OP_SHF_DST, dst) |
283 		FIELD_PREP(OP_SHF_SHIFT, shift) |
284 		FIELD_PREP(OP_SHF_OP, op) |
285 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
286 		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
287 		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
288 		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
289 
290 	nfp_prog_push(nfp_prog, insn);
291 }
292 
293 static void
294 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
295 	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
296 {
297 	struct nfp_insn_re_regs reg;
298 	int err;
299 
300 	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
301 	if (err) {
302 		nfp_prog->error = err;
303 		return;
304 	}
305 
306 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
307 		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
308 		   reg.dst_lmextn, reg.src_lmextn);
309 }
310 
311 static void
312 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
313 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
314 	   bool dst_lmextn, bool src_lmextn)
315 {
316 	u64 insn;
317 
318 	insn = OP_ALU_BASE |
319 		FIELD_PREP(OP_ALU_A_SRC, areg) |
320 		FIELD_PREP(OP_ALU_B_SRC, breg) |
321 		FIELD_PREP(OP_ALU_DST, dst) |
322 		FIELD_PREP(OP_ALU_SW, swap) |
323 		FIELD_PREP(OP_ALU_OP, op) |
324 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
325 		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
326 		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
327 		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
328 
329 	nfp_prog_push(nfp_prog, insn);
330 }
331 
332 static void
333 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
334 	 swreg lreg, enum alu_op op, swreg rreg)
335 {
336 	struct nfp_insn_ur_regs reg;
337 	int err;
338 
339 	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
340 	if (err) {
341 		nfp_prog->error = err;
342 		return;
343 	}
344 
345 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
346 		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
347 		   reg.dst_lmextn, reg.src_lmextn);
348 }
349 
350 static void
351 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
352 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
353 		bool zero, bool swap, bool wr_both,
354 		bool dst_lmextn, bool src_lmextn)
355 {
356 	u64 insn;
357 
358 	insn = OP_LDF_BASE |
359 		FIELD_PREP(OP_LDF_A_SRC, areg) |
360 		FIELD_PREP(OP_LDF_SC, sc) |
361 		FIELD_PREP(OP_LDF_B_SRC, breg) |
362 		FIELD_PREP(OP_LDF_I8, imm8) |
363 		FIELD_PREP(OP_LDF_SW, swap) |
364 		FIELD_PREP(OP_LDF_ZF, zero) |
365 		FIELD_PREP(OP_LDF_BMASK, bmask) |
366 		FIELD_PREP(OP_LDF_SHF, shift) |
367 		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
368 		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
369 		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
370 
371 	nfp_prog_push(nfp_prog, insn);
372 }
373 
374 static void
375 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
376 		  enum shf_sc sc, u8 shift, bool zero)
377 {
378 	struct nfp_insn_re_regs reg;
379 	int err;
380 
381 	/* Note: ld_field is special as it uses one of the src regs as dst */
382 	err = swreg_to_restricted(dst, dst, src, &reg, true);
383 	if (err) {
384 		nfp_prog->error = err;
385 		return;
386 	}
387 
388 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
389 			reg.i8, zero, reg.swap, reg.wr_both,
390 			reg.dst_lmextn, reg.src_lmextn);
391 }
392 
393 static void
394 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
395 	      enum shf_sc sc, u8 shift)
396 {
397 	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
398 }
399 
400 static void
401 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
402 	    bool dst_lmextn, bool src_lmextn)
403 {
404 	u64 insn;
405 
406 	insn = OP_LCSR_BASE |
407 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
408 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
409 		FIELD_PREP(OP_LCSR_WRITE, wr) |
410 		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
411 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
412 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
413 
414 	nfp_prog_push(nfp_prog, insn);
415 }
416 
417 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
418 {
419 	struct nfp_insn_ur_regs reg;
420 	int err;
421 
422 	/* This instruction takes immeds instead of reg_none() for the ignored
423 	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra, so if the param is an immed, we encode it as reg_none()
	 * and copy the immed to both operands.
426 	 */
427 	if (swreg_type(src) == NN_REG_IMM) {
428 		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
429 		reg.breg = reg.areg;
430 	} else {
431 		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
432 	}
433 	if (err) {
434 		nfp_prog->error = err;
435 		return;
436 	}
437 
438 	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
439 		    false, reg.src_lmextn);
440 }
441 
442 /* CSR value is read in following immed[gpr, 0] */
443 static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
444 {
445 	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
446 }
447 
448 static void emit_nop(struct nfp_prog *nfp_prog)
449 {
450 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
451 }
452 
453 /* --- Wrappers --- */
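/* pack_immed() tries to express a 32-bit constant as a 16-bit value plus a
 * byte shift, which is what a single immed instruction can encode.  Worked
 * example (illustrative): imm == 0x00abcd00 has all of its set bits within
 * bits 8..23, so it packs as val == 0xabcd with shift == IMMED_SHIFT_1B;
 * imm == 0x12345678 does not fit and pack_immed() returns false, in which
 * case wrp_immed() falls back to the inverted value or to two instructions.
 */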
454 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
455 {
456 	if (!(imm & 0xffff0000)) {
457 		*val = imm;
458 		*shift = IMMED_SHIFT_0B;
459 	} else if (!(imm & 0xff0000ff)) {
460 		*val = imm >> 8;
461 		*shift = IMMED_SHIFT_1B;
462 	} else if (!(imm & 0x0000ffff)) {
463 		*val = imm >> 16;
464 		*shift = IMMED_SHIFT_2B;
465 	} else {
466 		return false;
467 	}
468 
469 	return true;
470 }
471 
472 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
473 {
474 	enum immed_shift shift;
475 	u16 val;
476 
477 	if (pack_immed(imm, &val, &shift)) {
478 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
479 	} else if (pack_immed(~imm, &val, &shift)) {
480 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
481 	} else {
482 		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
483 			   false, IMMED_SHIFT_0B);
484 		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
485 			   false, IMMED_SHIFT_2B);
486 	}
487 }
488 
489 static void
490 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
491 	       enum nfp_relo_type relo)
492 {
493 	if (imm > 0xffff) {
494 		pr_err("relocation of a large immediate!\n");
495 		nfp_prog->error = -EFAULT;
496 		return;
497 	}
498 	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
499 
500 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
501 		FIELD_PREP(OP_RELO_TYPE, relo);
502 }
503 
504 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return it;
 * otherwise load @imm into a spare register and return its encoding.
507  */
508 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
509 {
510 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
511 		return reg_imm(imm);
512 
513 	wrp_immed(nfp_prog, tmp_reg, imm);
514 	return tmp_reg;
515 }
516 
517 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return it;
 * otherwise load @imm into a spare register and return its encoding.
520  */
521 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
522 {
523 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
524 		return reg_imm(imm);
525 
526 	wrp_immed(nfp_prog, tmp_reg, imm);
527 	return tmp_reg;
528 }
529 
530 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
531 {
532 	while (count--)
533 		emit_nop(nfp_prog);
534 }
535 
536 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
537 {
538 	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
539 }
540 
541 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
542 {
543 	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
544 }
545 
/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from the low end.
548  */
549 static void
550 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
551 		u8 offset)
552 {
553 	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
554 	u8 mask = (1 << field_len) - 1;
555 
556 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
557 }
558 
/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src, OR
 * the result into @dst at @offset; the other bits of @dst are unchanged.
561  */
562 static void
563 wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
564 		   u8 field_len, u8 offset)
565 {
566 	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
567 	u8 mask = ((1 << field_len) - 1) << offset;
568 
569 	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
570 }
571 
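/* addr40_offset() prepares the A/B operand pair for a 40-bit addressed
 * access: the low 32 bits of the pointer are expected in GPR src_gpr and the
 * high bits in src_gpr + 1, so a non-zero offset is folded in with an ADD on
 * the low word and an ADD_C (add with carry) on the high word, staged in the
 * imm_a()/imm_b() scratch registers.
 */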
572 static void
573 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
574 	      swreg *rega, swreg *regb)
575 {
576 	if (offset == reg_imm(0)) {
577 		*rega = reg_a(src_gpr);
578 		*regb = reg_b(src_gpr + 1);
579 		return;
580 	}
581 
582 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
583 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
584 		 reg_imm(0));
585 	*rega = imm_a(nfp_prog);
586 	*regb = imm_b(nfp_prog);
587 }
588 
/* NFP has a Command Push Pull bus which supports bulk memory operations. */
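/* Sketch of the copy below: one read command pulls the source into the
 * transfer-in registers, the data is moved across to the transfer-out
 * registers, and a write command sized from the copy length (direct
 * write8/write32, or the indirect forms when the direct encodings cannot
 * express the length) pushes it to the destination.
 */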
590 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
591 {
592 	bool descending_seq = meta->ldst_gather_len < 0;
593 	s16 len = abs(meta->ldst_gather_len);
594 	swreg src_base, off;
595 	bool src_40bit_addr;
596 	unsigned int i;
597 	u8 xfer_num;
598 
599 	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
600 	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
601 	src_base = reg_a(meta->insn.src_reg * 2);
602 	xfer_num = round_up(len, 4) / 4;
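	/* e.g. (illustrative) a 14 byte copy needs xfer_num == 4 transfer regs */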
603 
604 	if (src_40bit_addr)
605 		addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
606 			      &off);
607 
	/* Set up PREV_ALU fields to override the memory read length. */
609 	if (len > 32)
610 		wrp_immed(nfp_prog, reg_none(),
611 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
612 
613 	/* Memory read from source addr into transfer-in registers. */
614 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
615 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
616 		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
617 
618 	/* Move from transfer-in to transfer-out. */
619 	for (i = 0; i < xfer_num; i++)
620 		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
621 
622 	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
623 
624 	if (len <= 8) {
625 		/* Use single direct_ref write8. */
626 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
627 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
628 			 CMD_CTX_SWAP);
629 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
630 		/* Use single direct_ref write32. */
631 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
632 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
633 			 CMD_CTX_SWAP);
634 	} else if (len <= 32) {
635 		/* Use single indirect_ref write8. */
636 		wrp_immed(nfp_prog, reg_none(),
637 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
638 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
639 			       reg_a(meta->paired_st->dst_reg * 2), off,
640 			       len - 1, CMD_CTX_SWAP);
641 	} else if (IS_ALIGNED(len, 4)) {
642 		/* Use single indirect_ref write32. */
643 		wrp_immed(nfp_prog, reg_none(),
644 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
645 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
646 			       reg_a(meta->paired_st->dst_reg * 2), off,
647 			       xfer_num - 1, CMD_CTX_SWAP);
648 	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
650 		 * another direct_ref write8 to write the remaining bytes.
651 		 */
652 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
653 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
654 			 CMD_CTX_SWAP);
655 
656 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
657 				      imm_b(nfp_prog));
658 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
659 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
660 			 CMD_CTX_SWAP);
661 	} else {
		/* Use one indirect_ref write32 to write the 4-byte aligned length,
663 		 * then another direct_ref write8 to write the remaining bytes.
664 		 */
665 		u8 new_off;
666 
667 		wrp_immed(nfp_prog, reg_none(),
668 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
669 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
670 			       reg_a(meta->paired_st->dst_reg * 2), off,
671 			       xfer_num - 2, CMD_CTX_SWAP);
672 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
673 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
674 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
675 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
676 			 (len & 0x3) - 1, CMD_CTX_SWAP);
677 	}
678 
	/* TODO: The following extra load is to make sure the data flow is
	 *  identical before and after we do the memory copy optimization.
	 *
	 *  The load destination register is not guaranteed to be dead, so we
	 *  need to make sure it is loaded with the same value as before this
	 *  transformation.
685 	 *
686 	 *  These extra loads could be removed once we have accurate register
687 	 *  usage information.
688 	 */
689 	if (descending_seq)
690 		xfer_num = 0;
691 	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
692 		xfer_num = xfer_num - 1;
693 	else
694 		xfer_num = xfer_num - 2;
695 
696 	switch (BPF_SIZE(meta->insn.code)) {
697 	case BPF_B:
698 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
699 				reg_xfer(xfer_num), 1,
700 				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
701 		break;
702 	case BPF_H:
703 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
704 				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
705 		break;
706 	case BPF_W:
707 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
708 			reg_xfer(0));
709 		break;
710 	case BPF_DW:
711 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
712 			reg_xfer(xfer_num));
713 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
714 			reg_xfer(xfer_num + 1));
715 		break;
716 	}
717 
718 	if (BPF_SIZE(meta->insn.code) != BPF_DW)
719 		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
720 
721 	return 0;
722 }
723 
724 static int
725 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
726 {
727 	unsigned int i;
728 	u16 shift, sz;
729 
730 	/* We load the value from the address indicated in @offset and then
731 	 * shift out the data we don't need.  Note: this is big endian!
732 	 */
733 	sz = max(size, 4);
734 	shift = size < 4 ? 4 - size : 0;
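	/* e.g. (illustrative) a 2 byte load reads 4 bytes, then shifts right by 16 */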
735 
736 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
737 		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
738 
739 	i = 0;
740 	if (shift)
741 		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
742 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
743 	else
744 		for (; i * 4 < size; i++)
745 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
746 
747 	if (i < 2)
748 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
749 
750 	return 0;
751 }
752 
753 static int
754 data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
755 		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
756 {
757 	unsigned int i;
758 	u8 mask, sz;
759 
760 	/* We load the value from the address indicated in rreg + lreg and then
761 	 * mask out the data we don't need.  Note: this is little endian!
762 	 */
763 	sz = max(size, 4);
764 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
765 
766 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
767 		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
768 
769 	i = 0;
770 	if (mask)
771 		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
772 				  reg_xfer(0), SHF_SC_NONE, 0, true);
773 	else
774 		for (; i * 4 < size; i++)
775 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
776 
777 	if (i < 2)
778 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
779 
780 	return 0;
781 }
782 
783 static int
784 data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
785 			  u8 dst_gpr, u8 size)
786 {
787 	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
788 				  size, CMD_MODE_32b);
789 }
790 
791 static int
792 data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
793 			  u8 dst_gpr, u8 size)
794 {
795 	swreg rega, regb;
796 
797 	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
798 
799 	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
800 				  size, CMD_MODE_40b_BA);
801 }
802 
803 static int
804 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
805 {
806 	swreg tmp_reg;
807 
808 	/* Calculate the true offset (src_reg + imm) */
809 	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
810 	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
811 
812 	/* Check packet length (size guaranteed to fit b/c it's u8) */
813 	emit_alu(nfp_prog, imm_a(nfp_prog),
814 		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
815 	emit_alu(nfp_prog, reg_none(),
816 		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
817 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
818 
819 	/* Load data */
820 	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
821 }
822 
823 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
824 {
825 	swreg tmp_reg;
826 
827 	/* Check packet length */
828 	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
829 	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
830 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
831 
832 	/* Load data */
833 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
834 	return data_ld(nfp_prog, tmp_reg, 0, size);
835 }
836 
837 static int
838 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
839 		    u8 src_gpr, u8 size)
840 {
841 	unsigned int i;
842 
843 	for (i = 0; i * 4 < size; i++)
844 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
845 
846 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
847 		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
848 
849 	return 0;
850 }
851 
852 static int
853 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
854 		   u64 imm, u8 size)
855 {
856 	wrp_immed(nfp_prog, reg_xfer(0), imm);
857 	if (size == 8)
858 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
859 
860 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
861 		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
862 
863 	return 0;
864 }
865 
866 typedef int
867 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
868 	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
869 	     bool needs_inc);
870 
871 static int
872 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
873 	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
874 	      bool needs_inc)
875 {
876 	bool should_inc = needs_inc && new_gpr && !last;
877 	u32 idx, src_byte;
878 	enum shf_sc sc;
879 	swreg reg;
880 	int shf;
881 	u8 mask;
882 
883 	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
884 		return -EOPNOTSUPP;
885 
886 	idx = off / 4;
887 
888 	/* Move the entire word */
889 	if (size == 4) {
890 		wrp_mov(nfp_prog, reg_both(dst),
891 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
892 		return 0;
893 	}
894 
895 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
896 		return -EOPNOTSUPP;
897 
898 	src_byte = off % 4;
899 
900 	mask = (1 << size) - 1;
901 	mask <<= dst_byte;
902 
903 	if (WARN_ON_ONCE(mask > 0xf))
904 		return -EOPNOTSUPP;
905 
906 	shf = abs(src_byte - dst_byte) * 8;
907 	if (src_byte == dst_byte) {
908 		sc = SHF_SC_NONE;
909 	} else if (src_byte < dst_byte) {
910 		shf = 32 - shf;
911 		sc = SHF_SC_L_SHF;
912 	} else {
913 		sc = SHF_SC_R_SHF;
914 	}
915 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
918 	 */
919 	if (idx <= RE_REG_LM_IDX_MAX) {
920 		reg = reg_lm(lm3 ? 3 : 0, idx);
921 	} else {
922 		reg = imm_a(nfp_prog);
923 		/* If it's not the first part of the load and we start a new GPR
924 		 * that means we are loading a second part of the LMEM word into
		 * a new GPR.  IOW we've already looked at that LMEM word and
926 		 * therefore it has been loaded into imm_a().
927 		 */
928 		if (first || !new_gpr)
929 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
930 	}
931 
932 	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
933 
934 	if (should_inc)
935 		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
936 
937 	return 0;
938 }
939 
940 static int
941 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
942 	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
943 	       bool needs_inc)
944 {
945 	bool should_inc = needs_inc && new_gpr && !last;
946 	u32 idx, dst_byte;
947 	enum shf_sc sc;
948 	swreg reg;
949 	int shf;
950 	u8 mask;
951 
952 	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
953 		return -EOPNOTSUPP;
954 
955 	idx = off / 4;
956 
957 	/* Move the entire word */
958 	if (size == 4) {
959 		wrp_mov(nfp_prog,
960 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
961 			reg_b(src));
962 		return 0;
963 	}
964 
965 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
966 		return -EOPNOTSUPP;
967 
968 	dst_byte = off % 4;
969 
970 	mask = (1 << size) - 1;
971 	mask <<= dst_byte;
972 
973 	if (WARN_ON_ONCE(mask > 0xf))
974 		return -EOPNOTSUPP;
975 
976 	shf = abs(src_byte - dst_byte) * 8;
977 	if (src_byte == dst_byte) {
978 		sc = SHF_SC_NONE;
979 	} else if (src_byte < dst_byte) {
980 		shf = 32 - shf;
981 		sc = SHF_SC_L_SHF;
982 	} else {
983 		sc = SHF_SC_R_SHF;
984 	}
985 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
988 	 */
989 	if (idx <= RE_REG_LM_IDX_MAX) {
990 		reg = reg_lm(lm3 ? 3 : 0, idx);
991 	} else {
992 		reg = imm_a(nfp_prog);
993 		/* Only first and last LMEM locations are going to need RMW,
994 		 * the middle location will be overwritten fully.
995 		 */
996 		if (first || last)
997 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
998 	}
999 
1000 	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
1001 
1002 	if (new_gpr || last) {
1003 		if (idx > RE_REG_LM_IDX_MAX)
1004 			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
1005 		if (should_inc)
1006 			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
1007 	}
1008 
1009 	return 0;
1010 }
1011 
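/* mem_op_stack() walks a stack access in slices that never cross a 4 byte
 * LMEM word boundary or a 4 byte GPR boundary, handing each slice to the
 * load/store step callback.  Illustrative example: a 4 byte access at a
 * stack offset of 2 is split into two 2 byte slices, one for each LMEM word
 * it touches.
 */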
1012 static int
1013 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1014 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
1015 	     bool clr_gpr, lmem_step step)
1016 {
1017 	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
1018 	bool first = true, last;
1019 	bool needs_inc = false;
1020 	swreg stack_off_reg;
1021 	u8 prev_gpr = 255;
1022 	u32 gpr_byte = 0;
1023 	bool lm3 = true;
1024 	int ret;
1025 
1026 	if (meta->ptr_not_const) {
1027 		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment.  Depend on the low bits of the value being
		 * discarded when written to the LMaddr register.
1030 		 */
1031 		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
1032 						stack_imm(nfp_prog));
1033 
1034 		emit_alu(nfp_prog, imm_b(nfp_prog),
1035 			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
1036 
1037 		needs_inc = true;
1038 	} else if (off + size <= 64) {
1039 		/* We can reach bottom 64B with LMaddr0 */
1040 		lm3 = false;
1041 	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
1042 		/* We have to set up a new pointer.  If we know the offset
1043 		 * and the entire access falls into a single 32 byte aligned
1044 		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because the offset is ORed in,
		 * not added, when doing *l$indexN[off].
1047 		 */
1048 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
1049 						stack_imm(nfp_prog));
1050 		emit_alu(nfp_prog, imm_b(nfp_prog),
1051 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1052 
1053 		off %= 32;
1054 	} else {
1055 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
1056 						stack_imm(nfp_prog));
1057 
1058 		emit_alu(nfp_prog, imm_b(nfp_prog),
1059 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1060 
1061 		needs_inc = true;
1062 	}
1063 	if (lm3) {
1064 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of the upper half. */
1066 		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1067 	}
1068 
1069 	if (clr_gpr && size < 8)
1070 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1071 
1072 	while (size) {
1073 		u32 slice_end;
1074 		u8 slice_size;
1075 
1076 		slice_size = min(size, 4 - gpr_byte);
1077 		slice_end = min(off + slice_size, round_up(off + 1, 4));
1078 		slice_size = slice_end - off;
1079 
1080 		last = slice_size == size;
1081 
1082 		if (needs_inc)
1083 			off %= 4;
1084 
1085 		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
1086 			   first, gpr != prev_gpr, last, lm3, needs_inc);
1087 		if (ret)
1088 			return ret;
1089 
1090 		prev_gpr = gpr;
1091 		first = false;
1092 
1093 		gpr_byte += slice_size;
1094 		if (gpr_byte >= 4) {
1095 			gpr_byte -= 4;
1096 			gpr++;
1097 		}
1098 
1099 		size -= slice_size;
1100 		off += slice_size;
1101 	}
1102 
1103 	return 0;
1104 }
1105 
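/* wrp_alu_imm() special-cases immediates that make the ALU op trivial:
 * AND with 0 just zeroes the register and AND with ~0 is a no-op; OR with ~0
 * sets all bits and OR with 0 is a no-op; XOR with ~0 is a bitwise NOT and
 * XOR with 0 is a no-op.  Only the remaining cases materialize the immediate
 * and emit the real ALU instruction.
 */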
1106 static void
1107 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
1108 {
1109 	swreg tmp_reg;
1110 
1111 	if (alu_op == ALU_OP_AND) {
1112 		if (!imm)
1113 			wrp_immed(nfp_prog, reg_both(dst), 0);
1114 		if (!imm || !~imm)
1115 			return;
1116 	}
1117 	if (alu_op == ALU_OP_OR) {
1118 		if (!~imm)
1119 			wrp_immed(nfp_prog, reg_both(dst), ~0U);
1120 		if (!imm || !~imm)
1121 			return;
1122 	}
1123 	if (alu_op == ALU_OP_XOR) {
1124 		if (!~imm)
1125 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
1126 				 ALU_OP_NOT, reg_b(dst));
1127 		if (!imm || !~imm)
1128 			return;
1129 	}
1130 
1131 	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1132 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
1133 }
1134 
1135 static int
1136 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1137 	      enum alu_op alu_op, bool skip)
1138 {
1139 	const struct bpf_insn *insn = &meta->insn;
1140 	u64 imm = insn->imm; /* sign extend */
1141 
1142 	if (skip) {
1143 		meta->skip = true;
1144 		return 0;
1145 	}
1146 
1147 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
1148 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
1149 
1150 	return 0;
1151 }
1152 
1153 static int
1154 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1155 	      enum alu_op alu_op)
1156 {
1157 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1158 
1159 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1160 	emit_alu(nfp_prog, reg_both(dst + 1),
1161 		 reg_a(dst + 1), alu_op, reg_b(src + 1));
1162 
1163 	return 0;
1164 }
1165 
1166 static int
1167 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1168 	      enum alu_op alu_op, bool skip)
1169 {
1170 	const struct bpf_insn *insn = &meta->insn;
1171 
1172 	if (skip) {
1173 		meta->skip = true;
1174 		return 0;
1175 	}
1176 
1177 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1178 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1179 
1180 	return 0;
1181 }
1182 
1183 static int
1184 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1185 	      enum alu_op alu_op)
1186 {
1187 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1188 
1189 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1190 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1191 
1192 	return 0;
1193 }
1194 
1195 static void
1196 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
1197 		 enum br_mask br_mask, u16 off)
1198 {
1199 	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
1200 	emit_br(nfp_prog, br_mask, off, 0);
1201 }
1202 
1203 static int
1204 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1205 	     enum alu_op alu_op, enum br_mask br_mask)
1206 {
1207 	const struct bpf_insn *insn = &meta->insn;
1208 
1209 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
1210 			 insn->src_reg * 2, br_mask, insn->off);
1211 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1212 			 insn->src_reg * 2 + 1, br_mask, insn->off);
1213 
1214 	return 0;
1215 }
1216 
1217 static const struct jmp_code_map {
1218 	enum br_mask br_mask;
1219 	bool swap;
1220 } jmp_code_map[] = {
1221 	[BPF_JGT >> 4]	= { BR_BLO, true },
1222 	[BPF_JGE >> 4]	= { BR_BHS, false },
1223 	[BPF_JLT >> 4]	= { BR_BLO, false },
1224 	[BPF_JLE >> 4]	= { BR_BHS, true },
1225 	[BPF_JSGT >> 4]	= { BR_BLT, true },
1226 	[BPF_JSGE >> 4]	= { BR_BGE, false },
1227 	[BPF_JSLT >> 4]	= { BR_BLT, false },
1228 	[BPF_JSLE >> 4]	= { BR_BGE, true },
1229 };
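
/* Sketch of how the table is used (illustrative): BPF_JGT dst, src ("jump if
 * dst > src") has no direct branch mask, so the operands are swapped and the
 * unsigned "branch if lower" mask is used instead -- src < dst is the same
 * condition.  The same trick maps BPF_JLE to BR_BHS and the signed
 * BPF_JSGT/BPF_JSLE to BR_BLT/BR_BGE with swapped operands.
 */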
1230 
1231 static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
1232 {
1233 	unsigned int op;
1234 
1235 	op = BPF_OP(meta->insn.code) >> 4;
	/* br_mask of 0 is BR_BEQ which we don't use in the jump code table */
1237 	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
1238 		      !jmp_code_map[op].br_mask,
1239 		      "no code found for jump instruction"))
1240 		return NULL;
1241 
1242 	return &jmp_code_map[op];
1243 }
1244 
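/* cmp_imm()/cmp_reg() implement a 64-bit compare as arithmetic whose result
 * is discarded (destination reg_none()): a SUB on the low words followed by
 * a SUB_C on the high words (or ADD/ADD_C when the immediate was pre-negated,
 * see jump_neg_op) leaves the condition codes describing the full 64-bit
 * relation, and the branch mask from the jump code table picks the condition
 * to branch on.
 */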
1245 static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1246 {
1247 	const struct bpf_insn *insn = &meta->insn;
1248 	u64 imm = insn->imm; /* sign extend */
1249 	const struct jmp_code_map *code;
1250 	enum alu_op alu_op, carry_op;
1251 	u8 reg = insn->dst_reg * 2;
1252 	swreg tmp_reg;
1253 
1254 	code = nfp_jmp_code_get(meta);
1255 	if (!code)
1256 		return -EINVAL;
1257 
1258 	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
1259 	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
1260 
1261 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1262 	if (!code->swap)
1263 		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
1264 	else
1265 		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
1266 
1267 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1268 	if (!code->swap)
1269 		emit_alu(nfp_prog, reg_none(),
1270 			 reg_a(reg + 1), carry_op, tmp_reg);
1271 	else
1272 		emit_alu(nfp_prog, reg_none(),
1273 			 tmp_reg, carry_op, reg_a(reg + 1));
1274 
1275 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
1276 
1277 	return 0;
1278 }
1279 
1280 static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1281 {
1282 	const struct bpf_insn *insn = &meta->insn;
1283 	const struct jmp_code_map *code;
1284 	u8 areg, breg;
1285 
1286 	code = nfp_jmp_code_get(meta);
1287 	if (!code)
1288 		return -EINVAL;
1289 
1290 	areg = insn->dst_reg * 2;
1291 	breg = insn->src_reg * 2;
1292 
1293 	if (code->swap) {
1294 		areg ^= breg;
1295 		breg ^= areg;
1296 		areg ^= breg;
1297 	}
1298 
1299 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1300 	emit_alu(nfp_prog, reg_none(),
1301 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1302 	emit_br(nfp_prog, code->br_mask, insn->off, 0);
1303 
1304 	return 0;
1305 }
1306 
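/* wrp_end32() swaps the byte order of a 32-bit word with two masked rotates:
 * the first ld_field writes all four destination bytes (mask 0xf) with the
 * source rotated right by 8, the second rewrites only bytes 0 and 2
 * (mask 0x5) from that intermediate rotated right by 16.  Illustrative
 * effect on the bytes (MSB first): A B C D  ->  D A B C  ->  D C B A.
 */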
1307 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1308 {
1309 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1310 		      SHF_SC_R_ROT, 8);
1311 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1312 		      SHF_SC_R_ROT, 16);
1313 }
1314 
1315 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1316 {
1317 	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1318 	struct nfp_bpf_cap_adjust_head *adjust_head;
1319 	u32 ret_einval, end;
1320 
1321 	adjust_head = &nfp_prog->bpf->adjust_head;
1322 
1323 	/* Optimized version - 5 vs 14 cycles */
1324 	if (nfp_prog->adjust_head_location != UINT_MAX) {
1325 		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1326 			return -EINVAL;
1327 
1328 		emit_alu(nfp_prog, pptr_reg(nfp_prog),
1329 			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1330 		emit_alu(nfp_prog, plen_reg(nfp_prog),
1331 			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1332 		emit_alu(nfp_prog, pv_len(nfp_prog),
1333 			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1334 
1335 		wrp_immed(nfp_prog, reg_both(0), 0);
1336 		wrp_immed(nfp_prog, reg_both(1), 0);
1337 
1338 		/* TODO: when adjust head is guaranteed to succeed we can
1339 		 * also eliminate the following if (r0 == 0) branch.
1340 		 */
1341 
1342 		return 0;
1343 	}
1344 
1345 	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1346 	end = ret_einval + 2;
1347 
1348 	/* We need to use a temp because offset is just a part of the pkt ptr */
1349 	emit_alu(nfp_prog, tmp,
1350 		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1351 
1352 	/* Validate result will fit within FW datapath constraints */
1353 	emit_alu(nfp_prog, reg_none(),
1354 		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1355 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1356 	emit_alu(nfp_prog, reg_none(),
1357 		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1358 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1359 
1360 	/* Validate the length is at least ETH_HLEN */
1361 	emit_alu(nfp_prog, tmp_len,
1362 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1363 	emit_alu(nfp_prog, reg_none(),
1364 		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1365 	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1366 
1367 	/* Load the ret code */
1368 	wrp_immed(nfp_prog, reg_both(0), 0);
1369 	wrp_immed(nfp_prog, reg_both(1), 0);
1370 
1371 	/* Modify the packet metadata */
1372 	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1373 
1374 	/* Skip over the -EINVAL ret code (defer 2) */
1375 	emit_br(nfp_prog, BR_UNC, end, 2);
1376 
1377 	emit_alu(nfp_prog, plen_reg(nfp_prog),
1378 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1379 	emit_alu(nfp_prog, pv_len(nfp_prog),
1380 		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1381 
1382 	/* return -EINVAL target */
1383 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1384 		return -EINVAL;
1385 
1386 	wrp_immed(nfp_prog, reg_both(0), -22);
1387 	wrp_immed(nfp_prog, reg_both(1), ~0);
1388 
1389 	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1390 		return -EINVAL;
1391 
1392 	return 0;
1393 }
1394 
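/* map_call_stack_common() implements the calling convention of the firmware
 * map helpers: the branch to the helper is emitted with two defer slots, and
 * those slots pass the map ID in A0 and the return address in B0 before
 * control actually transfers.  The LM pointers are set up beforehand so the
 * helper can find the key (and, for updates, the value) on the stack.
 */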
1395 static int
1396 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1397 {
1398 	struct bpf_offloaded_map *offmap;
1399 	struct nfp_bpf_map *nfp_map;
1400 	bool load_lm_ptr;
1401 	u32 ret_tgt;
1402 	s64 lm_off;
1403 	swreg tid;
1404 
1405 	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
1406 	nfp_map = offmap->dev_priv;
1407 
1408 	/* We only have to reload LM0 if the key is not at start of stack */
1409 	lm_off = nfp_prog->stack_depth;
1410 	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
1411 	load_lm_ptr = meta->arg2.var_off || lm_off;
1412 
1413 	/* Set LM0 to start of key */
1414 	if (load_lm_ptr)
1415 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1416 	if (meta->func_id == BPF_FUNC_map_update_elem)
1417 		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1418 
	/* Load map ID into a register; it should actually fit as an immediate,
	 * but in case it doesn't, deal with it here, not in the delay slots.
1421 	 */
1422 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
1423 
1424 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1425 		     2, RELO_BR_HELPER);
1426 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1427 
1428 	/* Load map ID into A0 */
1429 	wrp_mov(nfp_prog, reg_a(0), tid);
1430 
1431 	/* Load the return address into B0 */
1432 	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1433 
1434 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1435 		return -EINVAL;
1436 
1437 	/* Reset the LM0 pointer */
1438 	if (!load_lm_ptr)
1439 		return 0;
1440 
1441 	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1442 	wrp_nops(nfp_prog, 3);
1443 
1444 	return 0;
1445 }
1446 
1447 static int
1448 nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1449 {
1450 	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
1451 	/* CSR value is read in following immed[gpr, 0] */
1452 	emit_immed(nfp_prog, reg_both(0), 0,
1453 		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1454 	emit_immed(nfp_prog, reg_both(1), 0,
1455 		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1456 	return 0;
1457 }
1458 
1459 static int
1460 nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1461 {
1462 	swreg ptr_type;
1463 	u32 ret_tgt;
1464 
1465 	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
1466 
1467 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
1468 
1469 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1470 		     2, RELO_BR_HELPER);
1471 
1472 	/* Load ptr type into A1 */
1473 	wrp_mov(nfp_prog, reg_a(1), ptr_type);
1474 
1475 	/* Load the return address into B0 */
1476 	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1477 
1478 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1479 		return -EINVAL;
1480 
1481 	return 0;
1482 }
1483 
1484 /* --- Callbacks --- */
1485 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1486 {
1487 	const struct bpf_insn *insn = &meta->insn;
1488 	u8 dst = insn->dst_reg * 2;
1489 	u8 src = insn->src_reg * 2;
1490 
1491 	if (insn->src_reg == BPF_REG_10) {
1492 		swreg stack_depth_reg;
1493 
1494 		stack_depth_reg = ur_load_imm_any(nfp_prog,
1495 						  nfp_prog->stack_depth,
1496 						  stack_imm(nfp_prog));
1497 		emit_alu(nfp_prog, reg_both(dst),
1498 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1499 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1500 	} else {
1501 		wrp_reg_mov(nfp_prog, dst, src);
1502 		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1503 	}
1504 
1505 	return 0;
1506 }
1507 
1508 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1509 {
1510 	u64 imm = meta->insn.imm; /* sign extend */
1511 
1512 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1513 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1514 
1515 	return 0;
1516 }
1517 
1518 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1519 {
1520 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1521 }
1522 
1523 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1524 {
1525 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1526 }
1527 
1528 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1529 {
1530 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1531 }
1532 
1533 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1534 {
1535 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1536 }
1537 
1538 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1539 {
1540 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1541 }
1542 
1543 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1544 {
1545 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1546 }
1547 
1548 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1549 {
1550 	const struct bpf_insn *insn = &meta->insn;
1551 
1552 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1553 		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1554 		 reg_b(insn->src_reg * 2));
1555 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1556 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1557 		 reg_b(insn->src_reg * 2 + 1));
1558 
1559 	return 0;
1560 }
1561 
1562 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1563 {
1564 	const struct bpf_insn *insn = &meta->insn;
1565 	u64 imm = insn->imm; /* sign extend */
1566 
1567 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1568 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1569 
1570 	return 0;
1571 }
1572 
1573 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1574 {
1575 	const struct bpf_insn *insn = &meta->insn;
1576 
1577 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1578 		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1579 		 reg_b(insn->src_reg * 2));
1580 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1581 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1582 		 reg_b(insn->src_reg * 2 + 1));
1583 
1584 	return 0;
1585 }
1586 
1587 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1588 {
1589 	const struct bpf_insn *insn = &meta->insn;
1590 	u64 imm = insn->imm; /* sign extend */
1591 
1592 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1593 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1594 
1595 	return 0;
1596 }
1597 
1598 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1599 {
1600 	const struct bpf_insn *insn = &meta->insn;
1601 
1602 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1603 		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1604 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1605 		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1606 
1607 	return 0;
1608 }
1609 
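/* shl_imm64() splits a 64-bit left shift by a constant across the two 32-bit
 * halves of the register pair.  For n < 32 the high word is built with a
 * right double (funnel) shift by 32 - n over {high, low}, which equals
 * (high << n) | (low >> (32 - n)), and the low word is then shifted left by
 * n on its own; n == 32 is a move plus a zero, and n > 32 shifts the low
 * word by n - 32 into the high word.  shr_imm64() below is the mirror image.
 */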
1610 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1611 {
1612 	const struct bpf_insn *insn = &meta->insn;
1613 	u8 dst = insn->dst_reg * 2;
1614 
1615 	if (insn->imm < 32) {
1616 		emit_shf(nfp_prog, reg_both(dst + 1),
1617 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1618 			 SHF_SC_R_DSHF, 32 - insn->imm);
1619 		emit_shf(nfp_prog, reg_both(dst),
1620 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1621 			 SHF_SC_L_SHF, insn->imm);
1622 	} else if (insn->imm == 32) {
1623 		wrp_reg_mov(nfp_prog, dst + 1, dst);
1624 		wrp_immed(nfp_prog, reg_both(dst), 0);
1625 	} else if (insn->imm > 32) {
1626 		emit_shf(nfp_prog, reg_both(dst + 1),
1627 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1628 			 SHF_SC_L_SHF, insn->imm - 32);
1629 		wrp_immed(nfp_prog, reg_both(dst), 0);
1630 	}
1631 
1632 	return 0;
1633 }
1634 
1635 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1636 {
1637 	const struct bpf_insn *insn = &meta->insn;
1638 	u8 dst = insn->dst_reg * 2;
1639 
1640 	if (insn->imm < 32) {
1641 		emit_shf(nfp_prog, reg_both(dst),
1642 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1643 			 SHF_SC_R_DSHF, insn->imm);
1644 		emit_shf(nfp_prog, reg_both(dst + 1),
1645 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1646 			 SHF_SC_R_SHF, insn->imm);
1647 	} else if (insn->imm == 32) {
1648 		wrp_reg_mov(nfp_prog, dst, dst + 1);
1649 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1650 	} else if (insn->imm > 32) {
1651 		emit_shf(nfp_prog, reg_both(dst),
1652 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1653 			 SHF_SC_R_SHF, insn->imm - 32);
1654 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1655 	}
1656 
1657 	return 0;
1658 }
1659 
1660 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1661 {
1662 	const struct bpf_insn *insn = &meta->insn;
1663 
1664 	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
1665 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1666 
1667 	return 0;
1668 }
1669 
1670 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1671 {
1672 	const struct bpf_insn *insn = &meta->insn;
1673 
1674 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
1675 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1676 
1677 	return 0;
1678 }
1679 
1680 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1681 {
1682 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
1683 }
1684 
1685 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1686 {
1687 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
1688 }
1689 
1690 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1691 {
1692 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
1693 }
1694 
1695 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1696 {
1697 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1698 }
1699 
1700 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1701 {
1702 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
1703 }
1704 
1705 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1706 {
1707 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1708 }
1709 
1710 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1711 {
1712 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
1713 }
1714 
1715 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1716 {
1717 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
1718 }
1719 
1720 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1721 {
1722 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
1723 }
1724 
1725 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1726 {
1727 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
1728 }
1729 
1730 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1731 {
1732 	u8 dst = meta->insn.dst_reg * 2;
1733 
1734 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
1735 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1736 
1737 	return 0;
1738 }
1739 
1740 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1741 {
1742 	const struct bpf_insn *insn = &meta->insn;
1743 
1744 	if (!insn->imm)
1745 		return 1; /* TODO: zero shift means indirect */
1746 
1747 	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
1748 		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
1749 		 SHF_SC_L_SHF, insn->imm);
1750 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1751 
1752 	return 0;
1753 }
1754 
1755 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1756 {
1757 	const struct bpf_insn *insn = &meta->insn;
1758 	u8 gpr = insn->dst_reg * 2;
1759 
1760 	switch (insn->imm) {
1761 	case 16:
1762 		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
1763 			      SHF_SC_R_ROT, 8);
1764 		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
1765 			      SHF_SC_R_SHF, 16);
1766 
1767 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1768 		break;
1769 	case 32:
1770 		wrp_end32(nfp_prog, reg_a(gpr), gpr);
1771 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1772 		break;
1773 	case 64:
1774 		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
1775 
1776 		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
1777 		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
1778 		break;
1779 	}
1780 
1781 	return 0;
1782 }
1783 
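/* A BPF_LD | BPF_IMM | BPF_DW instruction occupies two BPF insn slots: the
 * first carries the low 32 bits of the constant in imm and the second the
 * high 32 bits, hence the two-part translation below.
 */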
1784 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1785 {
1786 	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
1787 	u32 imm_lo, imm_hi;
1788 	u8 dst;
1789 
1790 	dst = prev->insn.dst_reg * 2;
1791 	imm_lo = prev->insn.imm;
1792 	imm_hi = meta->insn.imm;
1793 
1794 	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
1795 
1796 	/* mov is always 1 insn, load imm may be two, so try to use mov */
1797 	if (imm_hi == imm_lo)
1798 		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
1799 	else
1800 		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
1801 
1802 	return 0;
1803 }
1804 
1805 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1806 {
1807 	meta->double_cb = imm_ld8_part2;
1808 	return 0;
1809 }
1810 
1811 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1812 {
1813 	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
1814 }
1815 
1816 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1817 {
1818 	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
1819 }
1820 
1821 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1822 {
1823 	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
1824 }
1825 
1826 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1827 {
1828 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1829 				     meta->insn.src_reg * 2, 1);
1830 }
1831 
1832 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1833 {
1834 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1835 				     meta->insn.src_reg * 2, 2);
1836 }
1837 
1838 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1839 {
1840 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1841 				     meta->insn.src_reg * 2, 4);
1842 }
1843 
1844 static int
1845 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1846 	      unsigned int size, unsigned int ptr_off)
1847 {
1848 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1849 			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
1850 			    true, wrp_lmem_load);
1851 }
1852 
1853 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1854 		       u8 size)
1855 {
1856 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1857 
1858 	switch (meta->insn.off) {
1859 	case offsetof(struct __sk_buff, len):
1860 		if (size != FIELD_SIZEOF(struct __sk_buff, len))
1861 			return -EOPNOTSUPP;
1862 		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
1863 		break;
1864 	case offsetof(struct __sk_buff, data):
1865 		if (size != FIELD_SIZEOF(struct __sk_buff, data))
1866 			return -EOPNOTSUPP;
1867 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1868 		break;
1869 	case offsetof(struct __sk_buff, data_end):
1870 		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
1871 			return -EOPNOTSUPP;
1872 		emit_alu(nfp_prog, dst,
1873 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1874 		break;
1875 	default:
1876 		return -EOPNOTSUPP;
1877 	}
1878 
1879 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1880 
1881 	return 0;
1882 }
1883 
1884 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1885 		       u8 size)
1886 {
1887 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1888 
1889 	switch (meta->insn.off) {
1890 	case offsetof(struct xdp_md, data):
1891 		if (size != FIELD_SIZEOF(struct xdp_md, data))
1892 			return -EOPNOTSUPP;
1893 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1894 		break;
1895 	case offsetof(struct xdp_md, data_end):
1896 		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
1897 			return -EOPNOTSUPP;
1898 		emit_alu(nfp_prog, dst,
1899 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1900 		break;
1901 	default:
1902 		return -EOPNOTSUPP;
1903 	}
1904 
1905 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1906 
1907 	return 0;
1908 }
1909 
1910 static int
1911 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1912 	     unsigned int size)
1913 {
1914 	swreg tmp_reg;
1915 
1916 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1917 
1918 	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
1919 					 tmp_reg, meta->insn.dst_reg * 2, size);
1920 }
1921 
1922 static int
1923 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1924 	     unsigned int size)
1925 {
1926 	swreg tmp_reg;
1927 
1928 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1929 
1930 	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
1931 					 tmp_reg, meta->insn.dst_reg * 2, size);
1932 }
1933 
1934 static void
1935 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
1936 			   struct nfp_insn_meta *meta)
1937 {
1938 	s16 range_start = meta->pkt_cache.range_start;
1939 	s16 range_end = meta->pkt_cache.range_end;
1940 	swreg src_base, off;
1941 	u8 xfer_num, len;
1942 	bool indir;
1943 
1944 	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
1945 	src_base = reg_a(meta->insn.src_reg * 2);
1946 	len = range_end - range_start;
1947 	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
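	/* Illustrative sizing (assuming 4-byte transfer registers): a 20-byte
	 * cached range rounds up to xfer_num == 5 transfer registers.
	 */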
1948 
1949 	indir = len > 8 * REG_WIDTH;
	/* Set up PREV_ALU for indirect mode. */
1951 	if (indir)
1952 		wrp_immed(nfp_prog, reg_none(),
1953 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
1954 
1955 	/* Cache memory into transfer-in registers. */
1956 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
1957 		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
1958 }
1959 
1960 static int
1961 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
1962 				     struct nfp_insn_meta *meta,
1963 				     unsigned int size)
1964 {
1965 	s16 range_start = meta->pkt_cache.range_start;
1966 	s16 insn_off = meta->insn.off - range_start;
1967 	swreg dst_lo, dst_hi, src_lo, src_mid;
1968 	u8 dst_gpr = meta->insn.dst_reg * 2;
1969 	u8 len_lo = size, len_mid = 0;
1970 	u8 idx = insn_off / REG_WIDTH;
1971 	u8 off = insn_off % REG_WIDTH;
1972 
1973 	dst_hi = reg_both(dst_gpr + 1);
1974 	dst_lo = reg_both(dst_gpr);
1975 	src_lo = reg_xfer(idx);
1976 
1977 	/* The read length could involve as many as three registers. */
1978 	if (size > REG_WIDTH - off) {
1979 		/* Calculate the part in the second register. */
1980 		len_lo = REG_WIDTH - off;
1981 		len_mid = size - len_lo;
1982 
1983 		/* Calculate the part in the third register. */
1984 		if (size > 2 * REG_WIDTH - off)
1985 			len_mid = REG_WIDTH;
1986 	}
1987 
1988 	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
1989 
1990 	if (!len_mid) {
1991 		wrp_immed(nfp_prog, dst_hi, 0);
1992 		return 0;
1993 	}
1994 
1995 	src_mid = reg_xfer(idx + 1);
1996 
1997 	if (size <= REG_WIDTH) {
1998 		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
1999 		wrp_immed(nfp_prog, dst_hi, 0);
2000 	} else {
2001 		swreg src_hi = reg_xfer(idx + 2);
2002 
2003 		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
2004 				   REG_WIDTH - len_lo, len_lo);
2005 		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
2006 				REG_WIDTH - len_lo);
2007 		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
2008 				   len_lo);
2009 	}
2010 
2011 	return 0;
2012 }
2013 
2014 static int
2015 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2016 				   struct nfp_insn_meta *meta,
2017 				   unsigned int size)
2018 {
2019 	swreg dst_lo, dst_hi, src_lo;
2020 	u8 dst_gpr, idx;
2021 
2022 	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
2023 	dst_gpr = meta->insn.dst_reg * 2;
2024 	dst_hi = reg_both(dst_gpr + 1);
2025 	dst_lo = reg_both(dst_gpr);
2026 	src_lo = reg_xfer(idx);
2027 
2028 	if (size < REG_WIDTH) {
2029 		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2030 		wrp_immed(nfp_prog, dst_hi, 0);
2031 	} else if (size == REG_WIDTH) {
2032 		wrp_mov(nfp_prog, dst_lo, src_lo);
2033 		wrp_immed(nfp_prog, dst_hi, 0);
2034 	} else {
2035 		swreg src_hi = reg_xfer(idx + 1);
2036 
2037 		wrp_mov(nfp_prog, dst_lo, src_lo);
2038 		wrp_mov(nfp_prog, dst_hi, src_hi);
2039 	}
2040 
2041 	return 0;
2042 }
2043 
2044 static int
2045 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
2046 			   struct nfp_insn_meta *meta, unsigned int size)
2047 {
2048 	u8 off = meta->insn.off - meta->pkt_cache.range_start;
2049 
2050 	if (IS_ALIGNED(off, REG_WIDTH))
2051 		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
2052 
2053 	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
2054 }
2055 
2056 static int
2057 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2058 	unsigned int size)
2059 {
2060 	if (meta->ldst_gather_len)
2061 		return nfp_cpp_memcpy(nfp_prog, meta);
2062 
2063 	if (meta->ptr.type == PTR_TO_CTX) {
2064 		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2065 			return mem_ldx_xdp(nfp_prog, meta, size);
2066 		else
2067 			return mem_ldx_skb(nfp_prog, meta, size);
2068 	}
2069 
2070 	if (meta->ptr.type == PTR_TO_PACKET) {
2071 		if (meta->pkt_cache.range_end) {
2072 			if (meta->pkt_cache.do_init)
2073 				mem_ldx_data_init_pktcache(nfp_prog, meta);
2074 
2075 			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
2076 		} else {
2077 			return mem_ldx_data(nfp_prog, meta, size);
2078 		}
2079 	}
2080 
2081 	if (meta->ptr.type == PTR_TO_STACK)
2082 		return mem_ldx_stack(nfp_prog, meta, size,
2083 				     meta->ptr.off + meta->ptr.var_off.value);
2084 
2085 	if (meta->ptr.type == PTR_TO_MAP_VALUE)
2086 		return mem_ldx_emem(nfp_prog, meta, size);
2087 
2088 	return -EOPNOTSUPP;
2089 }
2090 
2091 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2092 {
2093 	return mem_ldx(nfp_prog, meta, 1);
2094 }
2095 
2096 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2097 {
2098 	return mem_ldx(nfp_prog, meta, 2);
2099 }
2100 
2101 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2102 {
2103 	return mem_ldx(nfp_prog, meta, 4);
2104 }
2105 
2106 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2107 {
2108 	return mem_ldx(nfp_prog, meta, 8);
2109 }
2110 
2111 static int
2112 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2113 	    unsigned int size)
2114 {
2115 	u64 imm = meta->insn.imm; /* sign extend */
2116 	swreg off_reg;
2117 
2118 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2119 
2120 	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2121 				  imm, size);
2122 }
2123 
2124 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2125 		  unsigned int size)
2126 {
2127 	if (meta->ptr.type == PTR_TO_PACKET)
2128 		return mem_st_data(nfp_prog, meta, size);
2129 
2130 	return -EOPNOTSUPP;
2131 }
2132 
2133 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2134 {
2135 	return mem_st(nfp_prog, meta, 1);
2136 }
2137 
2138 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2139 {
2140 	return mem_st(nfp_prog, meta, 2);
2141 }
2142 
2143 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2144 {
2145 	return mem_st(nfp_prog, meta, 4);
2146 }
2147 
2148 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2149 {
2150 	return mem_st(nfp_prog, meta, 8);
2151 }
2152 
2153 static int
2154 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2155 	     unsigned int size)
2156 {
2157 	swreg off_reg;
2158 
2159 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2160 
2161 	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2162 				   meta->insn.src_reg * 2, size);
2163 }
2164 
2165 static int
2166 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2167 	      unsigned int size, unsigned int ptr_off)
2168 {
2169 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
2170 			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2171 			    false, wrp_lmem_store);
2172 }
2173 
2174 static int
2175 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2176 	unsigned int size)
2177 {
2178 	if (meta->ptr.type == PTR_TO_PACKET)
2179 		return mem_stx_data(nfp_prog, meta, size);
2180 
2181 	if (meta->ptr.type == PTR_TO_STACK)
2182 		return mem_stx_stack(nfp_prog, meta, size,
2183 				     meta->ptr.off + meta->ptr.var_off.value);
2184 
2185 	return -EOPNOTSUPP;
2186 }
2187 
2188 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2189 {
2190 	return mem_stx(nfp_prog, meta, 1);
2191 }
2192 
2193 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2194 {
2195 	return mem_stx(nfp_prog, meta, 2);
2196 }
2197 
2198 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2199 {
2200 	return mem_stx(nfp_prog, meta, 4);
2201 }
2202 
2203 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2204 {
2205 	return mem_stx(nfp_prog, meta, 8);
2206 }
2207 
2208 static int
2209 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2210 {
2211 	u8 dst_gpr = meta->insn.dst_reg * 2;
2212 	u8 src_gpr = meta->insn.src_reg * 2;
2213 	unsigned int full_add, out;
2214 	swreg addra, addrb, off;
2215 
2216 	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2217 
	/* We can fit 16 bits into the command immediate.  If we know the
	 * immediate is guaranteed to either always or never fit into 16 bits
	 * we only generate code to handle that particular case, otherwise we
	 * generate code for both.
	 */
2223 	out = nfp_prog_current_offset(nfp_prog);
2224 	full_add = nfp_prog_current_offset(nfp_prog);
2225 
2226 	if (meta->insn.off) {
2227 		out += 2;
2228 		full_add += 2;
2229 	}
2230 	if (meta->xadd_maybe_16bit) {
2231 		out += 3;
2232 		full_add += 3;
2233 	}
2234 	if (meta->xadd_over_16bit)
2235 		out += 2 + is64;
2236 	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2237 		out += 5;
2238 		full_add += 5;
2239 	}
2240 
2241 	/* Generate the branch for choosing add_imm vs add */
2242 	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2243 		swreg max_imm = imm_a(nfp_prog);
2244 
2245 		wrp_immed(nfp_prog, max_imm, 0xffff);
2246 		emit_alu(nfp_prog, reg_none(),
2247 			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
2248 		emit_alu(nfp_prog, reg_none(),
2249 			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
2250 		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
2251 		/* defer for add */
2252 	}
2253 
	/* If the insn has an offset, add it to the address */
2255 	if (!meta->insn.off) {
2256 		addra = reg_a(dst_gpr);
2257 		addrb = reg_b(dst_gpr + 1);
2258 	} else {
2259 		emit_alu(nfp_prog, imma_a(nfp_prog),
2260 			 reg_a(dst_gpr), ALU_OP_ADD, off);
2261 		emit_alu(nfp_prog, imma_b(nfp_prog),
2262 			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
2263 		addra = imma_a(nfp_prog);
2264 		addrb = imma_b(nfp_prog);
2265 	}
2266 
2267 	/* Generate the add_imm if 16 bits are possible */
2268 	if (meta->xadd_maybe_16bit) {
2269 		swreg prev_alu = imm_a(nfp_prog);
2270 
2271 		wrp_immed(nfp_prog, prev_alu,
2272 			  FIELD_PREP(CMD_OVE_DATA, 2) |
2273 			  CMD_OVE_LEN |
2274 			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
2275 		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
2276 		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
2277 			       addra, addrb, 0, CMD_CTX_NO_SWAP);
2278 
2279 		if (meta->xadd_over_16bit)
2280 			emit_br(nfp_prog, BR_UNC, out, 0);
2281 	}
2282 
2283 	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
2284 		return -EINVAL;
2285 
2286 	/* Generate the add if 16 bits are not guaranteed */
2287 	if (meta->xadd_over_16bit) {
2288 		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
2289 			 addra, addrb, is64 << 2,
2290 			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
2291 
2292 		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
2293 		if (is64)
2294 			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
2295 	}
2296 
2297 	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
2298 		return -EINVAL;
2299 
2300 	return 0;
2301 }
2302 
2303 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2304 {
2305 	return mem_xadd(nfp_prog, meta, false);
2306 }
2307 
2308 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2309 {
2310 	return mem_xadd(nfp_prog, meta, true);
2311 }
2312 
2313 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2314 {
2315 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
2316 
2317 	return 0;
2318 }
2319 
2320 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2321 {
2322 	const struct bpf_insn *insn = &meta->insn;
2323 	u64 imm = insn->imm; /* sign extend */
2324 	swreg or1, or2, tmp_reg;
2325 
2326 	or1 = reg_a(insn->dst_reg * 2);
2327 	or2 = reg_b(insn->dst_reg * 2 + 1);
2328 
2329 	if (imm & ~0U) {
2330 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2331 		emit_alu(nfp_prog, imm_a(nfp_prog),
2332 			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2333 		or1 = imm_a(nfp_prog);
2334 	}
2335 
2336 	if (imm >> 32) {
2337 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2338 		emit_alu(nfp_prog, imm_b(nfp_prog),
2339 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2340 		or2 = imm_b(nfp_prog);
2341 	}
2342 
2343 	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
2344 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2345 
2346 	return 0;
2347 }
2348 
2349 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2350 {
2351 	const struct bpf_insn *insn = &meta->insn;
2352 	u64 imm = insn->imm; /* sign extend */
2353 	swreg tmp_reg;
2354 
2355 	if (!imm) {
2356 		meta->skip = true;
2357 		return 0;
2358 	}
2359 
2360 	if (imm & ~0U) {
2361 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2362 		emit_alu(nfp_prog, reg_none(),
2363 			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
2364 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2365 	}
2366 
2367 	if (imm >> 32) {
2368 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2369 		emit_alu(nfp_prog, reg_none(),
2370 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
2371 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2372 	}
2373 
2374 	return 0;
2375 }
2376 
2377 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2378 {
2379 	const struct bpf_insn *insn = &meta->insn;
2380 	u64 imm = insn->imm; /* sign extend */
2381 	swreg tmp_reg;
2382 
2383 	if (!imm) {
2384 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
2385 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
2386 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2387 		return 0;
2388 	}
2389 
2390 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2391 	emit_alu(nfp_prog, reg_none(),
2392 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2393 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2394 
2395 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2396 	emit_alu(nfp_prog, reg_none(),
2397 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2398 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2399 
2400 	return 0;
2401 }
2402 
2403 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2404 {
2405 	const struct bpf_insn *insn = &meta->insn;
2406 
2407 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
2408 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
2409 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
2410 		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
2411 	emit_alu(nfp_prog, reg_none(),
2412 		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
2413 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2414 
2415 	return 0;
2416 }
2417 
2418 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2419 {
2420 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
2421 }
2422 
2423 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2424 {
2425 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
2426 }
2427 
2428 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2429 {
2430 	switch (meta->insn.imm) {
2431 	case BPF_FUNC_xdp_adjust_head:
2432 		return adjust_head(nfp_prog, meta);
2433 	case BPF_FUNC_map_lookup_elem:
2434 	case BPF_FUNC_map_update_elem:
2435 	case BPF_FUNC_map_delete_elem:
2436 		return map_call_stack_common(nfp_prog, meta);
2437 	case BPF_FUNC_get_prandom_u32:
2438 		return nfp_get_prandom_u32(nfp_prog, meta);
2439 	case BPF_FUNC_perf_event_output:
2440 		return nfp_perf_event_output(nfp_prog, meta);
2441 	default:
2442 		WARN_ONCE(1, "verifier allowed unsupported function\n");
2443 		return -EOPNOTSUPP;
2444 	}
2445 }
2446 
2447 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2448 {
2449 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
2450 
2451 	return 0;
2452 }
2453 
2454 static const instr_cb_t instr_cb[256] = {
2455 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
2456 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
2457 	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
2458 	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
2459 	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
2460 	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
2461 	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
2462 	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
2463 	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
2464 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
2465 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
2466 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
2467 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
2468 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
2469 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
2470 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
2471 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
2472 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
2473 	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
2474 	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
2475 	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
2476 	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
2477 	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
2478 	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
2479 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
2480 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
2481 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
2482 	[BPF_ALU | BPF_NEG] =		neg_reg,
2483 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
2484 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
2485 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
2486 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
2487 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
2488 	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
2489 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
2490 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
2491 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
2492 	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
2493 	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
2494 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
2495 	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
2496 	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
2497 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
2498 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
2499 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
2500 	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
2501 	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
2502 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
2503 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
2504 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
2505 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
2506 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
2507 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
2508 	[BPF_JMP | BPF_JGT | BPF_K] =	cmp_imm,
2509 	[BPF_JMP | BPF_JGE | BPF_K] =	cmp_imm,
2510 	[BPF_JMP | BPF_JLT | BPF_K] =	cmp_imm,
2511 	[BPF_JMP | BPF_JLE | BPF_K] =	cmp_imm,
2512 	[BPF_JMP | BPF_JSGT | BPF_K] =  cmp_imm,
2513 	[BPF_JMP | BPF_JSGE | BPF_K] =  cmp_imm,
2514 	[BPF_JMP | BPF_JSLT | BPF_K] =  cmp_imm,
2515 	[BPF_JMP | BPF_JSLE | BPF_K] =  cmp_imm,
2516 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
2517 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
2518 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
2519 	[BPF_JMP | BPF_JGT | BPF_X] =	cmp_reg,
2520 	[BPF_JMP | BPF_JGE | BPF_X] =	cmp_reg,
2521 	[BPF_JMP | BPF_JLT | BPF_X] =	cmp_reg,
2522 	[BPF_JMP | BPF_JLE | BPF_X] =	cmp_reg,
2523 	[BPF_JMP | BPF_JSGT | BPF_X] =  cmp_reg,
2524 	[BPF_JMP | BPF_JSGE | BPF_X] =  cmp_reg,
2525 	[BPF_JMP | BPF_JSLT | BPF_X] =  cmp_reg,
2526 	[BPF_JMP | BPF_JSLE | BPF_X] =  cmp_reg,
2527 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
2528 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
2529 	[BPF_JMP | BPF_CALL] =		call,
2530 	[BPF_JMP | BPF_EXIT] =		goto_out,
2531 };
2532 
2533 /* --- Assembler logic --- */
2534 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
2535 {
2536 	struct nfp_insn_meta *meta, *jmp_dst;
2537 	u32 idx, br_idx;
2538 
2539 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2540 		if (meta->skip)
2541 			continue;
2542 		if (meta->insn.code == (BPF_JMP | BPF_CALL))
2543 			continue;
2544 		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
2545 			continue;
2546 
2547 		if (list_is_last(&meta->l, &nfp_prog->insns))
2548 			br_idx = nfp_prog->last_bpf_off;
2549 		else
2550 			br_idx = list_next_entry(meta, l)->off - 1;
2551 
2552 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
2553 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
2554 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
2555 			return -ELOOP;
2556 		}
2557 		/* Leave special branches for later */
2558 		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
2559 		    RELO_BR_REL)
2560 			continue;
2561 
2562 		if (!meta->jmp_dst) {
2563 			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
2564 			return -ELOOP;
2565 		}
2566 
2567 		jmp_dst = meta->jmp_dst;
2568 
2569 		if (jmp_dst->skip) {
2570 			pr_err("Branch landing on removed instruction!!\n");
2571 			return -ELOOP;
2572 		}
2573 
2574 		for (idx = meta->off; idx <= br_idx; idx++) {
2575 			if (!nfp_is_br(nfp_prog->prog[idx]))
2576 				continue;
2577 			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
2578 		}
2579 	}
2580 
2581 	return 0;
2582 }
2583 
2584 static void nfp_intro(struct nfp_prog *nfp_prog)
2585 {
2586 	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
2587 	emit_alu(nfp_prog, plen_reg(nfp_prog),
2588 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
2589 }
2590 
2591 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
2592 {
2593 	/* TC direct-action mode:
2594 	 *   0,1   ok        NOT SUPPORTED[1]
2595 	 *   2   drop  0x22 -> drop,  count as stat1
2596 	 *   4,5 nuke  0x02 -> drop
2597 	 *   7  redir  0x44 -> redir, count as stat2
2598 	 *   * unspec  0x11 -> pass,  count as stat0
2599 	 *
2600 	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
2601 	 *     the exact decision made.  We are forced to support UNSPEC
2602 	 *     to handle aborts so that's the only one we handle for passing
2603 	 *     packets up the stack.
2604 	 */
2605 	/* Target for aborts */
2606 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2607 
2608 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2609 
2610 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2611 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
2612 
2613 	/* Target for normal exits */
2614 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2615 
2616 	/* if R0 > 7 jump to abort */
2617 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
2618 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2619 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2620 
2621 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
2622 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
2623 
2624 	emit_shf(nfp_prog, reg_a(1),
2625 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
2626 
2627 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2628 	emit_shf(nfp_prog, reg_a(2),
2629 		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2630 
2631 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2632 	emit_shf(nfp_prog, reg_b(2),
2633 		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
2634 
2635 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2636 
2637 	emit_shf(nfp_prog, reg_b(2),
2638 		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
2639 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2640 }
2641 
2642 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
2643 {
2644 	/* XDP return codes:
2645 	 *   0 aborted  0x82 -> drop,  count as stat3
2646 	 *   1    drop  0x22 -> drop,  count as stat1
2647 	 *   2    pass  0x11 -> pass,  count as stat0
2648 	 *   3      tx  0x44 -> redir, count as stat2
2649 	 *   * unknown  0x82 -> drop,  count as stat3
2650 	 */
2651 	/* Target for aborts */
2652 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2653 
2654 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2655 
2656 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2657 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2658 
2659 	/* Target for normal exits */
2660 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2661 
2662 	/* if R0 > 3 jump to abort */
2663 	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2664 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2665 
2666 	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
2667 
2668 	emit_shf(nfp_prog, reg_a(1),
2669 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2670 
2671 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2672 	emit_shf(nfp_prog, reg_b(2),
2673 		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2674 
2675 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2676 
2677 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2678 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2679 }
2680 
2681 static void nfp_outro(struct nfp_prog *nfp_prog)
2682 {
2683 	switch (nfp_prog->type) {
2684 	case BPF_PROG_TYPE_SCHED_CLS:
2685 		nfp_outro_tc_da(nfp_prog);
2686 		break;
2687 	case BPF_PROG_TYPE_XDP:
2688 		nfp_outro_xdp(nfp_prog);
2689 		break;
2690 	default:
2691 		WARN_ON(1);
2692 	}
2693 }
2694 
2695 static int nfp_translate(struct nfp_prog *nfp_prog)
2696 {
2697 	struct nfp_insn_meta *meta;
2698 	int err;
2699 
2700 	nfp_intro(nfp_prog);
2701 	if (nfp_prog->error)
2702 		return nfp_prog->error;
2703 
2704 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2705 		instr_cb_t cb = instr_cb[meta->insn.code];
2706 
2707 		meta->off = nfp_prog_current_offset(nfp_prog);
2708 
2709 		if (meta->skip) {
2710 			nfp_prog->n_translated++;
2711 			continue;
2712 		}
2713 
2714 		if (nfp_meta_has_prev(nfp_prog, meta) &&
2715 		    nfp_meta_prev(meta)->double_cb)
2716 			cb = nfp_meta_prev(meta)->double_cb;
2717 		if (!cb)
2718 			return -ENOENT;
2719 		err = cb(nfp_prog, meta);
2720 		if (err)
2721 			return err;
2722 		if (nfp_prog->error)
2723 			return nfp_prog->error;
2724 
2725 		nfp_prog->n_translated++;
2726 	}
2727 
2728 	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2729 
2730 	nfp_outro(nfp_prog);
2731 	if (nfp_prog->error)
2732 		return nfp_prog->error;
2733 
2734 	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2735 	if (nfp_prog->error)
2736 		return nfp_prog->error;
2737 
2738 	return nfp_fixup_branches(nfp_prog);
2739 }
2740 
2741 /* --- Optimizations --- */
2742 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2743 {
2744 	struct nfp_insn_meta *meta;
2745 
2746 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2747 		struct bpf_insn insn = meta->insn;
2748 
2749 		/* Programs converted from cBPF start with register xoring */
2750 		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2751 		    insn.src_reg == insn.dst_reg)
2752 			continue;
2753 
2754 		/* Programs start with R6 = R1 but we ignore the skb pointer */
2755 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2756 		    insn.src_reg == 1 && insn.dst_reg == 6)
2757 			meta->skip = true;
2758 
2759 		/* Return as soon as something doesn't match */
2760 		if (!meta->skip)
2761 			return;
2762 	}
2763 }
2764 
/* abs(insn.imm) will fit better into an unrestricted reg immediate -
 * convert add/sub of a negative number into a sub/add of a positive one.
 */
2768 static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
2769 {
2770 	struct nfp_insn_meta *meta;
2771 
2772 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2773 		struct bpf_insn insn = meta->insn;
2774 
2775 		if (meta->skip)
2776 			continue;
2777 
2778 		if (BPF_CLASS(insn.code) != BPF_ALU &&
2779 		    BPF_CLASS(insn.code) != BPF_ALU64 &&
2780 		    BPF_CLASS(insn.code) != BPF_JMP)
2781 			continue;
2782 		if (BPF_SRC(insn.code) != BPF_K)
2783 			continue;
2784 		if (insn.imm >= 0)
2785 			continue;
2786 
2787 		if (BPF_CLASS(insn.code) == BPF_JMP) {
2788 			switch (BPF_OP(insn.code)) {
2789 			case BPF_JGE:
2790 			case BPF_JSGE:
2791 			case BPF_JLT:
2792 			case BPF_JSLT:
2793 				meta->jump_neg_op = true;
2794 				break;
2795 			default:
2796 				continue;
2797 			}
2798 		} else {
2799 			if (BPF_OP(insn.code) == BPF_ADD)
2800 				insn.code = BPF_CLASS(insn.code) | BPF_SUB;
2801 			else if (BPF_OP(insn.code) == BPF_SUB)
2802 				insn.code = BPF_CLASS(insn.code) | BPF_ADD;
2803 			else
2804 				continue;
2805 
2806 			meta->insn.code = insn.code | BPF_K;
2807 		}
2808 
2809 		meta->insn.imm = -insn.imm;
2810 	}
2811 }
2812 
2813 /* Remove masking after load since our load guarantees this is not needed */
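/* Illustrative pattern from cBPF conversion: a BPF_LD | BPF_ABS | BPF_B load
 * followed by "r0 &= 0xff".  The AND is redundant because the load already
 * zero-extends the value, so the masking insn can be skipped.
 */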
2814 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2815 {
2816 	struct nfp_insn_meta *meta1, *meta2;
2817 	const s32 exp_mask[] = {
2818 		[BPF_B] = 0x000000ffU,
2819 		[BPF_H] = 0x0000ffffU,
2820 		[BPF_W] = 0xffffffffU,
2821 	};
2822 
2823 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2824 		struct bpf_insn insn, next;
2825 
2826 		insn = meta1->insn;
2827 		next = meta2->insn;
2828 
2829 		if (BPF_CLASS(insn.code) != BPF_LD)
2830 			continue;
2831 		if (BPF_MODE(insn.code) != BPF_ABS &&
2832 		    BPF_MODE(insn.code) != BPF_IND)
2833 			continue;
2834 
2835 		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2836 			continue;
2837 
2838 		if (!exp_mask[BPF_SIZE(insn.code)])
2839 			continue;
2840 		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2841 			continue;
2842 
2843 		if (next.src_reg || next.dst_reg)
2844 			continue;
2845 
2846 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2847 			continue;
2848 
2849 		meta2->skip = true;
2850 	}
2851 }
2852 
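/* Remove the shift-left/shift-right by 32 pair that cBPF conversion can emit
 * after a 32-bit BPF_ABS/BPF_IND load, e.g. (illustrative):
 *
 *   r0 = ntohl(*(u32 *)(pkt + off))
 *   r0 <<= 32
 *   r0 >>= 32
 *
 * The shifts only clear the upper 32 bits, which the load already leaves
 * zeroed, so both shift insns can be skipped.
 */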
2853 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2854 {
2855 	struct nfp_insn_meta *meta1, *meta2, *meta3;
2856 
2857 	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2858 		struct bpf_insn insn, next1, next2;
2859 
2860 		insn = meta1->insn;
2861 		next1 = meta2->insn;
2862 		next2 = meta3->insn;
2863 
2864 		if (BPF_CLASS(insn.code) != BPF_LD)
2865 			continue;
2866 		if (BPF_MODE(insn.code) != BPF_ABS &&
2867 		    BPF_MODE(insn.code) != BPF_IND)
2868 			continue;
2869 		if (BPF_SIZE(insn.code) != BPF_W)
2870 			continue;
2871 
2872 		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2873 		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2874 		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2875 		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2876 			continue;
2877 
2878 		if (next1.src_reg || next1.dst_reg ||
2879 		    next2.src_reg || next2.dst_reg)
2880 			continue;
2881 
2882 		if (next1.imm != 0x20 || next2.imm != 0x20)
2883 			continue;
2884 
2885 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2886 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
2887 			continue;
2888 
2889 		meta2->skip = true;
2890 		meta3->skip = true;
2891 	}
2892 }
2893 
/* A load/store pair that forms a memory copy should look like the following:
 *
 *   ld_width R, [addr_src + offset_src]
 *   st_width [addr_dest + offset_dest], R
 *
 * The destination register of the load and the source register of the store
 * should be the same, and the load and store should also operate at the same
 * width.  If either addr_src or addr_dest is the stack pointer, we don't do
 * the CPP optimization as the stack is modelled by registers on the NFP.
 */
2904 static bool
2905 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
2906 		    struct nfp_insn_meta *st_meta)
2907 {
2908 	struct bpf_insn *ld = &ld_meta->insn;
2909 	struct bpf_insn *st = &st_meta->insn;
2910 
2911 	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
2912 		return false;
2913 
2914 	if (ld_meta->ptr.type != PTR_TO_PACKET)
2915 		return false;
2916 
2917 	if (st_meta->ptr.type != PTR_TO_PACKET)
2918 		return false;
2919 
2920 	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
2921 		return false;
2922 
2923 	if (ld->dst_reg != st->src_reg)
2924 		return false;
2925 
	/* There is a jump to the store insn in this pair. */
2927 	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
2928 		return false;
2929 
2930 	return true;
2931 }
2932 
2933 /* Currently, we only support chaining load/store pairs if:
2934  *
2935  *  - Their address base registers are the same.
2936  *  - Their address offsets are in the same order.
2937  *  - They operate at the same memory width.
2938  *  - There is no jump into the middle of them.
2939  */
2940 static bool
2941 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
2942 			      struct nfp_insn_meta *st_meta,
2943 			      struct bpf_insn *prev_ld,
2944 			      struct bpf_insn *prev_st)
2945 {
2946 	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
2947 	struct bpf_insn *ld = &ld_meta->insn;
2948 	struct bpf_insn *st = &st_meta->insn;
2949 	s16 prev_ld_off, prev_st_off;
2950 
2951 	/* This pair is the start pair. */
2952 	if (!prev_ld)
2953 		return true;
2954 
2955 	prev_size = BPF_LDST_BYTES(prev_ld);
2956 	curr_size = BPF_LDST_BYTES(ld);
2957 	prev_ld_base = prev_ld->src_reg;
2958 	prev_st_base = prev_st->dst_reg;
2959 	prev_ld_dst = prev_ld->dst_reg;
2960 	prev_ld_off = prev_ld->off;
2961 	prev_st_off = prev_st->off;
2962 
2963 	if (ld->dst_reg != prev_ld_dst)
2964 		return false;
2965 
2966 	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
2967 		return false;
2968 
2969 	if (curr_size != prev_size)
2970 		return false;
2971 
	/* There is a jump to the head of this pair. */
2973 	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
2974 		return false;
2975 
2976 	/* Both in ascending order. */
2977 	if (prev_ld_off + prev_size == ld->off &&
2978 	    prev_st_off + prev_size == st->off)
2979 		return true;
2980 
2981 	/* Both in descending order. */
2982 	if (ld->off + curr_size == prev_ld_off &&
2983 	    st->off + curr_size == prev_st_off)
2984 		return true;
2985 
2986 	return false;
2987 }
2988 
/* Return TRUE if cross memory access happens.  Cross memory access means
 * the store area overlaps the load area, so a later load might read back a
 * value written by an earlier store; in that case we can't treat the
 * sequence as a memory copy.
 */
2994 static bool
2995 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
2996 		 struct nfp_insn_meta *head_st_meta)
2997 {
2998 	s16 head_ld_off, head_st_off, ld_off;
2999 
	/* Different pointer types do not overlap. */
3001 	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
3002 		return false;
3003 
3004 	/* load and store are both PTR_TO_PACKET, check ID info.  */
3005 	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
3006 		return true;
3007 
3008 	/* Canonicalize the offsets. Turn all of them against the original
3009 	 * base register.
3010 	 */
3011 	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
3012 	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
3013 	ld_off = ld->off + head_ld_meta->ptr.off;
3014 
3015 	/* Ascending order cross. */
3016 	if (ld_off > head_ld_off &&
3017 	    head_ld_off < head_st_off && ld_off >= head_st_off)
3018 		return true;
3019 
3020 	/* Descending order cross. */
3021 	if (ld_off < head_ld_off &&
3022 	    head_ld_off > head_st_off && ld_off <= head_st_off)
3023 		return true;
3024 
3025 	return false;
3026 }
3027 
/* This pass tries to identify the following instruction sequences.
3029  *
3030  *   load R, [regA + offA]
3031  *   store [regB + offB], R
3032  *   load R, [regA + offA + const_imm_A]
3033  *   store [regB + offB + const_imm_A], R
3034  *   load R, [regA + offA + 2 * const_imm_A]
3035  *   store [regB + offB + 2 * const_imm_A], R
3036  *   ...
3037  *
 * The above sequence is typically generated by the compiler when lowering
 * memcpy.  The NFP prefers using CPP instructions to accelerate it.
3040  */
3041 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
3042 {
3043 	struct nfp_insn_meta *head_ld_meta = NULL;
3044 	struct nfp_insn_meta *head_st_meta = NULL;
3045 	struct nfp_insn_meta *meta1, *meta2;
3046 	struct bpf_insn *prev_ld = NULL;
3047 	struct bpf_insn *prev_st = NULL;
3048 	u8 count = 0;
3049 
3050 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3051 		struct bpf_insn *ld = &meta1->insn;
3052 		struct bpf_insn *st = &meta2->insn;
3053 
		/* Reset record status if any of the following is true:
		 *   - The current insn pair is not a load/store pair.
		 *   - The load/store pair doesn't chain with the previous one.
		 *   - The chained load/store pair crosses the memory area
		 *     accessed by the recorded chain (see cross_mem_access()).
		 *   - The chained load/store pair's total memory copy size
		 *     exceeds 128 bytes, which is the maximum length a
		 *     single NFP CPP command can transfer.
3061 		 */
3062 		if (!curr_pair_is_memcpy(meta1, meta2) ||
3063 		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
3064 						   prev_st) ||
3065 		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
3066 						       head_st_meta) ||
3067 				      head_ld_meta->ldst_gather_len >= 128))) {
3068 			if (!count)
3069 				continue;
3070 
3071 			if (count > 1) {
3072 				s16 prev_ld_off = prev_ld->off;
3073 				s16 prev_st_off = prev_st->off;
3074 				s16 head_ld_off = head_ld_meta->insn.off;
3075 
3076 				if (prev_ld_off < head_ld_off) {
3077 					head_ld_meta->insn.off = prev_ld_off;
3078 					head_st_meta->insn.off = prev_st_off;
3079 					head_ld_meta->ldst_gather_len =
3080 						-head_ld_meta->ldst_gather_len;
3081 				}
3082 
3083 				head_ld_meta->paired_st = &head_st_meta->insn;
3084 				head_st_meta->skip = true;
3085 			} else {
3086 				head_ld_meta->ldst_gather_len = 0;
3087 			}
3088 
			/* If the chain is ended by a load/store pair then this
			 * could serve as the new head of the next chain.
3091 			 */
3092 			if (curr_pair_is_memcpy(meta1, meta2)) {
3093 				head_ld_meta = meta1;
3094 				head_st_meta = meta2;
3095 				head_ld_meta->ldst_gather_len =
3096 					BPF_LDST_BYTES(ld);
3097 				meta1 = nfp_meta_next(meta1);
3098 				meta2 = nfp_meta_next(meta2);
3099 				prev_ld = ld;
3100 				prev_st = st;
3101 				count = 1;
3102 			} else {
3103 				head_ld_meta = NULL;
3104 				head_st_meta = NULL;
3105 				prev_ld = NULL;
3106 				prev_st = NULL;
3107 				count = 0;
3108 			}
3109 
3110 			continue;
3111 		}
3112 
3113 		if (!head_ld_meta) {
3114 			head_ld_meta = meta1;
3115 			head_st_meta = meta2;
3116 		} else {
3117 			meta1->skip = true;
3118 			meta2->skip = true;
3119 		}
3120 
3121 		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
3122 		meta1 = nfp_meta_next(meta1);
3123 		meta2 = nfp_meta_next(meta2);
3124 		prev_ld = ld;
3125 		prev_st = st;
3126 		count++;
3127 	}
3128 }
3129 
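/* Group packet loads sharing the same packet-pointer base into ranges of at
 * most 64 bytes, so one bulk read into transfer registers (the packet cache)
 * can serve several loads.  The first load of each range is marked with
 * do_init; the remaining loads read from the cached transfer registers.
 */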
3130 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
3131 {
3132 	struct nfp_insn_meta *meta, *range_node = NULL;
3133 	s16 range_start = 0, range_end = 0;
3134 	bool cache_avail = false;
3135 	struct bpf_insn *insn;
3136 	s32 range_ptr_off = 0;
3137 	u32 range_ptr_id = 0;
3138 
3139 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3140 		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
3141 			cache_avail = false;
3142 
3143 		if (meta->skip)
3144 			continue;
3145 
3146 		insn = &meta->insn;
3147 
3148 		if (is_mbpf_store_pkt(meta) ||
3149 		    insn->code == (BPF_JMP | BPF_CALL) ||
3150 		    is_mbpf_classic_store_pkt(meta) ||
3151 		    is_mbpf_classic_load(meta)) {
3152 			cache_avail = false;
3153 			continue;
3154 		}
3155 
3156 		if (!is_mbpf_load(meta))
3157 			continue;
3158 
3159 		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
3160 			cache_avail = false;
3161 			continue;
3162 		}
3163 
3164 		if (!cache_avail) {
3165 			cache_avail = true;
3166 			if (range_node)
3167 				goto end_current_then_start_new;
3168 			goto start_new;
3169 		}
3170 
3171 		/* Check ID to make sure two reads share the same
3172 		 * variable offset against PTR_TO_PACKET, and check OFF
3173 		 * to make sure they also share the same constant
3174 		 * offset.
3175 		 *
3176 		 * OFFs don't really need to be the same, because they
3177 		 * are the constant offsets against PTR_TO_PACKET, so
3178 		 * for different OFFs, we could canonicalize them to
		 * offsets against the original packet pointer. We don't
3180 		 * support this.
3181 		 */
3182 		if (meta->ptr.id == range_ptr_id &&
3183 		    meta->ptr.off == range_ptr_off) {
3184 			s16 new_start = range_start;
3185 			s16 end, off = insn->off;
3186 			s16 new_end = range_end;
3187 			bool changed = false;
3188 
3189 			if (off < range_start) {
3190 				new_start = off;
3191 				changed = true;
3192 			}
3193 
3194 			end = off + BPF_LDST_BYTES(insn);
3195 			if (end > range_end) {
3196 				new_end = end;
3197 				changed = true;
3198 			}
3199 
3200 			if (!changed)
3201 				continue;
3202 
3203 			if (new_end - new_start <= 64) {
3204 				/* Install new range. */
3205 				range_start = new_start;
3206 				range_end = new_end;
3207 				continue;
3208 			}
3209 		}
3210 
3211 end_current_then_start_new:
3212 		range_node->pkt_cache.range_start = range_start;
3213 		range_node->pkt_cache.range_end = range_end;
3214 start_new:
3215 		range_node = meta;
3216 		range_node->pkt_cache.do_init = true;
3217 		range_ptr_id = range_node->ptr.id;
3218 		range_ptr_off = range_node->ptr.off;
3219 		range_start = insn->off;
3220 		range_end = insn->off + BPF_LDST_BYTES(insn);
3221 	}
3222 
3223 	if (range_node) {
3224 		range_node->pkt_cache.range_start = range_start;
3225 		range_node->pkt_cache.range_end = range_end;
3226 	}
3227 
3228 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3229 		if (meta->skip)
3230 			continue;
3231 
3232 		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
3233 			if (meta->pkt_cache.do_init) {
3234 				range_start = meta->pkt_cache.range_start;
3235 				range_end = meta->pkt_cache.range_end;
3236 			} else {
3237 				meta->pkt_cache.range_start = range_start;
3238 				meta->pkt_cache.range_end = range_end;
3239 			}
3240 		}
3241 	}
3242 }
3243 
3244 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
3245 {
3246 	nfp_bpf_opt_reg_init(nfp_prog);
3247 
3248 	nfp_bpf_opt_neg_add_sub(nfp_prog);
3249 	nfp_bpf_opt_ld_mask(nfp_prog);
3250 	nfp_bpf_opt_ld_shift(nfp_prog);
3251 	nfp_bpf_opt_ldst_gather(nfp_prog);
3252 	nfp_bpf_opt_pkt_cache(nfp_prog);
3253 
3254 	return 0;
3255 }
3256 
3257 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
3258 {
3259 	__le64 *ustore = (__force __le64 *)prog;
3260 	int i;
3261 
3262 	for (i = 0; i < len; i++) {
3263 		int err;
3264 
3265 		err = nfp_ustore_check_valid_no_ecc(prog[i]);
3266 		if (err)
3267 			return err;
3268 
3269 		ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
3270 	}
3271 
3272 	return 0;
3273 }
3274 
3275 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
3276 {
3277 	void *prog;
3278 
3279 	prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
3280 	if (!prog)
3281 		return;
3282 
3283 	nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
3284 	memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
3285 	kvfree(nfp_prog->prog);
3286 	nfp_prog->prog = prog;
3287 }
3288 
3289 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
3290 {
3291 	int ret;
3292 
3293 	ret = nfp_bpf_optimize(nfp_prog);
3294 	if (ret)
3295 		return ret;
3296 
3297 	ret = nfp_translate(nfp_prog);
3298 	if (ret) {
3299 		pr_err("Translation failed with error %d (translated: %u)\n",
3300 		       ret, nfp_prog->n_translated);
3301 		return -EINVAL;
3302 	}
3303 
3304 	nfp_bpf_prog_trim(nfp_prog);
3305 
3306 	return ret;
3307 }
3308 
3309 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
3310 {
3311 	struct nfp_insn_meta *meta;
3312 
3313 	/* Another pass to record jump information. */
3314 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3315 		u64 code = meta->insn.code;
3316 
3317 		if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
3318 		    BPF_OP(code) != BPF_CALL) {
3319 			struct nfp_insn_meta *dst_meta;
3320 			unsigned short dst_indx;
3321 
3322 			dst_indx = meta->n + 1 + meta->insn.off;
3323 			dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
3324 						     cnt);
3325 
3326 			meta->jmp_dst = dst_meta;
3327 			dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
3328 		}
3329 	}
3330 }
3331 
3332 bool nfp_bpf_supported_opcode(u8 code)
3333 {
3334 	return !!instr_cb[code];
3335 }
3336 
3337 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
3338 {
3339 	unsigned int i;
3340 	u64 *prog;
3341 	int err;
3342 
3343 	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
3344 		       GFP_KERNEL);
3345 	if (!prog)
3346 		return ERR_PTR(-ENOMEM);
3347 
3348 	for (i = 0; i < nfp_prog->prog_len; i++) {
3349 		enum nfp_relo_type special;
3350 		u32 val;
3351 
3352 		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
3353 		switch (special) {
3354 		case RELO_NONE:
3355 			continue;
3356 		case RELO_BR_REL:
3357 			br_add_offset(&prog[i], bv->start_off);
3358 			break;
3359 		case RELO_BR_GO_OUT:
3360 			br_set_offset(&prog[i],
3361 				      nfp_prog->tgt_out + bv->start_off);
3362 			break;
3363 		case RELO_BR_GO_ABORT:
3364 			br_set_offset(&prog[i],
3365 				      nfp_prog->tgt_abort + bv->start_off);
3366 			break;
3367 		case RELO_BR_NEXT_PKT:
3368 			br_set_offset(&prog[i], bv->tgt_done);
3369 			break;
3370 		case RELO_BR_HELPER:
3371 			val = br_get_offset(prog[i]);
3372 			val -= BR_OFF_RELO;
3373 			switch (val) {
3374 			case BPF_FUNC_map_lookup_elem:
3375 				val = nfp_prog->bpf->helpers.map_lookup;
3376 				break;
3377 			case BPF_FUNC_map_update_elem:
3378 				val = nfp_prog->bpf->helpers.map_update;
3379 				break;
3380 			case BPF_FUNC_map_delete_elem:
3381 				val = nfp_prog->bpf->helpers.map_delete;
3382 				break;
3383 			case BPF_FUNC_perf_event_output:
3384 				val = nfp_prog->bpf->helpers.perf_event_output;
3385 				break;
3386 			default:
3387 				pr_err("relocation of unknown helper %d\n",
3388 				       val);
3389 				err = -EINVAL;
3390 				goto err_free_prog;
3391 			}
3392 			br_set_offset(&prog[i], val);
3393 			break;
3394 		case RELO_IMMED_REL:
3395 			immed_add_value(&prog[i], bv->start_off);
3396 			break;
3397 		}
3398 
3399 		prog[i] &= ~OP_RELO_TYPE;
3400 	}
3401 
3402 	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
3403 	if (err)
3404 		goto err_free_prog;
3405 
3406 	return prog;
3407 
3408 err_free_prog:
3409 	kfree(prog);
3410 	return ERR_PTR(err);
3411 }
3412