1 /*
2  * Copyright (C) 2016-2017 Netronome Systems, Inc.
3  *
4  * This software is dual licensed under the GNU General Public License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree or the BSD 2-Clause License provided below.  You have the
7  * option to license this software under the complete terms of either license.
8  *
9  * The BSD 2-Clause License:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      1. Redistributions of source code must retain the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer.
18  *
19  *      2. Redistributions in binary form must reproduce the above
20  *         copyright notice, this list of conditions and the following
21  *         disclaimer in the documentation and/or other materials
22  *         provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt)	"NFP net bpf: " fmt
35 
36 #include <linux/bug.h>
37 #include <linux/kernel.h>
38 #include <linux/bpf.h>
39 #include <linux/filter.h>
40 #include <linux/pkt_cls.h>
41 #include <linux/unistd.h>
42 
43 #include "main.h"
44 #include "../nfp_asm.h"
45 
46 /* --- NFP prog --- */
47 /* The "walk" macros below provide pos and next<n> pointers over multiple
48  * entries at once.  It's safe to modify the next pointers (but not pos).
49  */
50 #define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
51 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
52 	     next = list_next_entry(pos, l);			\
53 	     &(nfp_prog)->insns != &pos->l &&			\
54 	     &(nfp_prog)->insns != &next->l;			\
55 	     pos = nfp_meta_next(pos),				\
56 	     next = nfp_meta_next(pos))
57 
58 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
59 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
60 	     next = list_next_entry(pos, l),			\
61 	     next2 = list_next_entry(next, l);			\
62 	     &(nfp_prog)->insns != &pos->l &&			\
63 	     &(nfp_prog)->insns != &next->l &&			\
64 	     &(nfp_prog)->insns != &next2->l;			\
65 	     pos = nfp_meta_next(pos),				\
66 	     next = nfp_meta_next(pos),				\
67 	     next2 = nfp_meta_next(next))
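/* For illustration: walk2 visits the overlapping, sliding pairs
 * (insn0, insn1), (insn1, insn2), ... and walk3 the corresponding triples,
 * stopping as soon as any of the look-ahead pointers would run off the end
 * of the instruction list.
 */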
68 
69 static bool
70 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
71 {
72 	return meta->l.prev != &nfp_prog->insns;
73 }
74 
75 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
76 {
77 	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
78 		nfp_prog->error = -ENOSPC;
79 		return;
80 	}
81 
82 	nfp_prog->prog[nfp_prog->prog_len] = insn;
83 	nfp_prog->prog_len++;
84 }
85 
86 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
87 {
88 	return nfp_prog->prog_len;
89 }
90 
91 static bool
92 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
93 {
94 	/* If there is a recorded error we may have dropped instructions;
95 	 * that doesn't have to be due to a translator bug, and the translation
96 	 * will fail anyway, so just return OK.
97 	 */
98 	if (nfp_prog->error)
99 		return true;
100 	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
101 }
102 
103 /* --- Emitters --- */
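/* The emitters come in pairs: __emit_*() packs already-validated fields into
 * a raw 64-bit instruction word and appends it with nfp_prog_push(), while
 * the emit_*() wrappers first translate swregs into restricted or
 * unrestricted operand encodings.  Errors are not returned; they are
 * recorded in nfp_prog->error for the caller to check after translation.
 */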
104 static void
105 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
106 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
107 	   bool indir)
108 {
109 	u64 insn;
110 
111 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
112 		FIELD_PREP(OP_CMD_CTX, ctx) |
113 		FIELD_PREP(OP_CMD_B_SRC, breg) |
114 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
115 		FIELD_PREP(OP_CMD_XFER, xfer) |
116 		FIELD_PREP(OP_CMD_CNT, size) |
117 		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
118 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
119 		FIELD_PREP(OP_CMD_INDIR, indir) |
120 		FIELD_PREP(OP_CMD_MODE, mode);
121 
122 	nfp_prog_push(nfp_prog, insn);
123 }
124 
125 static void
126 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
127 	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
128 {
129 	struct nfp_insn_re_regs reg;
130 	int err;
131 
132 	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
133 	if (err) {
134 		nfp_prog->error = err;
135 		return;
136 	}
137 	if (reg.swap) {
138 		pr_err("cmd can't swap arguments\n");
139 		nfp_prog->error = -EFAULT;
140 		return;
141 	}
142 	if (reg.dst_lmextn || reg.src_lmextn) {
143 		pr_err("cmd can't use LMextn\n");
144 		nfp_prog->error = -EFAULT;
145 		return;
146 	}
147 
148 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
149 		   indir);
150 }
151 
152 static void
153 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
154 	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
155 {
156 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
157 }
158 
159 static void
160 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
161 	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
162 {
163 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
164 }
165 
166 static void
167 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
168 	  enum br_ctx_signal_state css, u16 addr, u8 defer)
169 {
170 	u16 addr_lo, addr_hi;
171 	u64 insn;
172 
173 	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
174 	addr_hi = addr != addr_lo;
175 
176 	insn = OP_BR_BASE |
177 		FIELD_PREP(OP_BR_MASK, mask) |
178 		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
179 		FIELD_PREP(OP_BR_CSS, css) |
180 		FIELD_PREP(OP_BR_DEFBR, defer) |
181 		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
182 		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
183 
184 	nfp_prog_push(nfp_prog, insn);
185 }
186 
187 static void
188 emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
189 	     enum nfp_relo_type relo)
190 {
191 	if (mask == BR_UNC && defer > 2) {
192 		pr_err("BUG: branch defer out of bounds %d\n", defer);
193 		nfp_prog->error = -EFAULT;
194 		return;
195 	}
196 
197 	__emit_br(nfp_prog, mask,
198 		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
199 		  BR_CSS_NONE, addr, defer);
200 
201 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
202 		FIELD_PREP(OP_RELO_TYPE, relo);
203 }
204 
205 static void
206 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
207 {
208 	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
209 }
210 
211 static void
212 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
213 	     enum immed_width width, bool invert,
214 	     enum immed_shift shift, bool wr_both,
215 	     bool dst_lmextn, bool src_lmextn)
216 {
217 	u64 insn;
218 
219 	insn = OP_IMMED_BASE |
220 		FIELD_PREP(OP_IMMED_A_SRC, areg) |
221 		FIELD_PREP(OP_IMMED_B_SRC, breg) |
222 		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
223 		FIELD_PREP(OP_IMMED_WIDTH, width) |
224 		FIELD_PREP(OP_IMMED_INV, invert) |
225 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
226 		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
227 		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
228 		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
229 
230 	nfp_prog_push(nfp_prog, insn);
231 }
232 
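/* Note how the 16-bit immediate is split below: the low byte travels as the
 * B operand (reg_imm(imm & 0xff)) while the high byte goes into the
 * instruction's own immediate field (imm_hi = imm >> 8).
 */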
233 static void
234 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
235 	   enum immed_width width, bool invert, enum immed_shift shift)
236 {
237 	struct nfp_insn_ur_regs reg;
238 	int err;
239 
240 	if (swreg_type(dst) == NN_REG_IMM) {
241 		nfp_prog->error = -EFAULT;
242 		return;
243 	}
244 
245 	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
246 	if (err) {
247 		nfp_prog->error = err;
248 		return;
249 	}
250 
251 	/* Use reg.dst when destination is No-Dest. */
252 	__emit_immed(nfp_prog,
253 		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
254 		     reg.breg, imm >> 8, width, invert, shift,
255 		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
256 }
257 
258 static void
259 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
260 	   enum shf_sc sc, u8 shift,
261 	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
262 	   bool dst_lmextn, bool src_lmextn)
263 {
264 	u64 insn;
265 
266 	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
267 		nfp_prog->error = -EFAULT;
268 		return;
269 	}
270 
271 	if (sc == SHF_SC_L_SHF)
272 		shift = 32 - shift;
273 
274 	insn = OP_SHF_BASE |
275 		FIELD_PREP(OP_SHF_A_SRC, areg) |
276 		FIELD_PREP(OP_SHF_SC, sc) |
277 		FIELD_PREP(OP_SHF_B_SRC, breg) |
278 		FIELD_PREP(OP_SHF_I8, i8) |
279 		FIELD_PREP(OP_SHF_SW, sw) |
280 		FIELD_PREP(OP_SHF_DST, dst) |
281 		FIELD_PREP(OP_SHF_SHIFT, shift) |
282 		FIELD_PREP(OP_SHF_OP, op) |
283 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
284 		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
285 		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
286 		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
287 
288 	nfp_prog_push(nfp_prog, insn);
289 }
290 
291 static void
292 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
293 	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
294 {
295 	struct nfp_insn_re_regs reg;
296 	int err;
297 
298 	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
299 	if (err) {
300 		nfp_prog->error = err;
301 		return;
302 	}
303 
304 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
305 		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
306 		   reg.dst_lmextn, reg.src_lmextn);
307 }
308 
309 static void
310 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
311 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
312 	   bool dst_lmextn, bool src_lmextn)
313 {
314 	u64 insn;
315 
316 	insn = OP_ALU_BASE |
317 		FIELD_PREP(OP_ALU_A_SRC, areg) |
318 		FIELD_PREP(OP_ALU_B_SRC, breg) |
319 		FIELD_PREP(OP_ALU_DST, dst) |
320 		FIELD_PREP(OP_ALU_SW, swap) |
321 		FIELD_PREP(OP_ALU_OP, op) |
322 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
323 		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
324 		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
325 		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
326 
327 	nfp_prog_push(nfp_prog, insn);
328 }
329 
330 static void
331 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
332 	 swreg lreg, enum alu_op op, swreg rreg)
333 {
334 	struct nfp_insn_ur_regs reg;
335 	int err;
336 
337 	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
338 	if (err) {
339 		nfp_prog->error = err;
340 		return;
341 	}
342 
343 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
344 		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
345 		   reg.dst_lmextn, reg.src_lmextn);
346 }
347 
348 static void
349 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
350 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
351 		bool zero, bool swap, bool wr_both,
352 		bool dst_lmextn, bool src_lmextn)
353 {
354 	u64 insn;
355 
356 	insn = OP_LDF_BASE |
357 		FIELD_PREP(OP_LDF_A_SRC, areg) |
358 		FIELD_PREP(OP_LDF_SC, sc) |
359 		FIELD_PREP(OP_LDF_B_SRC, breg) |
360 		FIELD_PREP(OP_LDF_I8, imm8) |
361 		FIELD_PREP(OP_LDF_SW, swap) |
362 		FIELD_PREP(OP_LDF_ZF, zero) |
363 		FIELD_PREP(OP_LDF_BMASK, bmask) |
364 		FIELD_PREP(OP_LDF_SHF, shift) |
365 		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
366 		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
367 		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
368 
369 	nfp_prog_push(nfp_prog, insn);
370 }
371 
372 static void
373 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
374 		  enum shf_sc sc, u8 shift, bool zero)
375 {
376 	struct nfp_insn_re_regs reg;
377 	int err;
378 
379 	/* Note: ld_field is special as it uses one of the src regs as dst */
380 	err = swreg_to_restricted(dst, dst, src, &reg, true);
381 	if (err) {
382 		nfp_prog->error = err;
383 		return;
384 	}
385 
386 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
387 			reg.i8, zero, reg.swap, reg.wr_both,
388 			reg.dst_lmextn, reg.src_lmextn);
389 }
390 
391 static void
392 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
393 	      enum shf_sc sc, u8 shift)
394 {
395 	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
396 }
397 
398 static void
399 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
400 	    bool dst_lmextn, bool src_lmextn)
401 {
402 	u64 insn;
403 
404 	insn = OP_LCSR_BASE |
405 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
406 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
407 		FIELD_PREP(OP_LCSR_WRITE, wr) |
408 		FIELD_PREP(OP_LCSR_ADDR, addr) |
409 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
410 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
411 
412 	nfp_prog_push(nfp_prog, insn);
413 }
414 
415 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
416 {
417 	struct nfp_insn_ur_regs reg;
418 	int err;
419 
420 	/* This instruction takes immeds instead of reg_none() for the ignored
421 	 * operand, but we can't encode two immeds in one instr with our normal
422 	 * swreg infra, so if the param is an immed we encode it as reg_none()
423 	 * and copy the immed to both operands.
424 	 */
425 	if (swreg_type(src) == NN_REG_IMM) {
426 		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
427 		reg.breg = reg.areg;
428 	} else {
429 		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
430 	}
431 	if (err) {
432 		nfp_prog->error = err;
433 		return;
434 	}
435 
436 	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
437 		    false, reg.src_lmextn);
438 }
439 
440 static void emit_nop(struct nfp_prog *nfp_prog)
441 {
442 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
443 }
444 
445 /* --- Wrappers --- */
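/* Worked example for pack_immed()/wrp_immed() below: 0x00123400 packs as
 * val 0x1234 with IMMED_SHIFT_1B and 0xabcd0000 as val 0xabcd with
 * IMMED_SHIFT_2B, so each needs a single immed instruction.  0x12345678
 * (and its complement) fits none of the patterns, so wrp_immed() falls back
 * to two instructions, one per 16-bit half.
 */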
446 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
447 {
448 	if (!(imm & 0xffff0000)) {
449 		*val = imm;
450 		*shift = IMMED_SHIFT_0B;
451 	} else if (!(imm & 0xff0000ff)) {
452 		*val = imm >> 8;
453 		*shift = IMMED_SHIFT_1B;
454 	} else if (!(imm & 0x0000ffff)) {
455 		*val = imm >> 16;
456 		*shift = IMMED_SHIFT_2B;
457 	} else {
458 		return false;
459 	}
460 
461 	return true;
462 }
463 
464 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
465 {
466 	enum immed_shift shift;
467 	u16 val;
468 
469 	if (pack_immed(imm, &val, &shift)) {
470 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
471 	} else if (pack_immed(~imm, &val, &shift)) {
472 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
473 	} else {
474 		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
475 			   false, IMMED_SHIFT_0B);
476 		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
477 			   false, IMMED_SHIFT_2B);
478 	}
479 }
480 
481 static void
482 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
483 	       enum nfp_relo_type relo)
484 {
485 	if (imm > 0xffff) {
486 		pr_err("relocation of a large immediate!\n");
487 		nfp_prog->error = -EFAULT;
488 		return;
489 	}
490 	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
491 
492 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
493 		FIELD_PREP(OP_RELO_TYPE, relo);
494 }
495 
496 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
497  * If @imm is small enough, encode it directly in the operand and return it;
498  * otherwise load @imm into a spare register and return its encoding.
499  */
500 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
501 {
502 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
503 		return reg_imm(imm);
504 
505 	wrp_immed(nfp_prog, tmp_reg, imm);
506 	return tmp_reg;
507 }
508 
509 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
510  * If @imm is small enough, encode it directly in the operand and return it;
511  * otherwise load @imm into a spare register and return its encoding.
512  */
513 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
514 {
515 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
516 		return reg_imm(imm);
517 
518 	wrp_immed(nfp_prog, tmp_reg, imm);
519 	return tmp_reg;
520 }
521 
522 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
523 {
524 	while (count--)
525 		emit_nop(nfp_prog);
526 }
527 
528 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
529 {
530 	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
531 }
532 
533 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
534 {
535 	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
536 }
537 
538 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
539  * result to @dst starting from its low end.
540  */
541 static void
542 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
543 		u8 offset)
544 {
545 	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
546 	u8 mask = (1 << field_len) - 1;
547 
548 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
549 }
550 
551 /* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src, OR
552  * the result into @dst at @offset; the other bits of @dst are unchanged.
553  */
554 static void
555 wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
556 		   u8 field_len, u8 offset)
557 {
558 	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
559 	u8 mask = ((1 << field_len) - 1) << offset;
560 
561 	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
562 }
563 
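/* For 40-bit addressing the pointer lives in a GPR pair: @src_gpr holds the
 * low 32 bits and @src_gpr + 1 the upper bits.  addr40_offset() adds @offset
 * to the pair, propagating the carry into the high word, and hands back the
 * A/B operands to use with a 40-bit command.
 */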
564 static void
565 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
566 	      swreg *rega, swreg *regb)
567 {
568 	if (offset == reg_imm(0)) {
569 		*rega = reg_a(src_gpr);
570 		*regb = reg_b(src_gpr + 1);
571 		return;
572 	}
573 
574 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
575 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
576 		 reg_imm(0));
577 	*rega = imm_a(nfp_prog);
578 	*regb = imm_b(nfp_prog);
579 }
580 
581 /* NFP has a Command Push Pull (CPP) bus which supports bulk memory operations. */
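/* Rough shape of the transformation below: a load/store pair marked for
 * gathering (meta->ldst_gather_len / meta->paired_st) is turned into one
 * bulk CPP read into the transfer-in registers, a register-to-register copy
 * into the transfer-out registers, and then one or two writes chosen by
 * length and alignment (direct_ref for short/aligned cases, indirect_ref
 * with a PREV_ALU length override otherwise).
 */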
582 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
583 {
584 	bool descending_seq = meta->ldst_gather_len < 0;
585 	s16 len = abs(meta->ldst_gather_len);
586 	swreg src_base, off;
587 	bool src_40bit_addr;
588 	unsigned int i;
589 	u8 xfer_num;
590 
591 	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
592 	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
593 	src_base = reg_a(meta->insn.src_reg * 2);
594 	xfer_num = round_up(len, 4) / 4;
595 
596 	if (src_40bit_addr)
597 		addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
598 			      &off);
599 
600 	/* Set up the PREV_ALU fields to override the memory read length. */
601 	if (len > 32)
602 		wrp_immed(nfp_prog, reg_none(),
603 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
604 
605 	/* Memory read from source addr into transfer-in registers. */
606 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
607 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
608 		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
609 
610 	/* Move from transfer-in to transfer-out. */
611 	for (i = 0; i < xfer_num; i++)
612 		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
613 
614 	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
615 
616 	if (len <= 8) {
617 		/* Use single direct_ref write8. */
618 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
619 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
620 			 CMD_CTX_SWAP);
621 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
622 		/* Use single direct_ref write32. */
623 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
624 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
625 			 CMD_CTX_SWAP);
626 	} else if (len <= 32) {
627 		/* Use single indirect_ref write8. */
628 		wrp_immed(nfp_prog, reg_none(),
629 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
630 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
631 			       reg_a(meta->paired_st->dst_reg * 2), off,
632 			       len - 1, CMD_CTX_SWAP);
633 	} else if (IS_ALIGNED(len, 4)) {
634 		/* Use single indirect_ref write32. */
635 		wrp_immed(nfp_prog, reg_none(),
636 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
637 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
638 			       reg_a(meta->paired_st->dst_reg * 2), off,
639 			       xfer_num - 1, CMD_CTX_SWAP);
640 	} else if (len <= 40) {
641 		/* Use one direct_ref write32 to write the first 32 bytes, then
642 		 * another direct_ref write8 to write the remaining bytes.
643 		 */
644 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
645 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
646 			 CMD_CTX_SWAP);
647 
648 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
649 				      imm_b(nfp_prog));
650 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
651 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
652 			 CMD_CTX_SWAP);
653 	} else {
654 		/* Use one indirect_ref write32 to write the 4-byte aligned part,
655 		 * then another direct_ref write8 to write the remaining bytes.
656 		 */
657 		u8 new_off;
658 
659 		wrp_immed(nfp_prog, reg_none(),
660 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
661 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
662 			       reg_a(meta->paired_st->dst_reg * 2), off,
663 			       xfer_num - 2, CMD_CTX_SWAP);
664 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
665 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
666 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
667 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
668 			 (len & 0x3) - 1, CMD_CTX_SWAP);
669 	}
670 
671 	/* TODO: The following extra load is to make sure the data flow is
672 	 *  identical before and after the memory copy optimization.
673 	 *
674 	 *  The load destination register is not guaranteed to be dead, so we
675 	 *  need to make sure it is loaded with the value the same as before
676 	 *  this transformation.
677 	 *
678 	 *  These extra loads could be removed once we have accurate register
679 	 *  usage information.
680 	 */
681 	if (descending_seq)
682 		xfer_num = 0;
683 	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
684 		xfer_num = xfer_num - 1;
685 	else
686 		xfer_num = xfer_num - 2;
687 
688 	switch (BPF_SIZE(meta->insn.code)) {
689 	case BPF_B:
690 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
691 				reg_xfer(xfer_num), 1,
692 				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
693 		break;
694 	case BPF_H:
695 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
696 				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
697 		break;
698 	case BPF_W:
699 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
700 			reg_xfer(0));
701 		break;
702 	case BPF_DW:
703 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
704 			reg_xfer(xfer_num));
705 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
706 			reg_xfer(xfer_num + 1));
707 		break;
708 	}
709 
710 	if (BPF_SIZE(meta->insn.code) != BPF_DW)
711 		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
712 
713 	return 0;
714 }
715 
716 static int
717 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
718 {
719 	unsigned int i;
720 	u16 shift, sz;
721 
722 	/* We load the value from the address indicated in @offset and then
723 	 * shift out the data we don't need.  Note: this is big endian!
724 	 */
725 	sz = max(size, 4);
726 	shift = size < 4 ? 4 - size : 0;
727 
728 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
729 		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
730 
731 	i = 0;
732 	if (shift)
733 		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
734 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
735 	else
736 		for (; i * 4 < size; i++)
737 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
738 
739 	if (i < 2)
740 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
741 
742 	return 0;
743 }
744 
745 static int
746 data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
747 		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
748 {
749 	unsigned int i;
750 	u8 mask, sz;
751 
752 	/* We load the value from the address indicated in rreg + lreg and then
753 	 * mask out the data we don't need.  Note: this is little endian!
754 	 */
755 	sz = max(size, 4);
756 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
757 
758 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
759 		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
760 
761 	i = 0;
762 	if (mask)
763 		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
764 				  reg_xfer(0), SHF_SC_NONE, 0, true);
765 	else
766 		for (; i * 4 < size; i++)
767 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
768 
769 	if (i < 2)
770 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
771 
772 	return 0;
773 }
774 
775 static int
776 data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
777 			  u8 dst_gpr, u8 size)
778 {
779 	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
780 				  size, CMD_MODE_32b);
781 }
782 
783 static int
784 data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
785 			  u8 dst_gpr, u8 size)
786 {
787 	swreg rega, regb;
788 
789 	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
790 
791 	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
792 				  size, CMD_MODE_40b_BA);
793 }
794 
795 static int
796 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
797 {
798 	swreg tmp_reg;
799 
800 	/* Calculate the true offset (src_reg + imm) */
801 	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
802 	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
803 
804 	/* Check packet length (size guaranteed to fit b/c it's u8) */
805 	emit_alu(nfp_prog, imm_a(nfp_prog),
806 		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
807 	emit_alu(nfp_prog, reg_none(),
808 		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
809 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
810 
811 	/* Load data */
812 	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
813 }
814 
815 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
816 {
817 	swreg tmp_reg;
818 
819 	/* Check packet length */
820 	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
821 	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
822 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
823 
824 	/* Load data */
825 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
826 	return data_ld(nfp_prog, tmp_reg, 0, size);
827 }
828 
829 static int
830 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
831 		    u8 src_gpr, u8 size)
832 {
833 	unsigned int i;
834 
835 	for (i = 0; i * 4 < size; i++)
836 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
837 
838 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
839 		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
840 
841 	return 0;
842 }
843 
844 static int
845 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
846 		   u64 imm, u8 size)
847 {
848 	wrp_immed(nfp_prog, reg_xfer(0), imm);
849 	if (size == 8)
850 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
851 
852 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
853 		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
854 
855 	return 0;
856 }
857 
858 typedef int
859 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
860 	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
861 	     bool needs_inc);
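/* A lmem_step callback is invoked once per slice of a stack access (see
 * mem_op_stack()).  @gpr/@gpr_byte locate the slice within the BPF register
 * pair, @off is the LM offset of the slice, and the first/new_gpr/last flags
 * let the callback decide when a read-modify-write or an LM pointer
 * post-increment is actually needed.
 */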
862 
863 static int
864 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
865 	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
866 	      bool needs_inc)
867 {
868 	bool should_inc = needs_inc && new_gpr && !last;
869 	u32 idx, src_byte;
870 	enum shf_sc sc;
871 	swreg reg;
872 	int shf;
873 	u8 mask;
874 
875 	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
876 		return -EOPNOTSUPP;
877 
878 	idx = off / 4;
879 
880 	/* Move the entire word */
881 	if (size == 4) {
882 		wrp_mov(nfp_prog, reg_both(dst),
883 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
884 		return 0;
885 	}
886 
887 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
888 		return -EOPNOTSUPP;
889 
890 	src_byte = off % 4;
891 
892 	mask = (1 << size) - 1;
893 	mask <<= dst_byte;
894 
895 	if (WARN_ON_ONCE(mask > 0xf))
896 		return -EOPNOTSUPP;
897 
898 	shf = abs(src_byte - dst_byte) * 8;
899 	if (src_byte == dst_byte) {
900 		sc = SHF_SC_NONE;
901 	} else if (src_byte < dst_byte) {
902 		shf = 32 - shf;
903 		sc = SHF_SC_L_SHF;
904 	} else {
905 		sc = SHF_SC_R_SHF;
906 	}
907 
908 	/* ld_field can address fewer indexes; if the offset is too large do RMW.
909 	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
910 	 */
911 	if (idx <= RE_REG_LM_IDX_MAX) {
912 		reg = reg_lm(lm3 ? 3 : 0, idx);
913 	} else {
914 		reg = imm_a(nfp_prog);
915 		/* If it's not the first part of the load and we start a new GPR
916 		 * that means we are loading a second part of the LMEM word into
917 		 * a new GPR.  IOW we've already looked at that LMEM word and
918 		 * therefore it has been loaded into imm_a().
919 		 */
920 		if (first || !new_gpr)
921 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
922 	}
923 
924 	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
925 
926 	if (should_inc)
927 		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
928 
929 	return 0;
930 }
931 
932 static int
933 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
934 	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
935 	       bool needs_inc)
936 {
937 	bool should_inc = needs_inc && new_gpr && !last;
938 	u32 idx, dst_byte;
939 	enum shf_sc sc;
940 	swreg reg;
941 	int shf;
942 	u8 mask;
943 
944 	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
945 		return -EOPNOTSUPP;
946 
947 	idx = off / 4;
948 
949 	/* Move the entire word */
950 	if (size == 4) {
951 		wrp_mov(nfp_prog,
952 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
953 			reg_b(src));
954 		return 0;
955 	}
956 
957 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
958 		return -EOPNOTSUPP;
959 
960 	dst_byte = off % 4;
961 
962 	mask = (1 << size) - 1;
963 	mask <<= dst_byte;
964 
965 	if (WARN_ON_ONCE(mask > 0xf))
966 		return -EOPNOTSUPP;
967 
968 	shf = abs(src_byte - dst_byte) * 8;
969 	if (src_byte == dst_byte) {
970 		sc = SHF_SC_NONE;
971 	} else if (src_byte < dst_byte) {
972 		shf = 32 - shf;
973 		sc = SHF_SC_L_SHF;
974 	} else {
975 		sc = SHF_SC_R_SHF;
976 	}
977 
978 	/* ld_field can address fewer indexes; if the offset is too large do RMW.
979 	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
980 	 */
981 	if (idx <= RE_REG_LM_IDX_MAX) {
982 		reg = reg_lm(lm3 ? 3 : 0, idx);
983 	} else {
984 		reg = imm_a(nfp_prog);
985 		/* Only first and last LMEM locations are going to need RMW,
986 		 * the middle locations will be overwritten fully.
987 		 */
988 		if (first || last)
989 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
990 	}
991 
992 	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
993 
994 	if (new_gpr || last) {
995 		if (idx > RE_REG_LM_IDX_MAX)
996 			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
997 		if (should_inc)
998 			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
999 	}
1000 
1001 	return 0;
1002 }
1003 
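/* Illustrative slicing done by the loop in mem_op_stack() below: an 8 byte
 * access at stack offset 6 is broken into four 2 byte steps, so that no step
 * crosses a 4 byte boundary in either the LMEM word or the GPR.
 */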
1004 static int
1005 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1006 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
1007 	     bool clr_gpr, lmem_step step)
1008 {
1009 	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
1010 	bool first = true, last;
1011 	bool needs_inc = false;
1012 	swreg stack_off_reg;
1013 	u8 prev_gpr = 255;
1014 	u32 gpr_byte = 0;
1015 	bool lm3 = true;
1016 	int ret;
1017 
1018 	if (meta->ptr_not_const) {
1019 		/* Use of the last encountered ptr_off is OK, since they all
1020 		 * have the same alignment.  We depend on the low bits of the
1021 		 * value being discarded when written to the LMaddr register.
1022 		 */
1023 		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
1024 						stack_imm(nfp_prog));
1025 
1026 		emit_alu(nfp_prog, imm_b(nfp_prog),
1027 			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
1028 
1029 		needs_inc = true;
1030 	} else if (off + size <= 64) {
1031 		/* We can reach bottom 64B with LMaddr0 */
1032 		lm3 = false;
1033 	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
1034 		/* We have to set up a new pointer.  If we know the offset
1035 		 * and the entire access falls into a single 32 byte aligned
1036 		 * window we won't have to increment the LM pointer.
1037 		 * The 32 byte alignment is important because the offset is ORed
1038 		 * in, not added, when doing *l$indexN[off].
1039 		 */
1040 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
1041 						stack_imm(nfp_prog));
1042 		emit_alu(nfp_prog, imm_b(nfp_prog),
1043 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1044 
1045 		off %= 32;
1046 	} else {
1047 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
1048 						stack_imm(nfp_prog));
1049 
1050 		emit_alu(nfp_prog, imm_b(nfp_prog),
1051 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1052 
1053 		needs_inc = true;
1054 	}
1055 	if (lm3) {
1056 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
1057 		/* For size < 8 one slot will be filled by the zeroing of the upper half. */
1058 		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1059 	}
1060 
1061 	if (clr_gpr && size < 8)
1062 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1063 
1064 	while (size) {
1065 		u32 slice_end;
1066 		u8 slice_size;
1067 
1068 		slice_size = min(size, 4 - gpr_byte);
1069 		slice_end = min(off + slice_size, round_up(off + 1, 4));
1070 		slice_size = slice_end - off;
1071 
1072 		last = slice_size == size;
1073 
1074 		if (needs_inc)
1075 			off %= 4;
1076 
1077 		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
1078 			   first, gpr != prev_gpr, last, lm3, needs_inc);
1079 		if (ret)
1080 			return ret;
1081 
1082 		prev_gpr = gpr;
1083 		first = false;
1084 
1085 		gpr_byte += slice_size;
1086 		if (gpr_byte >= 4) {
1087 			gpr_byte -= 4;
1088 			gpr++;
1089 		}
1090 
1091 		size -= slice_size;
1092 		off += slice_size;
1093 	}
1094 
1095 	return 0;
1096 }
1097 
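/* wrp_alu_imm() special-cases all-zeros/all-ones immediates: AND with 0 just
 * zeroes the destination, AND with ~0 and OR/XOR with 0 are dropped entirely,
 * OR with ~0 loads ~0, and XOR with ~0 becomes a NOT.
 */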
1098 static void
1099 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
1100 {
1101 	swreg tmp_reg;
1102 
1103 	if (alu_op == ALU_OP_AND) {
1104 		if (!imm)
1105 			wrp_immed(nfp_prog, reg_both(dst), 0);
1106 		if (!imm || !~imm)
1107 			return;
1108 	}
1109 	if (alu_op == ALU_OP_OR) {
1110 		if (!~imm)
1111 			wrp_immed(nfp_prog, reg_both(dst), ~0U);
1112 		if (!imm || !~imm)
1113 			return;
1114 	}
1115 	if (alu_op == ALU_OP_XOR) {
1116 		if (!~imm)
1117 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
1118 				 ALU_OP_NOT, reg_b(dst));
1119 		if (!imm || !~imm)
1120 			return;
1121 	}
1122 
1123 	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1124 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
1125 }
1126 
1127 static int
1128 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1129 	      enum alu_op alu_op, bool skip)
1130 {
1131 	const struct bpf_insn *insn = &meta->insn;
1132 	u64 imm = insn->imm; /* sign extend */
1133 
1134 	if (skip) {
1135 		meta->skip = true;
1136 		return 0;
1137 	}
1138 
1139 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
1140 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
1141 
1142 	return 0;
1143 }
1144 
1145 static int
1146 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1147 	      enum alu_op alu_op)
1148 {
1149 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1150 
1151 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1152 	emit_alu(nfp_prog, reg_both(dst + 1),
1153 		 reg_a(dst + 1), alu_op, reg_b(src + 1));
1154 
1155 	return 0;
1156 }
1157 
1158 static int
1159 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1160 	      enum alu_op alu_op, bool skip)
1161 {
1162 	const struct bpf_insn *insn = &meta->insn;
1163 
1164 	if (skip) {
1165 		meta->skip = true;
1166 		return 0;
1167 	}
1168 
1169 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1170 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1171 
1172 	return 0;
1173 }
1174 
1175 static int
1176 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1177 	      enum alu_op alu_op)
1178 {
1179 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1180 
1181 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1182 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1183 
1184 	return 0;
1185 }
1186 
1187 static void
1188 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
1189 		 enum br_mask br_mask, u16 off)
1190 {
1191 	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
1192 	emit_br(nfp_prog, br_mask, off, 0);
1193 }
1194 
1195 static int
1196 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1197 	     enum alu_op alu_op, enum br_mask br_mask)
1198 {
1199 	const struct bpf_insn *insn = &meta->insn;
1200 
1201 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
1202 			 insn->src_reg * 2, br_mask, insn->off);
1203 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1204 			 insn->src_reg * 2 + 1, br_mask, insn->off);
1205 
1206 	return 0;
1207 }
1208 
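/* 64-bit compares below are built from a SUB on the low words followed by a
 * SUB_C (subtract with carry) on the high words, discarding the results and
 * branching on the resulting condition codes; @swap flips the operand order
 * so one branch mask can cover both a < b and b < a style tests.
 */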
1209 static int
1210 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1211 	    enum br_mask br_mask, bool swap)
1212 {
1213 	const struct bpf_insn *insn = &meta->insn;
1214 	u64 imm = insn->imm; /* sign extend */
1215 	u8 reg = insn->dst_reg * 2;
1216 	swreg tmp_reg;
1217 
1218 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1219 	if (!swap)
1220 		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
1221 	else
1222 		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
1223 
1224 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1225 	if (!swap)
1226 		emit_alu(nfp_prog, reg_none(),
1227 			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
1228 	else
1229 		emit_alu(nfp_prog, reg_none(),
1230 			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
1231 
1232 	emit_br(nfp_prog, br_mask, insn->off, 0);
1233 
1234 	return 0;
1235 }
1236 
1237 static int
1238 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1239 	    enum br_mask br_mask, bool swap)
1240 {
1241 	const struct bpf_insn *insn = &meta->insn;
1242 	u8 areg, breg;
1243 
1244 	areg = insn->dst_reg * 2;
1245 	breg = insn->src_reg * 2;
1246 
1247 	if (swap) {
1248 		areg ^= breg;
1249 		breg ^= areg;
1250 		areg ^= breg;
1251 	}
1252 
1253 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1254 	emit_alu(nfp_prog, reg_none(),
1255 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1256 	emit_br(nfp_prog, br_mask, insn->off, 0);
1257 
1258 	return 0;
1259 }
1260 
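/* Byte swap of a 32-bit word using two ld_field rotates: first write all four
 * bytes of @reg_in rotated right by 8, then overwrite bytes 0 and 2 with the
 * intermediate value rotated right by 16; in effect ABCD ends up as DCBA.
 */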
1261 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1262 {
1263 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1264 		      SHF_SC_R_ROT, 8);
1265 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1266 		      SHF_SC_R_ROT, 16);
1267 }
1268 
1269 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1270 {
1271 	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1272 	struct nfp_bpf_cap_adjust_head *adjust_head;
1273 	u32 ret_einval, end;
1274 
1275 	adjust_head = &nfp_prog->bpf->adjust_head;
1276 
1277 	/* Optimized version - 5 vs 14 cycles */
1278 	if (nfp_prog->adjust_head_location != UINT_MAX) {
1279 		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1280 			return -EINVAL;
1281 
1282 		emit_alu(nfp_prog, pptr_reg(nfp_prog),
1283 			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1284 		emit_alu(nfp_prog, plen_reg(nfp_prog),
1285 			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1286 		emit_alu(nfp_prog, pv_len(nfp_prog),
1287 			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1288 
1289 		wrp_immed(nfp_prog, reg_both(0), 0);
1290 		wrp_immed(nfp_prog, reg_both(1), 0);
1291 
1292 		/* TODO: when adjust head is guaranteed to succeed we can
1293 		 * also eliminate the following if (r0 == 0) branch.
1294 		 */
1295 
1296 		return 0;
1297 	}
1298 
1299 	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1300 	end = ret_einval + 2;
1301 
1302 	/* We need to use a temp because offset is just a part of the pkt ptr */
1303 	emit_alu(nfp_prog, tmp,
1304 		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1305 
1306 	/* Validate result will fit within FW datapath constraints */
1307 	emit_alu(nfp_prog, reg_none(),
1308 		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1309 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1310 	emit_alu(nfp_prog, reg_none(),
1311 		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1312 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1313 
1314 	/* Validate the length is at least ETH_HLEN */
1315 	emit_alu(nfp_prog, tmp_len,
1316 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1317 	emit_alu(nfp_prog, reg_none(),
1318 		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1319 	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1320 
1321 	/* Load the ret code */
1322 	wrp_immed(nfp_prog, reg_both(0), 0);
1323 	wrp_immed(nfp_prog, reg_both(1), 0);
1324 
1325 	/* Modify the packet metadata */
1326 	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1327 
1328 	/* Skip over the -EINVAL ret code (defer 2) */
1329 	emit_br(nfp_prog, BR_UNC, end, 2);
1330 
1331 	emit_alu(nfp_prog, plen_reg(nfp_prog),
1332 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1333 	emit_alu(nfp_prog, pv_len(nfp_prog),
1334 		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1335 
1336 	/* return -EINVAL target */
1337 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1338 		return -EINVAL;
1339 
1340 	wrp_immed(nfp_prog, reg_both(0), -22);
1341 	wrp_immed(nfp_prog, reg_both(1), ~0);
1342 
1343 	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1344 		return -EINVAL;
1345 
1346 	return 0;
1347 }
1348 
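/* Calling convention used here for firmware map helpers: the key (and, for
 * updates, the value) is described via LM pointers, the map ID is passed in
 * A0 and the return address in B0, both loaded in the two defer slots of the
 * helper branch, which is resolved at relocation time (RELO_BR_HELPER).
 */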
1349 static int
1350 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1351 {
1352 	struct bpf_offloaded_map *offmap;
1353 	struct nfp_bpf_map *nfp_map;
1354 	bool load_lm_ptr;
1355 	u32 ret_tgt;
1356 	s64 lm_off;
1357 	swreg tid;
1358 
1359 	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
1360 	nfp_map = offmap->dev_priv;
1361 
1362 	/* We only have to reload LM0 if the key is not at start of stack */
1363 	lm_off = nfp_prog->stack_depth;
1364 	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
1365 	load_lm_ptr = meta->arg2.var_off || lm_off;
1366 
1367 	/* Set LM0 to start of key */
1368 	if (load_lm_ptr)
1369 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1370 	if (meta->func_id == BPF_FUNC_map_update_elem)
1371 		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1372 
1373 	/* Load the map ID into a register; it should actually fit as an immediate
1374 	 * but in case it doesn't, deal with it here rather than in the delay slots.
1375 	 */
1376 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
1377 
1378 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1379 		     2, RELO_BR_HELPER);
1380 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1381 
1382 	/* Load map ID into A0 */
1383 	wrp_mov(nfp_prog, reg_a(0), tid);
1384 
1385 	/* Load the return address into B0 */
1386 	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1387 
1388 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1389 		return -EINVAL;
1390 
1391 	/* Reset the LM0 pointer */
1392 	if (!load_lm_ptr)
1393 		return 0;
1394 
1395 	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1396 	wrp_nops(nfp_prog, 3);
1397 
1398 	return 0;
1399 }
1400 
1401 /* --- Callbacks --- */
1402 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1403 {
1404 	const struct bpf_insn *insn = &meta->insn;
1405 	u8 dst = insn->dst_reg * 2;
1406 	u8 src = insn->src_reg * 2;
1407 
1408 	if (insn->src_reg == BPF_REG_10) {
1409 		swreg stack_depth_reg;
1410 
1411 		stack_depth_reg = ur_load_imm_any(nfp_prog,
1412 						  nfp_prog->stack_depth,
1413 						  stack_imm(nfp_prog));
1414 		emit_alu(nfp_prog, reg_both(dst),
1415 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1416 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1417 	} else {
1418 		wrp_reg_mov(nfp_prog, dst, src);
1419 		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1420 	}
1421 
1422 	return 0;
1423 }
1424 
1425 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1426 {
1427 	u64 imm = meta->insn.imm; /* sign extend */
1428 
1429 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1430 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1431 
1432 	return 0;
1433 }
1434 
1435 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1436 {
1437 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1438 }
1439 
1440 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1441 {
1442 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1443 }
1444 
1445 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1446 {
1447 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1448 }
1449 
1450 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1451 {
1452 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1453 }
1454 
1455 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1456 {
1457 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1458 }
1459 
1460 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1461 {
1462 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1463 }
1464 
1465 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1466 {
1467 	const struct bpf_insn *insn = &meta->insn;
1468 
1469 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1470 		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1471 		 reg_b(insn->src_reg * 2));
1472 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1473 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1474 		 reg_b(insn->src_reg * 2 + 1));
1475 
1476 	return 0;
1477 }
1478 
1479 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1480 {
1481 	const struct bpf_insn *insn = &meta->insn;
1482 	u64 imm = insn->imm; /* sign extend */
1483 
1484 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1485 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1486 
1487 	return 0;
1488 }
1489 
1490 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1491 {
1492 	const struct bpf_insn *insn = &meta->insn;
1493 
1494 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1495 		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1496 		 reg_b(insn->src_reg * 2));
1497 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1498 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1499 		 reg_b(insn->src_reg * 2 + 1));
1500 
1501 	return 0;
1502 }
1503 
1504 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1505 {
1506 	const struct bpf_insn *insn = &meta->insn;
1507 	u64 imm = insn->imm; /* sign extend */
1508 
1509 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1510 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1511 
1512 	return 0;
1513 }
1514 
1515 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1516 {
1517 	const struct bpf_insn *insn = &meta->insn;
1518 
1519 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1520 		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1521 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1522 		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1523 
1524 	return 0;
1525 }
1526 
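/* 64-bit constant shifts use the double-shift funnel: for a left shift by
 * n < 32 the new upper word is the [hi:lo] pair right-shifted by (32 - n) and
 * the lower word is shifted left by n; shr_imm64() below mirrors this
 * construction for right shifts.
 */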
1527 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1528 {
1529 	const struct bpf_insn *insn = &meta->insn;
1530 	u8 dst = insn->dst_reg * 2;
1531 
1532 	if (insn->imm < 32) {
1533 		emit_shf(nfp_prog, reg_both(dst + 1),
1534 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1535 			 SHF_SC_R_DSHF, 32 - insn->imm);
1536 		emit_shf(nfp_prog, reg_both(dst),
1537 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1538 			 SHF_SC_L_SHF, insn->imm);
1539 	} else if (insn->imm == 32) {
1540 		wrp_reg_mov(nfp_prog, dst + 1, dst);
1541 		wrp_immed(nfp_prog, reg_both(dst), 0);
1542 	} else if (insn->imm > 32) {
1543 		emit_shf(nfp_prog, reg_both(dst + 1),
1544 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1545 			 SHF_SC_L_SHF, insn->imm - 32);
1546 		wrp_immed(nfp_prog, reg_both(dst), 0);
1547 	}
1548 
1549 	return 0;
1550 }
1551 
1552 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1553 {
1554 	const struct bpf_insn *insn = &meta->insn;
1555 	u8 dst = insn->dst_reg * 2;
1556 
1557 	if (insn->imm < 32) {
1558 		emit_shf(nfp_prog, reg_both(dst),
1559 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1560 			 SHF_SC_R_DSHF, insn->imm);
1561 		emit_shf(nfp_prog, reg_both(dst + 1),
1562 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1563 			 SHF_SC_R_SHF, insn->imm);
1564 	} else if (insn->imm == 32) {
1565 		wrp_reg_mov(nfp_prog, dst, dst + 1);
1566 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1567 	} else if (insn->imm > 32) {
1568 		emit_shf(nfp_prog, reg_both(dst),
1569 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1570 			 SHF_SC_R_SHF, insn->imm - 32);
1571 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1572 	}
1573 
1574 	return 0;
1575 }
1576 
1577 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1578 {
1579 	const struct bpf_insn *insn = &meta->insn;
1580 
1581 	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
1582 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1583 
1584 	return 0;
1585 }
1586 
1587 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1588 {
1589 	const struct bpf_insn *insn = &meta->insn;
1590 
1591 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
1592 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1593 
1594 	return 0;
1595 }
1596 
1597 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1598 {
1599 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
1600 }
1601 
1602 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1603 {
1604 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
1605 }
1606 
1607 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1608 {
1609 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
1610 }
1611 
1612 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1613 {
1614 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1615 }
1616 
1617 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1618 {
1619 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
1620 }
1621 
1622 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1623 {
1624 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1625 }
1626 
1627 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1628 {
1629 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
1630 }
1631 
1632 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1633 {
1634 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
1635 }
1636 
1637 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1638 {
1639 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
1640 }
1641 
1642 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1643 {
1644 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
1645 }
1646 
1647 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1648 {
1649 	u8 dst = meta->insn.dst_reg * 2;
1650 
1651 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
1652 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1653 
1654 	return 0;
1655 }
1656 
1657 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1658 {
1659 	const struct bpf_insn *insn = &meta->insn;
1660 
1661 	if (!insn->imm)
1662 		return 1; /* TODO: zero shift means indirect */
1663 
1664 	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
1665 		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
1666 		 SHF_SC_L_SHF, insn->imm);
1667 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1668 
1669 	return 0;
1670 }
1671 
1672 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1673 {
1674 	const struct bpf_insn *insn = &meta->insn;
1675 	u8 gpr = insn->dst_reg * 2;
1676 
1677 	switch (insn->imm) {
1678 	case 16:
1679 		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
1680 			      SHF_SC_R_ROT, 8);
1681 		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
1682 			      SHF_SC_R_SHF, 16);
1683 
1684 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1685 		break;
1686 	case 32:
1687 		wrp_end32(nfp_prog, reg_a(gpr), gpr);
1688 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1689 		break;
1690 	case 64:
1691 		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
1692 
1693 		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
1694 		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
1695 		break;
1696 	}
1697 
1698 	return 0;
1699 }
1700 
1701 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1702 {
1703 	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
1704 	u32 imm_lo, imm_hi;
1705 	u8 dst;
1706 
1707 	dst = prev->insn.dst_reg * 2;
1708 	imm_lo = prev->insn.imm;
1709 	imm_hi = meta->insn.imm;
1710 
1711 	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
1712 
1713 	/* mov is always 1 insn, load imm may be two, so try to use mov */
1714 	if (imm_hi == imm_lo)
1715 		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
1716 	else
1717 		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
1718 
1719 	return 0;
1720 }
1721 
1722 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1723 {
1724 	meta->double_cb = imm_ld8_part2;
1725 	return 0;
1726 }
1727 
1728 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1729 {
1730 	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
1731 }
1732 
1733 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1734 {
1735 	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
1736 }
1737 
1738 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1739 {
1740 	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
1741 }
1742 
1743 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1744 {
1745 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1746 				     meta->insn.src_reg * 2, 1);
1747 }
1748 
1749 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1750 {
1751 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1752 				     meta->insn.src_reg * 2, 2);
1753 }
1754 
1755 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1756 {
1757 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1758 				     meta->insn.src_reg * 2, 4);
1759 }
1760 
1761 static int
1762 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1763 	      unsigned int size, unsigned int ptr_off)
1764 {
1765 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1766 			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
1767 			    true, wrp_lmem_load);
1768 }
1769 
1770 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1771 		       u8 size)
1772 {
1773 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1774 
1775 	switch (meta->insn.off) {
1776 	case offsetof(struct __sk_buff, len):
1777 		if (size != FIELD_SIZEOF(struct __sk_buff, len))
1778 			return -EOPNOTSUPP;
1779 		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
1780 		break;
1781 	case offsetof(struct __sk_buff, data):
1782 		if (size != FIELD_SIZEOF(struct __sk_buff, data))
1783 			return -EOPNOTSUPP;
1784 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1785 		break;
1786 	case offsetof(struct __sk_buff, data_end):
1787 		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
1788 			return -EOPNOTSUPP;
1789 		emit_alu(nfp_prog, dst,
1790 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1791 		break;
1792 	default:
1793 		return -EOPNOTSUPP;
1794 	}
1795 
1796 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1797 
1798 	return 0;
1799 }
1800 
1801 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1802 		       u8 size)
1803 {
1804 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1805 
1806 	switch (meta->insn.off) {
1807 	case offsetof(struct xdp_md, data):
1808 		if (size != FIELD_SIZEOF(struct xdp_md, data))
1809 			return -EOPNOTSUPP;
1810 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1811 		break;
1812 	case offsetof(struct xdp_md, data_end):
1813 		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
1814 			return -EOPNOTSUPP;
1815 		emit_alu(nfp_prog, dst,
1816 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1817 		break;
1818 	default:
1819 		return -EOPNOTSUPP;
1820 	}
1821 
1822 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1823 
1824 	return 0;
1825 }
1826 
1827 static int
1828 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1829 	     unsigned int size)
1830 {
1831 	swreg tmp_reg;
1832 
1833 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1834 
1835 	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
1836 					 tmp_reg, meta->insn.dst_reg * 2, size);
1837 }
1838 
1839 static int
1840 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1841 	     unsigned int size)
1842 {
1843 	swreg tmp_reg;
1844 
1845 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1846 
1847 	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
1848 					 tmp_reg, meta->insn.dst_reg * 2, size);
1849 }
1850 
1851 static void
1852 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
1853 			   struct nfp_insn_meta *meta)
1854 {
1855 	s16 range_start = meta->pkt_cache.range_start;
1856 	s16 range_end = meta->pkt_cache.range_end;
1857 	swreg src_base, off;
1858 	u8 xfer_num, len;
1859 	bool indir;
1860 
1861 	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
1862 	src_base = reg_a(meta->insn.src_reg * 2);
1863 	len = range_end - range_start;
1864 	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
1865 
1866 	indir = len > 8 * REG_WIDTH;
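	/* A direct command can transfer at most 8 registers, so longer ranges
	 * fall back to an indirect reference.  E.g., assuming REG_WIDTH is
	 * 4 bytes, a 40 byte range needs xfer_num = 10 and hence indir.
	 */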
1867 	/* Set up PREV_ALU for indirect mode. */
1868 	if (indir)
1869 		wrp_immed(nfp_prog, reg_none(),
1870 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
1871 
1872 	/* Cache memory into transfer-in registers. */
1873 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
1874 		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
1875 }
1876 
1877 static int
1878 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
1879 				     struct nfp_insn_meta *meta,
1880 				     unsigned int size)
1881 {
1882 	s16 range_start = meta->pkt_cache.range_start;
1883 	s16 insn_off = meta->insn.off - range_start;
1884 	swreg dst_lo, dst_hi, src_lo, src_mid;
1885 	u8 dst_gpr = meta->insn.dst_reg * 2;
1886 	u8 len_lo = size, len_mid = 0;
1887 	u8 idx = insn_off / REG_WIDTH;
1888 	u8 off = insn_off % REG_WIDTH;
1889 
1890 	dst_hi = reg_both(dst_gpr + 1);
1891 	dst_lo = reg_both(dst_gpr);
1892 	src_lo = reg_xfer(idx);
1893 
1894 	/* The read length could involve as many as three registers. */
1895 	if (size > REG_WIDTH - off) {
1896 		/* Calculate the part in the second register. */
1897 		len_lo = REG_WIDTH - off;
1898 		len_mid = size - len_lo;
1899 
1900 		/* Calculate the part in the third register. */
1901 		if (size > 2 * REG_WIDTH - off)
1902 			len_mid = REG_WIDTH;
1903 	}
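	/* A worked example, assuming REG_WIDTH is 4 bytes: an 8 byte read at
	 * off 3 takes len_lo = 1 byte from the first transfer register,
	 * 4 bytes from the second and the remaining 3 from the third.
	 */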
1904 
1905 	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
1906 
1907 	if (!len_mid) {
1908 		wrp_immed(nfp_prog, dst_hi, 0);
1909 		return 0;
1910 	}
1911 
1912 	src_mid = reg_xfer(idx + 1);
1913 
1914 	if (size <= REG_WIDTH) {
1915 		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
1916 		wrp_immed(nfp_prog, dst_hi, 0);
1917 	} else {
1918 		swreg src_hi = reg_xfer(idx + 2);
1919 
1920 		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
1921 				   REG_WIDTH - len_lo, len_lo);
1922 		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
1923 				REG_WIDTH - len_lo);
1924 		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
1925 				   len_lo);
1926 	}
1927 
1928 	return 0;
1929 }
1930 
1931 static int
1932 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
1933 				   struct nfp_insn_meta *meta,
1934 				   unsigned int size)
1935 {
1936 	swreg dst_lo, dst_hi, src_lo;
1937 	u8 dst_gpr, idx;
1938 
1939 	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
1940 	dst_gpr = meta->insn.dst_reg * 2;
1941 	dst_hi = reg_both(dst_gpr + 1);
1942 	dst_lo = reg_both(dst_gpr);
1943 	src_lo = reg_xfer(idx);
1944 
1945 	if (size < REG_WIDTH) {
1946 		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
1947 		wrp_immed(nfp_prog, dst_hi, 0);
1948 	} else if (size == REG_WIDTH) {
1949 		wrp_mov(nfp_prog, dst_lo, src_lo);
1950 		wrp_immed(nfp_prog, dst_hi, 0);
1951 	} else {
1952 		swreg src_hi = reg_xfer(idx + 1);
1953 
1954 		wrp_mov(nfp_prog, dst_lo, src_lo);
1955 		wrp_mov(nfp_prog, dst_hi, src_hi);
1956 	}
1957 
1958 	return 0;
1959 }
1960 
1961 static int
1962 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
1963 			   struct nfp_insn_meta *meta, unsigned int size)
1964 {
1965 	u8 off = meta->insn.off - meta->pkt_cache.range_start;
1966 
1967 	if (IS_ALIGNED(off, REG_WIDTH))
1968 		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
1969 
1970 	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
1971 }
1972 
1973 static int
1974 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1975 	unsigned int size)
1976 {
1977 	if (meta->ldst_gather_len)
1978 		return nfp_cpp_memcpy(nfp_prog, meta);
1979 
1980 	if (meta->ptr.type == PTR_TO_CTX) {
1981 		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
1982 			return mem_ldx_xdp(nfp_prog, meta, size);
1983 		else
1984 			return mem_ldx_skb(nfp_prog, meta, size);
1985 	}
1986 
1987 	if (meta->ptr.type == PTR_TO_PACKET) {
1988 		if (meta->pkt_cache.range_end) {
1989 			if (meta->pkt_cache.do_init)
1990 				mem_ldx_data_init_pktcache(nfp_prog, meta);
1991 
1992 			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
1993 		} else {
1994 			return mem_ldx_data(nfp_prog, meta, size);
1995 		}
1996 	}
1997 
1998 	if (meta->ptr.type == PTR_TO_STACK)
1999 		return mem_ldx_stack(nfp_prog, meta, size,
2000 				     meta->ptr.off + meta->ptr.var_off.value);
2001 
2002 	if (meta->ptr.type == PTR_TO_MAP_VALUE)
2003 		return mem_ldx_emem(nfp_prog, meta, size);
2004 
2005 	return -EOPNOTSUPP;
2006 }
2007 
2008 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2009 {
2010 	return mem_ldx(nfp_prog, meta, 1);
2011 }
2012 
2013 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2014 {
2015 	return mem_ldx(nfp_prog, meta, 2);
2016 }
2017 
2018 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2019 {
2020 	return mem_ldx(nfp_prog, meta, 4);
2021 }
2022 
2023 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2024 {
2025 	return mem_ldx(nfp_prog, meta, 8);
2026 }
2027 
2028 static int
2029 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2030 	    unsigned int size)
2031 {
2032 	u64 imm = meta->insn.imm; /* sign extend */
2033 	swreg off_reg;
2034 
2035 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2036 
2037 	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2038 				  imm, size);
2039 }
2040 
2041 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2042 		  unsigned int size)
2043 {
2044 	if (meta->ptr.type == PTR_TO_PACKET)
2045 		return mem_st_data(nfp_prog, meta, size);
2046 
2047 	return -EOPNOTSUPP;
2048 }
2049 
2050 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2051 {
2052 	return mem_st(nfp_prog, meta, 1);
2053 }
2054 
2055 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2056 {
2057 	return mem_st(nfp_prog, meta, 2);
2058 }
2059 
2060 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2061 {
2062 	return mem_st(nfp_prog, meta, 4);
2063 }
2064 
2065 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2066 {
2067 	return mem_st(nfp_prog, meta, 8);
2068 }
2069 
2070 static int
2071 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2072 	     unsigned int size)
2073 {
2074 	swreg off_reg;
2075 
2076 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2077 
2078 	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2079 				   meta->insn.src_reg * 2, size);
2080 }
2081 
2082 static int
2083 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2084 	      unsigned int size, unsigned int ptr_off)
2085 {
2086 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
2087 			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2088 			    false, wrp_lmem_store);
2089 }
2090 
2091 static int
2092 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2093 	unsigned int size)
2094 {
2095 	if (meta->ptr.type == PTR_TO_PACKET)
2096 		return mem_stx_data(nfp_prog, meta, size);
2097 
2098 	if (meta->ptr.type == PTR_TO_STACK)
2099 		return mem_stx_stack(nfp_prog, meta, size,
2100 				     meta->ptr.off + meta->ptr.var_off.value);
2101 
2102 	return -EOPNOTSUPP;
2103 }
2104 
2105 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2106 {
2107 	return mem_stx(nfp_prog, meta, 1);
2108 }
2109 
2110 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2111 {
2112 	return mem_stx(nfp_prog, meta, 2);
2113 }
2114 
2115 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2116 {
2117 	return mem_stx(nfp_prog, meta, 4);
2118 }
2119 
2120 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2121 {
2122 	return mem_stx(nfp_prog, meta, 8);
2123 }
2124 
2125 static int
2126 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2127 {
2128 	swreg addra, addrb, off, prev_alu = imm_a(nfp_prog);
2129 	u8 dst_gpr = meta->insn.dst_reg * 2;
2130 	u8 src_gpr = meta->insn.src_reg * 2;
2131 
2132 	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2133 
2134 	/* If the insn has an offset, add it to the address */
2135 	if (!meta->insn.off) {
2136 		addra = reg_a(dst_gpr);
2137 		addrb = reg_b(dst_gpr + 1);
2138 	} else {
2139 		emit_alu(nfp_prog, imma_a(nfp_prog),
2140 			 reg_a(dst_gpr), ALU_OP_ADD, off);
2141 		emit_alu(nfp_prog, imma_b(nfp_prog),
2142 			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
2143 		addra = imma_a(nfp_prog);
2144 		addrb = imma_b(nfp_prog);
2145 	}
2146 
2147 	wrp_immed(nfp_prog, prev_alu,
2148 		  FIELD_PREP(CMD_OVE_DATA, 2) |
2149 		  CMD_OVE_LEN |
2150 		  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
2151 	wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
2152 	emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
2153 		       addra, addrb, 0, CMD_CTX_NO_SWAP);
2154 
2155 	return 0;
2156 }
2157 
2158 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2159 {
2160 	return mem_xadd(nfp_prog, meta, false);
2161 }
2162 
2163 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2164 {
2165 	return mem_xadd(nfp_prog, meta, true);
2166 }
2167 
2168 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2169 {
2170 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
2171 
2172 	return 0;
2173 }
2174 
2175 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2176 {
2177 	const struct bpf_insn *insn = &meta->insn;
2178 	u64 imm = insn->imm; /* sign extend */
2179 	swreg or1, or2, tmp_reg;
2180 
2181 	or1 = reg_a(insn->dst_reg * 2);
2182 	or2 = reg_b(insn->dst_reg * 2 + 1);
2183 
2184 	if (imm & ~0U) {
2185 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2186 		emit_alu(nfp_prog, imm_a(nfp_prog),
2187 			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2188 		or1 = imm_a(nfp_prog);
2189 	}
2190 
2191 	if (imm >> 32) {
2192 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2193 		emit_alu(nfp_prog, imm_b(nfp_prog),
2194 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2195 		or2 = imm_b(nfp_prog);
2196 	}
2197 
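	/* OR the two XOR results together; the BEQ below is taken only when
	 * both 32-bit halves matched the immediate, i.e. the OR result is 0.
	 */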
2198 	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
2199 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2200 
2201 	return 0;
2202 }
2203 
2204 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2205 {
2206 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
2207 }
2208 
2209 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2210 {
2211 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
2212 }
2213 
2214 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2215 {
2216 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
2217 }
2218 
2219 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2220 {
2221 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
2222 }
2223 
2224 static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2225 {
2226 	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
2227 }
2228 
2229 static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2230 {
2231 	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
2232 }
2233 
2234 static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2235 {
2236 	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
2237 }
2238 
2239 static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2240 {
2241 	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
2242 }
2243 
2244 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2245 {
2246 	const struct bpf_insn *insn = &meta->insn;
2247 	u64 imm = insn->imm; /* sign extend */
2248 	swreg tmp_reg;
2249 
2250 	if (!imm) {
2251 		meta->skip = true;
2252 		return 0;
2253 	}
2254 
2255 	if (imm & ~0U) {
2256 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2257 		emit_alu(nfp_prog, reg_none(),
2258 			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
2259 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2260 	}
2261 
2262 	if (imm >> 32) {
2263 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2264 		emit_alu(nfp_prog, reg_none(),
2265 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
2266 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2267 	}
2268 
2269 	return 0;
2270 }
2271 
2272 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2273 {
2274 	const struct bpf_insn *insn = &meta->insn;
2275 	u64 imm = insn->imm; /* sign extend */
2276 	swreg tmp_reg;
2277 
2278 	if (!imm) {
2279 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
2280 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
2281 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2282 		return 0;
2283 	}
2284 
2285 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2286 	emit_alu(nfp_prog, reg_none(),
2287 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2288 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2289 
2290 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2291 	emit_alu(nfp_prog, reg_none(),
2292 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2293 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2294 
2295 	return 0;
2296 }
2297 
2298 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2299 {
2300 	const struct bpf_insn *insn = &meta->insn;
2301 
2302 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
2303 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
2304 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
2305 		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
2306 	emit_alu(nfp_prog, reg_none(),
2307 		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
2308 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2309 
2310 	return 0;
2311 }
2312 
2313 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2314 {
2315 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
2316 }
2317 
2318 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2319 {
2320 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
2321 }
2322 
2323 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2324 {
2325 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
2326 }
2327 
2328 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2329 {
2330 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
2331 }
2332 
2333 static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2334 {
2335 	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
2336 }
2337 
2338 static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2339 {
2340 	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
2341 }
2342 
2343 static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2344 {
2345 	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
2346 }
2347 
2348 static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2349 {
2350 	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
2351 }
2352 
2353 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2354 {
2355 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
2356 }
2357 
2358 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2359 {
2360 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
2361 }
2362 
2363 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2364 {
2365 	switch (meta->insn.imm) {
2366 	case BPF_FUNC_xdp_adjust_head:
2367 		return adjust_head(nfp_prog, meta);
2368 	case BPF_FUNC_map_lookup_elem:
2369 	case BPF_FUNC_map_update_elem:
2370 	case BPF_FUNC_map_delete_elem:
2371 		return map_call_stack_common(nfp_prog, meta);
2372 	default:
2373 		WARN_ONCE(1, "verifier allowed unsupported function\n");
2374 		return -EOPNOTSUPP;
2375 	}
2376 }
2377 
2378 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2379 {
2380 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
2381 
2382 	return 0;
2383 }
2384 
2385 static const instr_cb_t instr_cb[256] = {
2386 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
2387 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
2388 	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
2389 	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
2390 	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
2391 	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
2392 	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
2393 	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
2394 	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
2395 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
2396 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
2397 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
2398 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
2399 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
2400 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
2401 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
2402 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
2403 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
2404 	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
2405 	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
2406 	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
2407 	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
2408 	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
2409 	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
2410 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
2411 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
2412 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
2413 	[BPF_ALU | BPF_NEG] =		neg_reg,
2414 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
2415 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
2416 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
2417 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
2418 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
2419 	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
2420 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
2421 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
2422 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
2423 	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
2424 	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
2425 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
2426 	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
2427 	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
2428 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
2429 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
2430 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
2431 	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
2432 	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
2433 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
2434 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
2435 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
2436 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
2437 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
2438 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
2439 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
2440 	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
2441 	[BPF_JMP | BPF_JLT | BPF_K] =	jlt_imm,
2442 	[BPF_JMP | BPF_JLE | BPF_K] =	jle_imm,
2443 	[BPF_JMP | BPF_JSGT | BPF_K] =  jsgt_imm,
2444 	[BPF_JMP | BPF_JSGE | BPF_K] =  jsge_imm,
2445 	[BPF_JMP | BPF_JSLT | BPF_K] =  jslt_imm,
2446 	[BPF_JMP | BPF_JSLE | BPF_K] =  jsle_imm,
2447 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
2448 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
2449 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
2450 	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
2451 	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
2452 	[BPF_JMP | BPF_JLT | BPF_X] =	jlt_reg,
2453 	[BPF_JMP | BPF_JLE | BPF_X] =	jle_reg,
2454 	[BPF_JMP | BPF_JSGT | BPF_X] =  jsgt_reg,
2455 	[BPF_JMP | BPF_JSGE | BPF_X] =  jsge_reg,
2456 	[BPF_JMP | BPF_JSLT | BPF_X] =  jslt_reg,
2457 	[BPF_JMP | BPF_JSLE | BPF_X] =  jsle_reg,
2458 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
2459 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
2460 	[BPF_JMP | BPF_CALL] =		call,
2461 	[BPF_JMP | BPF_EXIT] =		goto_out,
2462 };
2463 
2464 /* --- Assembler logic --- */
2465 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
2466 {
2467 	struct nfp_insn_meta *meta, *jmp_dst;
2468 	u32 idx, br_idx;
2469 
2470 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2471 		if (meta->skip)
2472 			continue;
2473 		if (meta->insn.code == (BPF_JMP | BPF_CALL))
2474 			continue;
2475 		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
2476 			continue;
2477 
2478 		if (list_is_last(&meta->l, &nfp_prog->insns))
2479 			br_idx = nfp_prog->last_bpf_off;
2480 		else
2481 			br_idx = list_next_entry(meta, l)->off - 1;
2482 
2483 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
2484 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
2485 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
2486 			return -ELOOP;
2487 		}
2488 		/* Leave special branches for later */
2489 		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
2490 		    RELO_BR_REL)
2491 			continue;
2492 
2493 		if (!meta->jmp_dst) {
2494 			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
2495 			return -ELOOP;
2496 		}
2497 
2498 		jmp_dst = meta->jmp_dst;
2499 
2500 		if (jmp_dst->skip) {
2501 			pr_err("Branch landing on removed instruction!!\n");
2502 			return -ELOOP;
2503 		}
2504 
2505 		for (idx = meta->off; idx <= br_idx; idx++) {
2506 			if (!nfp_is_br(nfp_prog->prog[idx]))
2507 				continue;
2508 			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
2509 		}
2510 	}
2511 
2512 	return 0;
2513 }
2514 
2515 static void nfp_intro(struct nfp_prog *nfp_prog)
2516 {
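	/* Mask the packet length down to its 14 bit field (GENMASK(13, 0))
	 * of the packet vector word.
	 */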
2517 	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
2518 	emit_alu(nfp_prog, plen_reg(nfp_prog),
2519 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
2520 }
2521 
2522 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
2523 {
2524 	/* TC direct-action mode:
2525 	 *   0,1   ok        NOT SUPPORTED[1]
2526 	 *   2   drop  0x22 -> drop,  count as stat1
2527 	 *   4,5 nuke  0x02 -> drop
2528 	 *   7  redir  0x44 -> redir, count as stat2
2529 	 *   * unspec  0x11 -> pass,  count as stat0
2530 	 *
2531 	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
2532 	 *     the exact decision made.  We are forced to support UNSPEC
2533 	 *     to handle aborts so that's the only one we handle for passing
2534 	 *     packets up the stack.
2535 	 */
2536 	/* Target for aborts */
2537 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2538 
2539 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2540 
2541 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2542 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
2543 
2544 	/* Target for normal exits */
2545 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2546 
2547 	/* if R0 > 7 jump to abort */
2548 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
2549 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2550 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2551 
2552 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
2553 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
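	/* The two words loaded above act as per-return-code nibble lookup
	 * tables indexed by R0; the shift left by 2 below turns R0 into the
	 * bit offset used by the indirect shifts.  For example R0 == 2 (drop)
	 * extracts the nibbles forming the 0x22 drop/stat1 value from the
	 * table at the top of this function.
	 */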
2554 
2555 	emit_shf(nfp_prog, reg_a(1),
2556 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
2557 
2558 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2559 	emit_shf(nfp_prog, reg_a(2),
2560 		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2561 
2562 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2563 	emit_shf(nfp_prog, reg_b(2),
2564 		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
2565 
2566 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2567 
2568 	emit_shf(nfp_prog, reg_b(2),
2569 		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
2570 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2571 }
2572 
2573 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
2574 {
2575 	/* XDP return codes:
2576 	 *   0 aborted  0x82 -> drop,  count as stat3
2577 	 *   1    drop  0x22 -> drop,  count as stat1
2578 	 *   2    pass  0x11 -> pass,  count as stat0
2579 	 *   3      tx  0x44 -> redir, count as stat2
2580 	 *   * unknown  0x82 -> drop,  count as stat3
2581 	 */
2582 	/* Target for aborts */
2583 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2584 
2585 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2586 
2587 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2588 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2589 
2590 	/* Target for normal exits */
2591 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2592 
2593 	/* if R0 > 3 jump to abort */
2594 	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2595 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2596 
2597 	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
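	/* The word above is a per-return-code byte lookup table indexed by
	 * R0; the shift left by 3 below turns R0 into the bit offset used by
	 * the indirect shift.  For example R0 == XDP_PASS (2) selects byte 2,
	 * i.e. the 0x11 pass/stat0 value from the table at the top of this
	 * function.
	 */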
2598 
2599 	emit_shf(nfp_prog, reg_a(1),
2600 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2601 
2602 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2603 	emit_shf(nfp_prog, reg_b(2),
2604 		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2605 
2606 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2607 
2608 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2609 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2610 }
2611 
2612 static void nfp_outro(struct nfp_prog *nfp_prog)
2613 {
2614 	switch (nfp_prog->type) {
2615 	case BPF_PROG_TYPE_SCHED_CLS:
2616 		nfp_outro_tc_da(nfp_prog);
2617 		break;
2618 	case BPF_PROG_TYPE_XDP:
2619 		nfp_outro_xdp(nfp_prog);
2620 		break;
2621 	default:
2622 		WARN_ON(1);
2623 	}
2624 }
2625 
2626 static int nfp_translate(struct nfp_prog *nfp_prog)
2627 {
2628 	struct nfp_insn_meta *meta;
2629 	int err;
2630 
2631 	nfp_intro(nfp_prog);
2632 	if (nfp_prog->error)
2633 		return nfp_prog->error;
2634 
2635 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2636 		instr_cb_t cb = instr_cb[meta->insn.code];
2637 
2638 		meta->off = nfp_prog_current_offset(nfp_prog);
2639 
2640 		if (meta->skip) {
2641 			nfp_prog->n_translated++;
2642 			continue;
2643 		}
2644 
2645 		if (nfp_meta_has_prev(nfp_prog, meta) &&
2646 		    nfp_meta_prev(meta)->double_cb)
2647 			cb = nfp_meta_prev(meta)->double_cb;
2648 		if (!cb)
2649 			return -ENOENT;
2650 		err = cb(nfp_prog, meta);
2651 		if (err)
2652 			return err;
2653 
2654 		nfp_prog->n_translated++;
2655 	}
2656 
2657 	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2658 
2659 	nfp_outro(nfp_prog);
2660 	if (nfp_prog->error)
2661 		return nfp_prog->error;
2662 
2663 	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2664 	if (nfp_prog->error)
2665 		return nfp_prog->error;
2666 
2667 	return nfp_fixup_branches(nfp_prog);
2668 }
2669 
2670 /* --- Optimizations --- */
2671 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2672 {
2673 	struct nfp_insn_meta *meta;
2674 
2675 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2676 		struct bpf_insn insn = meta->insn;
2677 
2678 		/* Programs converted from cBPF start with register xoring */
2679 		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2680 		    insn.src_reg == insn.dst_reg)
2681 			continue;
2682 
2683 		/* Programs start with R6 = R1 but we ignore the skb pointer */
2684 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2685 		    insn.src_reg == 1 && insn.dst_reg == 6)
2686 			meta->skip = true;
2687 
2688 		/* Return as soon as something doesn't match */
2689 		if (!meta->skip)
2690 			return;
2691 	}
2692 }
2693 
2694 /* Remove masking after load since our load guarantees this is not needed */
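/* For example, a BPF_LD | BPF_ABS | BPF_B load followed by "r0 &= 0xff"
 * (BPF_ALU64 | BPF_AND | BPF_K with imm 0xff) leaves the AND redundant, so
 * the pass below marks it as skipped.
 */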
2695 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2696 {
2697 	struct nfp_insn_meta *meta1, *meta2;
2698 	const s32 exp_mask[] = {
2699 		[BPF_B] = 0x000000ffU,
2700 		[BPF_H] = 0x0000ffffU,
2701 		[BPF_W] = 0xffffffffU,
2702 	};
2703 
2704 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2705 		struct bpf_insn insn, next;
2706 
2707 		insn = meta1->insn;
2708 		next = meta2->insn;
2709 
2710 		if (BPF_CLASS(insn.code) != BPF_LD)
2711 			continue;
2712 		if (BPF_MODE(insn.code) != BPF_ABS &&
2713 		    BPF_MODE(insn.code) != BPF_IND)
2714 			continue;
2715 
2716 		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2717 			continue;
2718 
2719 		if (!exp_mask[BPF_SIZE(insn.code)])
2720 			continue;
2721 		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2722 			continue;
2723 
2724 		if (next.src_reg || next.dst_reg)
2725 			continue;
2726 
2727 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2728 			continue;
2729 
2730 		meta2->skip = true;
2731 	}
2732 }
2733 
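/* Remove a shift-left-32/shift-right-32 pair following a 32-bit classic
 * (BPF_ABS/BPF_IND) load.  cBPF conversion emits the pair to clear the upper
 * half of r0, but the loads we generate already leave the upper 32 bits
 * zeroed, so both shift instructions can be skipped.
 */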
2734 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2735 {
2736 	struct nfp_insn_meta *meta1, *meta2, *meta3;
2737 
2738 	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2739 		struct bpf_insn insn, next1, next2;
2740 
2741 		insn = meta1->insn;
2742 		next1 = meta2->insn;
2743 		next2 = meta3->insn;
2744 
2745 		if (BPF_CLASS(insn.code) != BPF_LD)
2746 			continue;
2747 		if (BPF_MODE(insn.code) != BPF_ABS &&
2748 		    BPF_MODE(insn.code) != BPF_IND)
2749 			continue;
2750 		if (BPF_SIZE(insn.code) != BPF_W)
2751 			continue;
2752 
2753 		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2754 		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2755 		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2756 		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2757 			continue;
2758 
2759 		if (next1.src_reg || next1.dst_reg ||
2760 		    next2.src_reg || next2.dst_reg)
2761 			continue;
2762 
2763 		if (next1.imm != 0x20 || next2.imm != 0x20)
2764 			continue;
2765 
2766 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2767 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
2768 			continue;
2769 
2770 		meta2->skip = true;
2771 		meta3->skip = true;
2772 	}
2773 }
2774 
2775 /* A load/store pair that forms a memory copy should look like the following:
2776  *
2777  *   ld_width R, [addr_src + offset_src]
2778  *   st_width [addr_dest + offset_dest], R
2779  *
2780  * The destination register of the load and the source register of the
2781  * store must be the same, and both must operate at the same width.
2782  * If either addr_src or addr_dest is the stack pointer, we skip the CPP
2783  * optimization, as the stack is modelled by registers on the NFP.
2784  */
2785 static bool
2786 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
2787 		    struct nfp_insn_meta *st_meta)
2788 {
2789 	struct bpf_insn *ld = &ld_meta->insn;
2790 	struct bpf_insn *st = &st_meta->insn;
2791 
2792 	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
2793 		return false;
2794 
2795 	if (ld_meta->ptr.type != PTR_TO_PACKET)
2796 		return false;
2797 
2798 	if (st_meta->ptr.type != PTR_TO_PACKET)
2799 		return false;
2800 
2801 	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
2802 		return false;
2803 
2804 	if (ld->dst_reg != st->src_reg)
2805 		return false;
2806 
2807 	/* There is a jump to the store insn in this pair. */
2808 	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
2809 		return false;
2810 
2811 	return true;
2812 }
2813 
2814 /* Currently, we only support chaining load/store pairs if:
2815  *
2816  *  - Their address base registers are the same.
2817  *  - Their address offsets are in the same order.
2818  *  - They operate at the same memory width.
2819  *  - There is no jump into the middle of them.
2820  */
2821 static bool
2822 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
2823 			      struct nfp_insn_meta *st_meta,
2824 			      struct bpf_insn *prev_ld,
2825 			      struct bpf_insn *prev_st)
2826 {
2827 	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
2828 	struct bpf_insn *ld = &ld_meta->insn;
2829 	struct bpf_insn *st = &st_meta->insn;
2830 	s16 prev_ld_off, prev_st_off;
2831 
2832 	/* This pair is the start pair. */
2833 	if (!prev_ld)
2834 		return true;
2835 
2836 	prev_size = BPF_LDST_BYTES(prev_ld);
2837 	curr_size = BPF_LDST_BYTES(ld);
2838 	prev_ld_base = prev_ld->src_reg;
2839 	prev_st_base = prev_st->dst_reg;
2840 	prev_ld_dst = prev_ld->dst_reg;
2841 	prev_ld_off = prev_ld->off;
2842 	prev_st_off = prev_st->off;
2843 
2844 	if (ld->dst_reg != prev_ld_dst)
2845 		return false;
2846 
2847 	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
2848 		return false;
2849 
2850 	if (curr_size != prev_size)
2851 		return false;
2852 
2853 	/* There is a jump to the head of this pair. */
2854 	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
2855 		return false;
2856 
2857 	/* Both in ascending order. */
2858 	if (prev_ld_off + prev_size == ld->off &&
2859 	    prev_st_off + prev_size == st->off)
2860 		return true;
2861 
2862 	/* Both in descending order. */
2863 	if (ld->off + curr_size == prev_ld_off &&
2864 	    st->off + curr_size == prev_st_off)
2865 		return true;
2866 
2867 	return false;
2868 }
2869 
2870 /* Return TRUE if a cross memory access happens.  A cross memory access
2871  * means the store area overlaps the load area, so that a later load might
2872  * read a value written by an earlier store; in that case we can't treat
2873  * the sequence as a memory copy.
2874  */
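/* For example, with ascending offsets: if the head load reads at offset 0,
 * the head store writes at offset 8, and a later load in the chain reads at
 * offset 8, that load would observe the stored data, so the chain cannot be
 * folded into a single memory copy.
 */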
2875 static bool
2876 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
2877 		 struct nfp_insn_meta *head_st_meta)
2878 {
2879 	s16 head_ld_off, head_st_off, ld_off;
2880 
2881 	/* Different pointer types do not overlap. */
2882 	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
2883 		return false;
2884 
2885 	/* Both load and store are PTR_TO_PACKET, check the ID info.  */
2886 	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
2887 		return true;
2888 
2889 	/* Canonicalize the offsets: express all of them relative to the
2890 	 * original base register.
2891 	 */
2892 	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
2893 	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
2894 	ld_off = ld->off + head_ld_meta->ptr.off;
2895 
2896 	/* Ascending order cross. */
2897 	if (ld_off > head_ld_off &&
2898 	    head_ld_off < head_st_off && ld_off >= head_st_off)
2899 		return true;
2900 
2901 	/* Descending order cross. */
2902 	if (ld_off < head_ld_off &&
2903 	    head_ld_off > head_st_off && ld_off <= head_st_off)
2904 		return true;
2905 
2906 	return false;
2907 }
2908 
2909 /* This pass tries to identify the following instruction sequences.
2910  *
2911  *   load R, [regA + offA]
2912  *   store [regB + offB], R
2913  *   load R, [regA + offA + const_imm_A]
2914  *   store [regB + offB + const_imm_A], R
2915  *   load R, [regA + offA + 2 * const_imm_A]
2916  *   store [regB + offB + 2 * const_imm_A], R
2917  *   ...
2918  *
2919  * The above sequence is typically generated by the compiler when lowering
2920  * memcpy.  The NFP prefers using CPP instructions to accelerate it.
2921  */
2922 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
2923 {
2924 	struct nfp_insn_meta *head_ld_meta = NULL;
2925 	struct nfp_insn_meta *head_st_meta = NULL;
2926 	struct nfp_insn_meta *meta1, *meta2;
2927 	struct bpf_insn *prev_ld = NULL;
2928 	struct bpf_insn *prev_st = NULL;
2929 	u8 count = 0;
2930 
2931 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2932 		struct bpf_insn *ld = &meta1->insn;
2933 		struct bpf_insn *st = &meta2->insn;
2934 
2935 		/* Reset record status if any of the following is true:
2936 		 *   - The current insn pair is not load/store.
2937 		 *   - The load/store pair doesn't chain with previous one.
2938 		 *   - The chained load/store pair crossed with previous pair.
2939 		 *   - The chained load/store pair would make the total memory
2940 		 *     copy exceed 128 bytes, which is the maximum length a
2941 		 *     single NFP CPP command can transfer.
2942 		 */
2943 		if (!curr_pair_is_memcpy(meta1, meta2) ||
2944 		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
2945 						   prev_st) ||
2946 		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
2947 						       head_st_meta) ||
2948 				      head_ld_meta->ldst_gather_len >= 128))) {
2949 			if (!count)
2950 				continue;
2951 
2952 			if (count > 1) {
2953 				s16 prev_ld_off = prev_ld->off;
2954 				s16 prev_st_off = prev_st->off;
2955 				s16 head_ld_off = head_ld_meta->insn.off;
2956 
2957 				if (prev_ld_off < head_ld_off) {
2958 					head_ld_meta->insn.off = prev_ld_off;
2959 					head_st_meta->insn.off = prev_st_off;
2960 					head_ld_meta->ldst_gather_len =
2961 						-head_ld_meta->ldst_gather_len;
2962 				}
2963 
2964 				head_ld_meta->paired_st = &head_st_meta->insn;
2965 				head_st_meta->skip = true;
2966 			} else {
2967 				head_ld_meta->ldst_gather_len = 0;
2968 			}
2969 
2970 			/* If the chain is ended by a load/store pair then this
2971 			 * could serve as the new head of the next chain.
2972 			 */
2973 			if (curr_pair_is_memcpy(meta1, meta2)) {
2974 				head_ld_meta = meta1;
2975 				head_st_meta = meta2;
2976 				head_ld_meta->ldst_gather_len =
2977 					BPF_LDST_BYTES(ld);
2978 				meta1 = nfp_meta_next(meta1);
2979 				meta2 = nfp_meta_next(meta2);
2980 				prev_ld = ld;
2981 				prev_st = st;
2982 				count = 1;
2983 			} else {
2984 				head_ld_meta = NULL;
2985 				head_st_meta = NULL;
2986 				prev_ld = NULL;
2987 				prev_st = NULL;
2988 				count = 0;
2989 			}
2990 
2991 			continue;
2992 		}
2993 
2994 		if (!head_ld_meta) {
2995 			head_ld_meta = meta1;
2996 			head_st_meta = meta2;
2997 		} else {
2998 			meta1->skip = true;
2999 			meta2->skip = true;
3000 		}
3001 
3002 		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
3003 		meta1 = nfp_meta_next(meta1);
3004 		meta2 = nfp_meta_next(meta2);
3005 		prev_ld = ld;
3006 		prev_st = st;
3007 		count++;
3008 	}
3009 }
3010 
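/* Group packet-data loads into cacheable ranges.  Loads which share the same
 * packet pointer (same ID and constant offset) and together span no more than
 * 64 bytes are served from a single read into the transfer registers; the
 * first load of each range is marked with do_init so it issues that read.
 */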
3011 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
3012 {
3013 	struct nfp_insn_meta *meta, *range_node = NULL;
3014 	s16 range_start = 0, range_end = 0;
3015 	bool cache_avail = false;
3016 	struct bpf_insn *insn;
3017 	s32 range_ptr_off = 0;
3018 	u32 range_ptr_id = 0;
3019 
3020 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3021 		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
3022 			cache_avail = false;
3023 
3024 		if (meta->skip)
3025 			continue;
3026 
3027 		insn = &meta->insn;
3028 
3029 		if (is_mbpf_store_pkt(meta) ||
3030 		    insn->code == (BPF_JMP | BPF_CALL) ||
3031 		    is_mbpf_classic_store_pkt(meta) ||
3032 		    is_mbpf_classic_load(meta)) {
3033 			cache_avail = false;
3034 			continue;
3035 		}
3036 
3037 		if (!is_mbpf_load(meta))
3038 			continue;
3039 
3040 		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
3041 			cache_avail = false;
3042 			continue;
3043 		}
3044 
3045 		if (!cache_avail) {
3046 			cache_avail = true;
3047 			if (range_node)
3048 				goto end_current_then_start_new;
3049 			goto start_new;
3050 		}
3051 
3052 		/* Check ID to make sure two reads share the same
3053 		 * variable offset against PTR_TO_PACKET, and check OFF
3054 		 * to make sure they also share the same constant
3055 		 * offset.
3056 		 *
3057 		 * OFFs don't really need to be the same, because they
3058 		 * are the constant offsets against PTR_TO_PACKET, so
3059 		 * for different OFFs, we could canonicalize them to
3060 		 * offsets against the original packet pointer.  We don't
3061 		 * support this.
3062 		 */
3063 		if (meta->ptr.id == range_ptr_id &&
3064 		    meta->ptr.off == range_ptr_off) {
3065 			s16 new_start = range_start;
3066 			s16 end, off = insn->off;
3067 			s16 new_end = range_end;
3068 			bool changed = false;
3069 
3070 			if (off < range_start) {
3071 				new_start = off;
3072 				changed = true;
3073 			}
3074 
3075 			end = off + BPF_LDST_BYTES(insn);
3076 			if (end > range_end) {
3077 				new_end = end;
3078 				changed = true;
3079 			}
3080 
3081 			if (!changed)
3082 				continue;
3083 
3084 			if (new_end - new_start <= 64) {
3085 				/* Install new range. */
3086 				range_start = new_start;
3087 				range_end = new_end;
3088 				continue;
3089 			}
3090 		}
3091 
3092 end_current_then_start_new:
3093 		range_node->pkt_cache.range_start = range_start;
3094 		range_node->pkt_cache.range_end = range_end;
3095 start_new:
3096 		range_node = meta;
3097 		range_node->pkt_cache.do_init = true;
3098 		range_ptr_id = range_node->ptr.id;
3099 		range_ptr_off = range_node->ptr.off;
3100 		range_start = insn->off;
3101 		range_end = insn->off + BPF_LDST_BYTES(insn);
3102 	}
3103 
3104 	if (range_node) {
3105 		range_node->pkt_cache.range_start = range_start;
3106 		range_node->pkt_cache.range_end = range_end;
3107 	}
3108 
3109 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3110 		if (meta->skip)
3111 			continue;
3112 
3113 		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
3114 			if (meta->pkt_cache.do_init) {
3115 				range_start = meta->pkt_cache.range_start;
3116 				range_end = meta->pkt_cache.range_end;
3117 			} else {
3118 				meta->pkt_cache.range_start = range_start;
3119 				meta->pkt_cache.range_end = range_end;
3120 			}
3121 		}
3122 	}
3123 }
3124 
3125 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
3126 {
3127 	nfp_bpf_opt_reg_init(nfp_prog);
3128 
3129 	nfp_bpf_opt_ld_mask(nfp_prog);
3130 	nfp_bpf_opt_ld_shift(nfp_prog);
3131 	nfp_bpf_opt_ldst_gather(nfp_prog);
3132 	nfp_bpf_opt_pkt_cache(nfp_prog);
3133 
3134 	return 0;
3135 }
3136 
3137 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
3138 {
3139 	__le64 *ustore = (__force __le64 *)prog;
3140 	int i;
3141 
3142 	for (i = 0; i < len; i++) {
3143 		int err;
3144 
3145 		err = nfp_ustore_check_valid_no_ecc(prog[i]);
3146 		if (err)
3147 			return err;
3148 
3149 		ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
3150 	}
3151 
3152 	return 0;
3153 }
3154 
3155 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
3156 {
3157 	void *prog;
3158 
3159 	prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
3160 	if (!prog)
3161 		return;
3162 
3163 	nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
3164 	memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
3165 	kvfree(nfp_prog->prog);
3166 	nfp_prog->prog = prog;
3167 }
3168 
3169 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
3170 {
3171 	int ret;
3172 
3173 	ret = nfp_bpf_optimize(nfp_prog);
3174 	if (ret)
3175 		return ret;
3176 
3177 	ret = nfp_translate(nfp_prog);
3178 	if (ret) {
3179 		pr_err("Translation failed with error %d (translated: %u)\n",
3180 		       ret, nfp_prog->n_translated);
3181 		return -EINVAL;
3182 	}
3183 
3184 	nfp_bpf_prog_trim(nfp_prog);
3185 
3186 	return ret;
3187 }
3188 
3189 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
3190 {
3191 	struct nfp_insn_meta *meta;
3192 
3193 	/* Another pass to record jump information. */
3194 	list_for_each_entry(meta, &nfp_prog->insns, l) {
3195 		u64 code = meta->insn.code;
3196 
3197 		if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
3198 		    BPF_OP(code) != BPF_CALL) {
3199 			struct nfp_insn_meta *dst_meta;
3200 			unsigned short dst_indx;
3201 
3202 			dst_indx = meta->n + 1 + meta->insn.off;
3203 			dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
3204 						     cnt);
3205 
3206 			meta->jmp_dst = dst_meta;
3207 			dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
3208 		}
3209 	}
3210 }
3211 
3212 bool nfp_bpf_supported_opcode(u8 code)
3213 {
3214 	return !!instr_cb[code];
3215 }
3216 
3217 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
3218 {
3219 	unsigned int i;
3220 	u64 *prog;
3221 	int err;
3222 
3223 	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
3224 		       GFP_KERNEL);
3225 	if (!prog)
3226 		return ERR_PTR(-ENOMEM);
3227 
3228 	for (i = 0; i < nfp_prog->prog_len; i++) {
3229 		enum nfp_relo_type special;
3230 		u32 val;
3231 
3232 		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
3233 		switch (special) {
3234 		case RELO_NONE:
3235 			continue;
3236 		case RELO_BR_REL:
3237 			br_add_offset(&prog[i], bv->start_off);
3238 			break;
3239 		case RELO_BR_GO_OUT:
3240 			br_set_offset(&prog[i],
3241 				      nfp_prog->tgt_out + bv->start_off);
3242 			break;
3243 		case RELO_BR_GO_ABORT:
3244 			br_set_offset(&prog[i],
3245 				      nfp_prog->tgt_abort + bv->start_off);
3246 			break;
3247 		case RELO_BR_NEXT_PKT:
3248 			br_set_offset(&prog[i], bv->tgt_done);
3249 			break;
3250 		case RELO_BR_HELPER:
3251 			val = br_get_offset(prog[i]);
3252 			val -= BR_OFF_RELO;
3253 			switch (val) {
3254 			case BPF_FUNC_map_lookup_elem:
3255 				val = nfp_prog->bpf->helpers.map_lookup;
3256 				break;
3257 			case BPF_FUNC_map_update_elem:
3258 				val = nfp_prog->bpf->helpers.map_update;
3259 				break;
3260 			case BPF_FUNC_map_delete_elem:
3261 				val = nfp_prog->bpf->helpers.map_delete;
3262 				break;
3263 			default:
3264 				pr_err("relocation of unknown helper %d\n",
3265 				       val);
3266 				err = -EINVAL;
3267 				goto err_free_prog;
3268 			}
3269 			br_set_offset(&prog[i], val);
3270 			break;
3271 		case RELO_IMMED_REL:
3272 			immed_add_value(&prog[i], bv->start_off);
3273 			break;
3274 		}
3275 
3276 		prog[i] &= ~OP_RELO_TYPE;
3277 	}
3278 
3279 	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
3280 	if (err)
3281 		goto err_free_prog;
3282 
3283 	return prog;
3284 
3285 err_free_prog:
3286 	kfree(prog);
3287 	return ERR_PTR(err);
3288 }
3289