1 /*
2  * Copyright (C) 2016-2017 Netronome Systems, Inc.
3  *
4  * This software is dual licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree or the BSD 2-Clause License provided below.  You have the
7  * option to license this software under the complete terms of either license.
8  *
9  * The BSD 2-Clause License:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      1. Redistributions of source code must retain the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer.
18  *
19  *      2. Redistributions in binary form must reproduce the above
20  *         copyright notice, this list of conditions and the following
21  *         disclaimer in the documentation and/or other materials
22  *         provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt)	"NFP net bpf: " fmt
35 
36 #include <linux/kernel.h>
37 #include <linux/bpf.h>
38 #include <linux/filter.h>
39 #include <linux/pkt_cls.h>
40 #include <linux/unistd.h>
41 
42 #include "main.h"
43 #include "../nfp_asm.h"
44 
45 /* --- NFP prog --- */
/* The for-each "multiple entries" macros provide pos and next<n> pointers.
 * It's safe to modify the next pointers (but not pos).
 */
49 #define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
50 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
51 	     next = list_next_entry(pos, l);			\
52 	     &(nfp_prog)->insns != &pos->l &&			\
53 	     &(nfp_prog)->insns != &next->l;			\
54 	     pos = nfp_meta_next(pos),				\
55 	     next = nfp_meta_next(pos))
56 
57 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
58 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
59 	     next = list_next_entry(pos, l),			\
60 	     next2 = list_next_entry(next, l);			\
61 	     &(nfp_prog)->insns != &pos->l &&			\
62 	     &(nfp_prog)->insns != &next->l &&			\
63 	     &(nfp_prog)->insns != &next2->l;			\
64 	     pos = nfp_meta_next(pos),				\
65 	     next = nfp_meta_next(pos),				\
66 	     next2 = nfp_meta_next(next))
67 
68 static bool
69 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
70 {
71 	return meta->l.prev != &nfp_prog->insns;
72 }
73 
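/* Append one 64-bit instruction word to the program buffer.  Sets
 * nfp_prog->error to -ENOSPC if the preallocated area is already full.
 */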
74 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
75 {
76 	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
77 		nfp_prog->error = -ENOSPC;
78 		return;
79 	}
80 
81 	nfp_prog->prog[nfp_prog->prog_len] = insn;
82 	nfp_prog->prog_len++;
83 }
84 
85 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
86 {
87 	return nfp_prog->start_off + nfp_prog->prog_len;
88 }
89 
90 static unsigned int
91 nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
92 {
93 	return offset - nfp_prog->start_off;
94 }
95 
96 /* --- Emitters --- */
97 static void
98 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
99 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
100 {
101 	enum cmd_ctx_swap ctx;
102 	u64 insn;
103 
104 	if (sync)
105 		ctx = CMD_CTX_SWAP;
106 	else
107 		ctx = CMD_CTX_NO_SWAP;
108 
109 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
110 		FIELD_PREP(OP_CMD_CTX, ctx) |
111 		FIELD_PREP(OP_CMD_B_SRC, breg) |
112 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
113 		FIELD_PREP(OP_CMD_XFER, xfer) |
114 		FIELD_PREP(OP_CMD_CNT, size) |
115 		FIELD_PREP(OP_CMD_SIG, sync) |
116 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
117 		FIELD_PREP(OP_CMD_MODE, mode);
118 
119 	nfp_prog_push(nfp_prog, insn);
120 }
121 
122 static void
123 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
124 	 u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync)
125 {
126 	struct nfp_insn_re_regs reg;
127 	int err;
128 
129 	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
130 	if (err) {
131 		nfp_prog->error = err;
132 		return;
133 	}
134 	if (reg.swap) {
135 		pr_err("cmd can't swap arguments\n");
136 		nfp_prog->error = -EFAULT;
137 		return;
138 	}
139 	if (reg.dst_lmextn || reg.src_lmextn) {
140 		pr_err("cmd can't use LMextn\n");
141 		nfp_prog->error = -EFAULT;
142 		return;
143 	}
144 
145 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
146 }
147 
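/* Branch targets are split across two instruction fields: the low bits go
 * into OP_BR_ADDR_LO, and OP_BR_ADDR_HI is set whenever the address has any
 * bits above what ADDR_LO can hold.
 */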
148 static void
149 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
150 	  enum br_ctx_signal_state css, u16 addr, u8 defer)
151 {
152 	u16 addr_lo, addr_hi;
153 	u64 insn;
154 
155 	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
156 	addr_hi = addr != addr_lo;
157 
158 	insn = OP_BR_BASE |
159 		FIELD_PREP(OP_BR_MASK, mask) |
160 		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
161 		FIELD_PREP(OP_BR_CSS, css) |
162 		FIELD_PREP(OP_BR_DEFBR, defer) |
163 		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
164 		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
165 
166 	nfp_prog_push(nfp_prog, insn);
167 }
168 
169 static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
170 {
171 	if (defer > 2) {
172 		pr_err("BUG: branch defer out of bounds %d\n", defer);
173 		nfp_prog->error = -EFAULT;
174 		return;
175 	}
176 	__emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
177 }
178 
179 static void
180 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
181 {
182 	__emit_br(nfp_prog, mask,
183 		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
184 		  BR_CSS_NONE, addr, defer);
185 }
186 
187 static void
188 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
189 	     enum immed_width width, bool invert,
190 	     enum immed_shift shift, bool wr_both,
191 	     bool dst_lmextn, bool src_lmextn)
192 {
193 	u64 insn;
194 
195 	insn = OP_IMMED_BASE |
196 		FIELD_PREP(OP_IMMED_A_SRC, areg) |
197 		FIELD_PREP(OP_IMMED_B_SRC, breg) |
198 		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
199 		FIELD_PREP(OP_IMMED_WIDTH, width) |
200 		FIELD_PREP(OP_IMMED_INV, invert) |
201 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
202 		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
203 		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
204 		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
205 
206 	nfp_prog_push(nfp_prog, insn);
207 }
208 
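/* emit_immed() splits the 16-bit value: the low byte is carried by the
 * immediate source operand, while the upper byte goes into the IMM field of
 * the instruction (imm_hi of __emit_immed()).
 */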
209 static void
210 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
211 	   enum immed_width width, bool invert, enum immed_shift shift)
212 {
213 	struct nfp_insn_ur_regs reg;
214 	int err;
215 
216 	if (swreg_type(dst) == NN_REG_IMM) {
217 		nfp_prog->error = -EFAULT;
218 		return;
219 	}
220 
221 	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
222 	if (err) {
223 		nfp_prog->error = err;
224 		return;
225 	}
226 
227 	/* Use reg.dst when destination is No-Dest. */
228 	__emit_immed(nfp_prog,
229 		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
230 		     reg.breg, imm >> 8, width, invert, shift,
231 		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
232 }
233 
234 static void
235 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
236 	   enum shf_sc sc, u8 shift,
237 	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
238 	   bool dst_lmextn, bool src_lmextn)
239 {
240 	u64 insn;
241 
242 	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
243 		nfp_prog->error = -EFAULT;
244 		return;
245 	}
246 
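	/* A left shift count is encoded as 32 - shift. */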
247 	if (sc == SHF_SC_L_SHF)
248 		shift = 32 - shift;
249 
250 	insn = OP_SHF_BASE |
251 		FIELD_PREP(OP_SHF_A_SRC, areg) |
252 		FIELD_PREP(OP_SHF_SC, sc) |
253 		FIELD_PREP(OP_SHF_B_SRC, breg) |
254 		FIELD_PREP(OP_SHF_I8, i8) |
255 		FIELD_PREP(OP_SHF_SW, sw) |
256 		FIELD_PREP(OP_SHF_DST, dst) |
257 		FIELD_PREP(OP_SHF_SHIFT, shift) |
258 		FIELD_PREP(OP_SHF_OP, op) |
259 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
260 		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
261 		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
262 		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
263 
264 	nfp_prog_push(nfp_prog, insn);
265 }
266 
267 static void
268 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
269 	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
270 {
271 	struct nfp_insn_re_regs reg;
272 	int err;
273 
274 	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
275 	if (err) {
276 		nfp_prog->error = err;
277 		return;
278 	}
279 
280 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
281 		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
282 		   reg.dst_lmextn, reg.src_lmextn);
283 }
284 
285 static void
286 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
287 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
288 	   bool dst_lmextn, bool src_lmextn)
289 {
290 	u64 insn;
291 
292 	insn = OP_ALU_BASE |
293 		FIELD_PREP(OP_ALU_A_SRC, areg) |
294 		FIELD_PREP(OP_ALU_B_SRC, breg) |
295 		FIELD_PREP(OP_ALU_DST, dst) |
296 		FIELD_PREP(OP_ALU_SW, swap) |
297 		FIELD_PREP(OP_ALU_OP, op) |
298 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
299 		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
300 		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
301 		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
302 
303 	nfp_prog_push(nfp_prog, insn);
304 }
305 
306 static void
307 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
308 	 swreg lreg, enum alu_op op, swreg rreg)
309 {
310 	struct nfp_insn_ur_regs reg;
311 	int err;
312 
313 	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
314 	if (err) {
315 		nfp_prog->error = err;
316 		return;
317 	}
318 
319 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
320 		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
321 		   reg.dst_lmextn, reg.src_lmextn);
322 }
323 
324 static void
325 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
326 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
327 		bool zero, bool swap, bool wr_both,
328 		bool dst_lmextn, bool src_lmextn)
329 {
330 	u64 insn;
331 
332 	insn = OP_LDF_BASE |
333 		FIELD_PREP(OP_LDF_A_SRC, areg) |
334 		FIELD_PREP(OP_LDF_SC, sc) |
335 		FIELD_PREP(OP_LDF_B_SRC, breg) |
336 		FIELD_PREP(OP_LDF_I8, imm8) |
337 		FIELD_PREP(OP_LDF_SW, swap) |
338 		FIELD_PREP(OP_LDF_ZF, zero) |
339 		FIELD_PREP(OP_LDF_BMASK, bmask) |
340 		FIELD_PREP(OP_LDF_SHF, shift) |
341 		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
342 		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
343 		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
344 
345 	nfp_prog_push(nfp_prog, insn);
346 }
347 
348 static void
349 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
350 		  enum shf_sc sc, u8 shift, bool zero)
351 {
352 	struct nfp_insn_re_regs reg;
353 	int err;
354 
355 	/* Note: ld_field is special as it uses one of the src regs as dst */
356 	err = swreg_to_restricted(dst, dst, src, &reg, true);
357 	if (err) {
358 		nfp_prog->error = err;
359 		return;
360 	}
361 
362 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
363 			reg.i8, zero, reg.swap, reg.wr_both,
364 			reg.dst_lmextn, reg.src_lmextn);
365 }
366 
367 static void
368 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
369 	      enum shf_sc sc, u8 shift)
370 {
371 	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
372 }
373 
374 static void
375 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
376 	    bool dst_lmextn, bool src_lmextn)
377 {
378 	u64 insn;
379 
380 	insn = OP_LCSR_BASE |
381 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
382 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
383 		FIELD_PREP(OP_LCSR_WRITE, wr) |
384 		FIELD_PREP(OP_LCSR_ADDR, addr) |
385 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
386 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
387 
388 	nfp_prog_push(nfp_prog, insn);
389 }
390 
391 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
392 {
393 	struct nfp_insn_ur_regs reg;
394 	int err;
395 
396 	/* This instruction takes immeds instead of reg_none() for the ignored
397 	 * operand, but we can't encode 2 immeds in one instr with our normal
398 	 * swreg infra so if param is an immed, we encode as reg_none() and
399 	 * copy the immed to both operands.
400 	 */
401 	if (swreg_type(src) == NN_REG_IMM) {
402 		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
403 		reg.breg = reg.areg;
404 	} else {
405 		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
406 	}
407 	if (err) {
408 		nfp_prog->error = err;
409 		return;
410 	}
411 
412 	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
413 		    false, reg.src_lmextn);
414 }
415 
416 static void emit_nop(struct nfp_prog *nfp_prog)
417 {
418 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
419 }
420 
421 /* --- Wrappers --- */
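/* pack_immed() tries to express a 32-bit constant as a 16-bit value shifted
 * left by 0, 1 or 2 bytes, e.g. 0x00ab0000 packs as 0xab00 with
 * IMMED_SHIFT_1B.  wrp_immed() uses this to emit a single immed[] whenever
 * the value or its bitwise complement packs, and falls back to two loads
 * (low and high half-word) otherwise.
 */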
422 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
423 {
424 	if (!(imm & 0xffff0000)) {
425 		*val = imm;
426 		*shift = IMMED_SHIFT_0B;
427 	} else if (!(imm & 0xff0000ff)) {
428 		*val = imm >> 8;
429 		*shift = IMMED_SHIFT_1B;
430 	} else if (!(imm & 0x0000ffff)) {
431 		*val = imm >> 16;
432 		*shift = IMMED_SHIFT_2B;
433 	} else {
434 		return false;
435 	}
436 
437 	return true;
438 }
439 
440 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
441 {
442 	enum immed_shift shift;
443 	u16 val;
444 
445 	if (pack_immed(imm, &val, &shift)) {
446 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
447 	} else if (pack_immed(~imm, &val, &shift)) {
448 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
449 	} else {
450 		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
451 			   false, IMMED_SHIFT_0B);
452 		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
453 			   false, IMMED_SHIFT_2B);
454 	}
455 }
456 
/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return;
 * otherwise load @imm into a spare register and return its encoding.
 */
461 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
462 {
463 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
464 		return reg_imm(imm);
465 
466 	wrp_immed(nfp_prog, tmp_reg, imm);
467 	return tmp_reg;
468 }
469 
/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return;
 * otherwise load @imm into a spare register and return its encoding.
 */
474 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
475 {
476 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
477 		return reg_imm(imm);
478 
479 	wrp_immed(nfp_prog, tmp_reg, imm);
480 	return tmp_reg;
481 }
482 
483 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
484 {
485 	while (count--)
486 		emit_nop(nfp_prog);
487 }
488 
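/* Emit a branch with a zero target and stash @special in the unused
 * OP_BR_SPECIAL bits of the instruction; nfp_fixup_branches() patches in the
 * real target (tgt_out/tgt_abort) later and clears the field.
 */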
489 static void
490 wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
491 	       enum br_special special)
492 {
493 	emit_br(nfp_prog, mask, 0, 0);
494 
495 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
496 		FIELD_PREP(OP_BR_SPECIAL, special);
497 }
498 
499 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
500 {
501 	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
502 }
503 
504 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
505 {
506 	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
507 }
508 
509 static int
510 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
511 {
512 	unsigned int i;
513 	u16 shift, sz;
514 
515 	/* We load the value from the address indicated in @offset and then
516 	 * shift out the data we don't need.  Note: this is big endian!
517 	 */
518 	sz = max(size, 4);
519 	shift = size < 4 ? 4 - size : 0;
520 
521 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
522 		 pptr_reg(nfp_prog), offset, sz - 1, true);
523 
524 	i = 0;
525 	if (shift)
526 		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
527 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
528 	else
529 		for (; i * 4 < size; i++)
530 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
531 
532 	if (i < 2)
533 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
534 
535 	return 0;
536 }
537 
538 static int
539 data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
540 		   u8 dst_gpr, int size)
541 {
542 	unsigned int i;
543 	u8 mask, sz;
544 
545 	/* We load the value from the address indicated in @offset and then
546 	 * mask out the data we don't need.  Note: this is little endian!
547 	 */
548 	sz = max(size, 4);
549 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
550 
551 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
552 		 reg_a(src_gpr), offset, sz / 4 - 1, true);
553 
554 	i = 0;
555 	if (mask)
556 		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
557 				  reg_xfer(0), SHF_SC_NONE, 0, true);
558 	else
559 		for (; i * 4 < size; i++)
560 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
561 
562 	if (i < 2)
563 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
564 
565 	return 0;
566 }
567 
568 static int
569 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
570 {
571 	swreg tmp_reg;
572 
573 	/* Calculate the true offset (src_reg + imm) */
574 	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
575 	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
576 
577 	/* Check packet length (size guaranteed to fit b/c it's u8) */
578 	emit_alu(nfp_prog, imm_a(nfp_prog),
579 		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
580 	emit_alu(nfp_prog, reg_none(),
581 		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
582 	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
583 
584 	/* Load data */
585 	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
586 }
587 
588 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
589 {
590 	swreg tmp_reg;
591 
592 	/* Check packet length */
593 	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
594 	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
595 	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
596 
597 	/* Load data */
598 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
599 	return data_ld(nfp_prog, tmp_reg, 0, size);
600 }
601 
602 static int
603 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
604 		    u8 src_gpr, u8 size)
605 {
606 	unsigned int i;
607 
608 	for (i = 0; i * 4 < size; i++)
609 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
610 
611 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
612 		 reg_a(dst_gpr), offset, size - 1, true);
613 
614 	return 0;
615 }
616 
617 static int
618 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
619 		   u64 imm, u8 size)
620 {
621 	wrp_immed(nfp_prog, reg_xfer(0), imm);
622 	if (size == 8)
623 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
624 
625 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
626 		 reg_a(dst_gpr), offset, size - 1, true);
627 
628 	return 0;
629 }
630 
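/* Stack accesses are split into slices which never cross a 4 byte boundary
 * of either the LMEM word or the GPR.  The lmem_step callback is invoked
 * once per slice and is told whether it handles the first/last slice and
 * whether a new GPR was started, so it can decide when to read-modify-write
 * LMEM and when to zero the unwritten bytes of the destination GPR.
 */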
631 typedef int
632 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
633 	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
634 	     bool needs_inc);
635 
636 static int
637 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
638 	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
639 	      bool needs_inc)
640 {
641 	bool should_inc = needs_inc && new_gpr && !last;
642 	u32 idx, src_byte;
643 	enum shf_sc sc;
644 	swreg reg;
645 	int shf;
646 	u8 mask;
647 
648 	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
649 		return -EOPNOTSUPP;
650 
651 	idx = off / 4;
652 
653 	/* Move the entire word */
654 	if (size == 4) {
655 		wrp_mov(nfp_prog, reg_both(dst),
656 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
657 		return 0;
658 	}
659 
660 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
661 		return -EOPNOTSUPP;
662 
663 	src_byte = off % 4;
664 
665 	mask = (1 << size) - 1;
666 	mask <<= dst_byte;
667 
668 	if (WARN_ON_ONCE(mask > 0xf))
669 		return -EOPNOTSUPP;
670 
671 	shf = abs(src_byte - dst_byte) * 8;
672 	if (src_byte == dst_byte) {
673 		sc = SHF_SC_NONE;
674 	} else if (src_byte < dst_byte) {
675 		shf = 32 - shf;
676 		sc = SHF_SC_L_SHF;
677 	} else {
678 		sc = SHF_SC_R_SHF;
679 	}
680 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
684 	if (idx <= RE_REG_LM_IDX_MAX) {
685 		reg = reg_lm(lm3 ? 3 : 0, idx);
686 	} else {
687 		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new GPR
		 * that means we are loading a second part of the LMEM word into
		 * a new GPR.  IOW we've already looked at that LMEM word and
		 * therefore it has already been loaded into imm_a().
		 */
693 		if (first || !new_gpr)
694 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
695 	}
696 
697 	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
698 
699 	if (should_inc)
700 		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
701 
702 	return 0;
703 }
704 
705 static int
706 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
707 	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
708 	       bool needs_inc)
709 {
710 	bool should_inc = needs_inc && new_gpr && !last;
711 	u32 idx, dst_byte;
712 	enum shf_sc sc;
713 	swreg reg;
714 	int shf;
715 	u8 mask;
716 
717 	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
718 		return -EOPNOTSUPP;
719 
720 	idx = off / 4;
721 
722 	/* Move the entire word */
723 	if (size == 4) {
724 		wrp_mov(nfp_prog,
725 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
726 			reg_b(src));
727 		return 0;
728 	}
729 
730 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
731 		return -EOPNOTSUPP;
732 
733 	dst_byte = off % 4;
734 
735 	mask = (1 << size) - 1;
736 	mask <<= dst_byte;
737 
738 	if (WARN_ON_ONCE(mask > 0xf))
739 		return -EOPNOTSUPP;
740 
741 	shf = abs(src_byte - dst_byte) * 8;
742 	if (src_byte == dst_byte) {
743 		sc = SHF_SC_NONE;
744 	} else if (src_byte < dst_byte) {
745 		shf = 32 - shf;
746 		sc = SHF_SC_L_SHF;
747 	} else {
748 		sc = SHF_SC_R_SHF;
749 	}
750 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
754 	if (idx <= RE_REG_LM_IDX_MAX) {
755 		reg = reg_lm(lm3 ? 3 : 0, idx);
756 	} else {
757 		reg = imm_a(nfp_prog);
758 		/* Only first and last LMEM locations are going to need RMW,
759 		 * the middle location will be overwritten fully.
760 		 */
761 		if (first || last)
762 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
763 	}
764 
765 	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
766 
767 	if (new_gpr || last) {
768 		if (idx > RE_REG_LM_IDX_MAX)
769 			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
770 		if (should_inc)
771 			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
772 	}
773 
774 	return 0;
775 }
776 
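/* Select how the LM pointer is set up for a stack access: accesses which fit
 * entirely in the bottom 64 bytes use LMaddr0 directly; constant pointers
 * whose access fits in one 32 byte window program LMaddr3 once; variable
 * pointers and accesses spanning windows program LMaddr3 and rely on the
 * post-increment forms.  The nops after the CSR write cover the delay before
 * the new pointer can be used.
 */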
777 static int
778 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
779 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
780 	     bool clr_gpr, lmem_step step)
781 {
782 	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
783 	bool first = true, last;
784 	bool needs_inc = false;
785 	swreg stack_off_reg;
786 	u8 prev_gpr = 255;
787 	u32 gpr_byte = 0;
788 	bool lm3 = true;
789 	int ret;
790 
791 	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK; they all have
		 * the same alignment.  We depend on the low bits of the value
		 * being discarded when written to the LMaddr register.
		 */
796 		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
797 						stack_imm(nfp_prog));
798 
799 		emit_alu(nfp_prog, imm_b(nfp_prog),
800 			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
801 
802 		needs_inc = true;
803 	} else if (off + size <= 64) {
804 		/* We can reach bottom 64B with LMaddr0 */
805 		lm3 = false;
806 	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because the offset is
		 * ORed in, not added, when doing *l$indexN[off].
		 */
813 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
814 						stack_imm(nfp_prog));
815 		emit_alu(nfp_prog, imm_b(nfp_prog),
816 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
817 
818 		off %= 32;
819 	} else {
820 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
821 						stack_imm(nfp_prog));
822 
823 		emit_alu(nfp_prog, imm_b(nfp_prog),
824 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
825 
826 		needs_inc = true;
827 	}
828 	if (lm3) {
829 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 8 one slot will be filled by zeroing of upper. */
831 		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
832 	}
833 
834 	if (clr_gpr && size < 8)
835 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
836 
837 	while (size) {
838 		u32 slice_end;
839 		u8 slice_size;
840 
841 		slice_size = min(size, 4 - gpr_byte);
842 		slice_end = min(off + slice_size, round_up(off + 1, 4));
843 		slice_size = slice_end - off;
844 
845 		last = slice_size == size;
846 
847 		if (needs_inc)
848 			off %= 4;
849 
850 		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
851 			   first, gpr != prev_gpr, last, lm3, needs_inc);
852 		if (ret)
853 			return ret;
854 
855 		prev_gpr = gpr;
856 		first = false;
857 
858 		gpr_byte += slice_size;
859 		if (gpr_byte >= 4) {
860 			gpr_byte -= 4;
861 			gpr++;
862 		}
863 
864 		size -= slice_size;
865 		off += slice_size;
866 	}
867 
868 	return 0;
869 }
870 
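/* ALU with an immediate, folding the trivial cases: AND with 0 clears the
 * register, AND/OR/XOR with values that make the op a no-op emit nothing,
 * OR with all-ones sets the register and XOR with all-ones becomes a NOT.
 * Anything else loads the immediate (if needed) and emits a real ALU op.
 */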
871 static void
872 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
873 {
874 	swreg tmp_reg;
875 
876 	if (alu_op == ALU_OP_AND) {
877 		if (!imm)
878 			wrp_immed(nfp_prog, reg_both(dst), 0);
879 		if (!imm || !~imm)
880 			return;
881 	}
882 	if (alu_op == ALU_OP_OR) {
883 		if (!~imm)
884 			wrp_immed(nfp_prog, reg_both(dst), ~0U);
885 		if (!imm || !~imm)
886 			return;
887 	}
888 	if (alu_op == ALU_OP_XOR) {
889 		if (!~imm)
890 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
891 				 ALU_OP_NOT, reg_b(dst));
892 		if (!imm || !~imm)
893 			return;
894 	}
895 
896 	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
897 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
898 }
899 
900 static int
901 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
902 	      enum alu_op alu_op, bool skip)
903 {
904 	const struct bpf_insn *insn = &meta->insn;
905 	u64 imm = insn->imm; /* sign extend */
906 
907 	if (skip) {
908 		meta->skip = true;
909 		return 0;
910 	}
911 
912 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
913 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
914 
915 	return 0;
916 }
917 
918 static int
919 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
920 	      enum alu_op alu_op)
921 {
922 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
923 
924 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
925 	emit_alu(nfp_prog, reg_both(dst + 1),
926 		 reg_a(dst + 1), alu_op, reg_b(src + 1));
927 
928 	return 0;
929 }
930 
931 static int
932 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
933 	      enum alu_op alu_op, bool skip)
934 {
935 	const struct bpf_insn *insn = &meta->insn;
936 
937 	if (skip) {
938 		meta->skip = true;
939 		return 0;
940 	}
941 
942 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
943 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
944 
945 	return 0;
946 }
947 
948 static int
949 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
950 	      enum alu_op alu_op)
951 {
952 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
953 
954 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
955 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
956 
957 	return 0;
958 }
959 
960 static void
961 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
962 		 enum br_mask br_mask, u16 off)
963 {
964 	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
965 	emit_br(nfp_prog, br_mask, off, 0);
966 }
967 
968 static int
969 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
970 	     enum alu_op alu_op, enum br_mask br_mask)
971 {
972 	const struct bpf_insn *insn = &meta->insn;
973 
974 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
975 			 insn->src_reg * 2, br_mask, insn->off);
976 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
977 			 insn->src_reg * 2 + 1, br_mask, insn->off);
978 
979 	return 0;
980 }
981 
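/* 64-bit compare is done as a subtraction: ALU_OP_SUB on the low words
 * followed by ALU_OP_SUB_C on the high words, with the branch testing the
 * resulting condition codes.  @swap reverses the operand order so the same
 * branch masks (BLO/BHS) can implement both directions of the comparison.
 */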
982 static int
983 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
984 	    enum br_mask br_mask, bool swap)
985 {
986 	const struct bpf_insn *insn = &meta->insn;
987 	u64 imm = insn->imm; /* sign extend */
988 	u8 reg = insn->dst_reg * 2;
989 	swreg tmp_reg;
990 
991 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
992 	if (!swap)
993 		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
994 	else
995 		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
996 
997 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
998 	if (!swap)
999 		emit_alu(nfp_prog, reg_none(),
1000 			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
1001 	else
1002 		emit_alu(nfp_prog, reg_none(),
1003 			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
1004 
1005 	emit_br(nfp_prog, br_mask, insn->off, 0);
1006 
1007 	return 0;
1008 }
1009 
1010 static int
1011 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1012 	    enum br_mask br_mask, bool swap)
1013 {
1014 	const struct bpf_insn *insn = &meta->insn;
1015 	u8 areg, breg;
1016 
1017 	areg = insn->dst_reg * 2;
1018 	breg = insn->src_reg * 2;
1019 
1020 	if (swap) {
1021 		areg ^= breg;
1022 		breg ^= areg;
1023 		areg ^= breg;
1024 	}
1025 
1026 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1027 	emit_alu(nfp_prog, reg_none(),
1028 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1029 	emit_br(nfp_prog, br_mask, insn->off, 0);
1030 
1031 	return 0;
1032 }
1033 
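/* Byte swap a 32-bit word with two ld_field rotates: rotate right by 8
 * writing all four bytes, then overwrite bytes 0 and 2 from the intermediate
 * result rotated right by 16.
 */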
1034 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1035 {
1036 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1037 		      SHF_SC_R_ROT, 8);
1038 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1039 		      SHF_SC_R_ROT, 16);
1040 }
1041 
1042 /* --- Callbacks --- */
1043 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1044 {
1045 	const struct bpf_insn *insn = &meta->insn;
1046 	u8 dst = insn->dst_reg * 2;
1047 	u8 src = insn->src_reg * 2;
1048 
1049 	if (insn->src_reg == BPF_REG_10) {
1050 		swreg stack_depth_reg;
1051 
1052 		stack_depth_reg = ur_load_imm_any(nfp_prog,
1053 						  nfp_prog->stack_depth,
1054 						  stack_imm(nfp_prog));
1055 		emit_alu(nfp_prog, reg_both(dst),
1056 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1057 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1058 	} else {
1059 		wrp_reg_mov(nfp_prog, dst, src);
1060 		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1061 	}
1062 
1063 	return 0;
1064 }
1065 
1066 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1067 {
1068 	u64 imm = meta->insn.imm; /* sign extend */
1069 
1070 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1071 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1072 
1073 	return 0;
1074 }
1075 
1076 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1077 {
1078 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1079 }
1080 
1081 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1082 {
1083 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1084 }
1085 
1086 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1087 {
1088 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1089 }
1090 
1091 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1092 {
1093 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1094 }
1095 
1096 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1097 {
1098 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1099 }
1100 
1101 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1102 {
1103 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1104 }
1105 
1106 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1107 {
1108 	const struct bpf_insn *insn = &meta->insn;
1109 
1110 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1111 		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1112 		 reg_b(insn->src_reg * 2));
1113 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1114 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1115 		 reg_b(insn->src_reg * 2 + 1));
1116 
1117 	return 0;
1118 }
1119 
1120 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1121 {
1122 	const struct bpf_insn *insn = &meta->insn;
1123 	u64 imm = insn->imm; /* sign extend */
1124 
1125 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1126 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1127 
1128 	return 0;
1129 }
1130 
1131 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1132 {
1133 	const struct bpf_insn *insn = &meta->insn;
1134 
1135 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1136 		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1137 		 reg_b(insn->src_reg * 2));
1138 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1139 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1140 		 reg_b(insn->src_reg * 2 + 1));
1141 
1142 	return 0;
1143 }
1144 
1145 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1146 {
1147 	const struct bpf_insn *insn = &meta->insn;
1148 	u64 imm = insn->imm; /* sign extend */
1149 
1150 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1151 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1152 
1153 	return 0;
1154 }
1155 
1156 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1157 {
1158 	const struct bpf_insn *insn = &meta->insn;
1159 
1160 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1161 		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1162 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1163 		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1164 
1165 	return 0;
1166 }
1167 
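/* 64-bit shifts by a constant.  For a left shift < 32 the high word is built
 * with a double shift pulling the top bits out of the low word, and the low
 * word is shifted left on its own; a shift of exactly 32 degenerates to a
 * register move plus clearing, and shifts > 32 move the low word into the
 * high word and zero the low word.  shr_imm64() mirrors this for right
 * shifts.
 */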
1168 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1169 {
1170 	const struct bpf_insn *insn = &meta->insn;
1171 	u8 dst = insn->dst_reg * 2;
1172 
1173 	if (insn->imm < 32) {
1174 		emit_shf(nfp_prog, reg_both(dst + 1),
1175 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1176 			 SHF_SC_R_DSHF, 32 - insn->imm);
1177 		emit_shf(nfp_prog, reg_both(dst),
1178 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1179 			 SHF_SC_L_SHF, insn->imm);
1180 	} else if (insn->imm == 32) {
1181 		wrp_reg_mov(nfp_prog, dst + 1, dst);
1182 		wrp_immed(nfp_prog, reg_both(dst), 0);
1183 	} else if (insn->imm > 32) {
1184 		emit_shf(nfp_prog, reg_both(dst + 1),
1185 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1186 			 SHF_SC_L_SHF, insn->imm - 32);
1187 		wrp_immed(nfp_prog, reg_both(dst), 0);
1188 	}
1189 
1190 	return 0;
1191 }
1192 
1193 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1194 {
1195 	const struct bpf_insn *insn = &meta->insn;
1196 	u8 dst = insn->dst_reg * 2;
1197 
1198 	if (insn->imm < 32) {
1199 		emit_shf(nfp_prog, reg_both(dst),
1200 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1201 			 SHF_SC_R_DSHF, insn->imm);
1202 		emit_shf(nfp_prog, reg_both(dst + 1),
1203 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1204 			 SHF_SC_R_SHF, insn->imm);
1205 	} else if (insn->imm == 32) {
1206 		wrp_reg_mov(nfp_prog, dst, dst + 1);
1207 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1208 	} else if (insn->imm > 32) {
1209 		emit_shf(nfp_prog, reg_both(dst),
1210 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1211 			 SHF_SC_R_SHF, insn->imm - 32);
1212 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1213 	}
1214 
1215 	return 0;
1216 }
1217 
1218 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1219 {
1220 	const struct bpf_insn *insn = &meta->insn;
1221 
1222 	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
1223 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1224 
1225 	return 0;
1226 }
1227 
1228 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1229 {
1230 	const struct bpf_insn *insn = &meta->insn;
1231 
1232 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
1233 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1234 
1235 	return 0;
1236 }
1237 
1238 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1239 {
1240 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
1241 }
1242 
1243 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1244 {
1245 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
1246 }
1247 
1248 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1249 {
1250 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
1251 }
1252 
1253 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1254 {
1255 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1256 }
1257 
1258 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1259 {
1260 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
1261 }
1262 
1263 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1264 {
1265 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1266 }
1267 
1268 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1269 {
1270 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
1271 }
1272 
1273 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1274 {
1275 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
1276 }
1277 
1278 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1279 {
1280 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
1281 }
1282 
1283 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1284 {
1285 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
1286 }
1287 
1288 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1289 {
1290 	u8 dst = meta->insn.dst_reg * 2;
1291 
1292 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
1293 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1294 
1295 	return 0;
1296 }
1297 
1298 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1299 {
1300 	const struct bpf_insn *insn = &meta->insn;
1301 
1302 	if (!insn->imm)
1303 		return 1; /* TODO: zero shift means indirect */
1304 
1305 	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
1306 		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
1307 		 SHF_SC_L_SHF, insn->imm);
1308 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1309 
1310 	return 0;
1311 }
1312 
1313 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1314 {
1315 	const struct bpf_insn *insn = &meta->insn;
1316 	u8 gpr = insn->dst_reg * 2;
1317 
1318 	switch (insn->imm) {
1319 	case 16:
1320 		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
1321 			      SHF_SC_R_ROT, 8);
1322 		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
1323 			      SHF_SC_R_SHF, 16);
1324 
1325 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1326 		break;
1327 	case 32:
1328 		wrp_end32(nfp_prog, reg_a(gpr), gpr);
1329 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1330 		break;
1331 	case 64:
1332 		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
1333 
1334 		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
1335 		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
1336 		break;
1337 	}
1338 
1339 	return 0;
1340 }
1341 
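/* BPF_LD | BPF_IMM | BPF_DW uses two struct bpf_insn slots for its 64-bit
 * immediate.  imm_ld8() only registers a callback for the second slot,
 * where both 32-bit halves are available and get written to the register
 * pair.
 */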
1342 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1343 {
1344 	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
1345 	u32 imm_lo, imm_hi;
1346 	u8 dst;
1347 
1348 	dst = prev->insn.dst_reg * 2;
1349 	imm_lo = prev->insn.imm;
1350 	imm_hi = meta->insn.imm;
1351 
1352 	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
1353 
1354 	/* mov is always 1 insn, load imm may be two, so try to use mov */
1355 	if (imm_hi == imm_lo)
1356 		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
1357 	else
1358 		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
1359 
1360 	return 0;
1361 }
1362 
1363 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1364 {
1365 	meta->double_cb = imm_ld8_part2;
1366 	return 0;
1367 }
1368 
1369 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1370 {
1371 	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
1372 }
1373 
1374 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1375 {
1376 	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
1377 }
1378 
1379 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1380 {
1381 	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
1382 }
1383 
1384 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1385 {
1386 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1387 				     meta->insn.src_reg * 2, 1);
1388 }
1389 
1390 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1391 {
1392 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1393 				     meta->insn.src_reg * 2, 2);
1394 }
1395 
1396 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1397 {
1398 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1399 				     meta->insn.src_reg * 2, 4);
1400 }
1401 
1402 static int
1403 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1404 	      unsigned int size, unsigned int ptr_off)
1405 {
1406 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1407 			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
1408 			    true, wrp_lmem_load);
1409 }
1410 
1411 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1412 		       u8 size)
1413 {
1414 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1415 
1416 	switch (meta->insn.off) {
1417 	case offsetof(struct __sk_buff, len):
1418 		if (size != FIELD_SIZEOF(struct __sk_buff, len))
1419 			return -EOPNOTSUPP;
1420 		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
1421 		break;
1422 	case offsetof(struct __sk_buff, data):
1423 		if (size != FIELD_SIZEOF(struct __sk_buff, data))
1424 			return -EOPNOTSUPP;
1425 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1426 		break;
1427 	case offsetof(struct __sk_buff, data_end):
1428 		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
1429 			return -EOPNOTSUPP;
1430 		emit_alu(nfp_prog, dst,
1431 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1432 		break;
1433 	default:
1434 		return -EOPNOTSUPP;
1435 	}
1436 
1437 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1438 
1439 	return 0;
1440 }
1441 
1442 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1443 		       u8 size)
1444 {
1445 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1446 
1447 	switch (meta->insn.off) {
1448 	case offsetof(struct xdp_md, data):
1449 		if (size != FIELD_SIZEOF(struct xdp_md, data))
1450 			return -EOPNOTSUPP;
1451 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1452 		break;
1453 	case offsetof(struct xdp_md, data_end):
1454 		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
1455 			return -EOPNOTSUPP;
1456 		emit_alu(nfp_prog, dst,
1457 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1458 		break;
1459 	default:
1460 		return -EOPNOTSUPP;
1461 	}
1462 
1463 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1464 
1465 	return 0;
1466 }
1467 
1468 static int
1469 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1470 	     unsigned int size)
1471 {
1472 	swreg tmp_reg;
1473 
1474 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1475 
1476 	return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
1477 				  meta->insn.dst_reg * 2, size);
1478 }
1479 
1480 static int
1481 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1482 	unsigned int size)
1483 {
1484 	if (meta->ptr.type == PTR_TO_CTX) {
1485 		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
1486 			return mem_ldx_xdp(nfp_prog, meta, size);
1487 		else
1488 			return mem_ldx_skb(nfp_prog, meta, size);
1489 	}
1490 
1491 	if (meta->ptr.type == PTR_TO_PACKET)
1492 		return mem_ldx_data(nfp_prog, meta, size);
1493 
1494 	if (meta->ptr.type == PTR_TO_STACK)
1495 		return mem_ldx_stack(nfp_prog, meta, size,
1496 				     meta->ptr.off + meta->ptr.var_off.value);
1497 
1498 	return -EOPNOTSUPP;
1499 }
1500 
1501 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1502 {
1503 	return mem_ldx(nfp_prog, meta, 1);
1504 }
1505 
1506 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1507 {
1508 	return mem_ldx(nfp_prog, meta, 2);
1509 }
1510 
1511 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1512 {
1513 	return mem_ldx(nfp_prog, meta, 4);
1514 }
1515 
1516 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1517 {
1518 	return mem_ldx(nfp_prog, meta, 8);
1519 }
1520 
1521 static int
1522 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1523 	    unsigned int size)
1524 {
1525 	u64 imm = meta->insn.imm; /* sign extend */
1526 	swreg off_reg;
1527 
1528 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1529 
1530 	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1531 				  imm, size);
1532 }
1533 
1534 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1535 		  unsigned int size)
1536 {
1537 	if (meta->ptr.type == PTR_TO_PACKET)
1538 		return mem_st_data(nfp_prog, meta, size);
1539 
1540 	return -EOPNOTSUPP;
1541 }
1542 
1543 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1544 {
1545 	return mem_st(nfp_prog, meta, 1);
1546 }
1547 
1548 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1549 {
1550 	return mem_st(nfp_prog, meta, 2);
1551 }
1552 
1553 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1554 {
1555 	return mem_st(nfp_prog, meta, 4);
1556 }
1557 
1558 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1559 {
1560 	return mem_st(nfp_prog, meta, 8);
1561 }
1562 
1563 static int
1564 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1565 	     unsigned int size)
1566 {
1567 	swreg off_reg;
1568 
1569 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1570 
1571 	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1572 				   meta->insn.src_reg * 2, size);
1573 }
1574 
1575 static int
1576 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1577 	      unsigned int size, unsigned int ptr_off)
1578 {
1579 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1580 			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
1581 			    false, wrp_lmem_store);
1582 }
1583 
1584 static int
1585 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1586 	unsigned int size)
1587 {
1588 	if (meta->ptr.type == PTR_TO_PACKET)
1589 		return mem_stx_data(nfp_prog, meta, size);
1590 
1591 	if (meta->ptr.type == PTR_TO_STACK)
1592 		return mem_stx_stack(nfp_prog, meta, size,
1593 				     meta->ptr.off + meta->ptr.var_off.value);
1594 
1595 	return -EOPNOTSUPP;
1596 }
1597 
1598 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1599 {
1600 	return mem_stx(nfp_prog, meta, 1);
1601 }
1602 
1603 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1604 {
1605 	return mem_stx(nfp_prog, meta, 2);
1606 }
1607 
1608 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1609 {
1610 	return mem_stx(nfp_prog, meta, 4);
1611 }
1612 
1613 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1614 {
1615 	return mem_stx(nfp_prog, meta, 8);
1616 }
1617 
1618 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1619 {
1620 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
1621 
1622 	return 0;
1623 }
1624 
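/* JEQ against a 64-bit immediate: XOR each half of the register pair with
 * the corresponding half of the immediate (halves that are zero are compared
 * directly), OR the two results together and branch if the outcome is zero.
 */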
1625 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1626 {
1627 	const struct bpf_insn *insn = &meta->insn;
1628 	u64 imm = insn->imm; /* sign extend */
1629 	swreg or1, or2, tmp_reg;
1630 
1631 	or1 = reg_a(insn->dst_reg * 2);
1632 	or2 = reg_b(insn->dst_reg * 2 + 1);
1633 
1634 	if (imm & ~0U) {
1635 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1636 		emit_alu(nfp_prog, imm_a(nfp_prog),
1637 			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
1638 		or1 = imm_a(nfp_prog);
1639 	}
1640 
1641 	if (imm >> 32) {
1642 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1643 		emit_alu(nfp_prog, imm_b(nfp_prog),
1644 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
1645 		or2 = imm_b(nfp_prog);
1646 	}
1647 
1648 	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
1649 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
1650 
1651 	return 0;
1652 }
1653 
1654 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1655 {
1656 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
1657 }
1658 
1659 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1660 {
1661 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
1662 }
1663 
1664 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1665 {
1666 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
1667 }
1668 
1669 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1670 {
1671 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
1672 }
1673 
1674 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1675 {
1676 	const struct bpf_insn *insn = &meta->insn;
1677 	u64 imm = insn->imm; /* sign extend */
1678 	swreg tmp_reg;
1679 
1680 	if (!imm) {
1681 		meta->skip = true;
1682 		return 0;
1683 	}
1684 
1685 	if (imm & ~0U) {
1686 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1687 		emit_alu(nfp_prog, reg_none(),
1688 			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
1689 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1690 	}
1691 
1692 	if (imm >> 32) {
1693 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1694 		emit_alu(nfp_prog, reg_none(),
1695 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
1696 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1697 	}
1698 
1699 	return 0;
1700 }
1701 
1702 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1703 {
1704 	const struct bpf_insn *insn = &meta->insn;
1705 	u64 imm = insn->imm; /* sign extend */
1706 	swreg tmp_reg;
1707 
1708 	if (!imm) {
1709 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
1710 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
1711 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1712 		return 0;
1713 	}
1714 
1715 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1716 	emit_alu(nfp_prog, reg_none(),
1717 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
1718 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
1719 
1720 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1721 	emit_alu(nfp_prog, reg_none(),
1722 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
1723 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
1724 
1725 	return 0;
1726 }
1727 
1728 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1729 {
1730 	const struct bpf_insn *insn = &meta->insn;
1731 
1732 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
1733 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
1734 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
1735 		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
1736 	emit_alu(nfp_prog, reg_none(),
1737 		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
1738 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
1739 
1740 	return 0;
1741 }
1742 
1743 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1744 {
1745 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
1746 }
1747 
1748 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1749 {
1750 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
1751 }
1752 
1753 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1754 {
1755 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
1756 }
1757 
1758 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1759 {
1760 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
1761 }
1762 
1763 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1764 {
1765 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
1766 }
1767 
1768 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1769 {
1770 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
1771 }
1772 
1773 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1774 {
1775 	wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
1776 
1777 	return 0;
1778 }
1779 
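/* Translator dispatch table, indexed by the BPF opcode byte
 * (class | op | source).
 */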
1780 static const instr_cb_t instr_cb[256] = {
1781 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
1782 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
1783 	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
1784 	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
1785 	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
1786 	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
1787 	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
1788 	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
1789 	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
1790 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
1791 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
1792 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
1793 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
1794 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
1795 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
1796 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
1797 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
1798 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
1799 	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
1800 	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
1801 	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
1802 	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
1803 	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
1804 	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
1805 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
1806 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
1807 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
1808 	[BPF_ALU | BPF_NEG] =		neg_reg,
1809 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
1810 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
1811 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
1812 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
1813 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
1814 	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
1815 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
1816 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
1817 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
1818 	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
1819 	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
1820 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
1821 	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
1822 	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
1823 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
1824 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
1825 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
1826 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
1827 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
1828 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
1829 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
1830 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
1831 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
1832 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
1833 	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
1834 	[BPF_JMP | BPF_JLT | BPF_K] =	jlt_imm,
1835 	[BPF_JMP | BPF_JLE | BPF_K] =	jle_imm,
1836 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
1837 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
1838 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
1839 	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
1840 	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
1841 	[BPF_JMP | BPF_JLT | BPF_X] =	jlt_reg,
1842 	[BPF_JMP | BPF_JLE | BPF_X] =	jle_reg,
1843 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
1844 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
1845 	[BPF_JMP | BPF_EXIT] =		goto_out,
1846 };
1847 
1848 /* --- Misc code --- */
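/* Patch the target address of an already-emitted branch instruction.
 * The low bits of @offset go into OP_BR_ADDR_LO; OP_BR_ADDR_HI is a
 * single bit set only when the offset does not fit in the LO field.
 */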
1849 static void br_set_offset(u64 *instr, u16 offset)
1850 {
1851 	u16 addr_lo, addr_hi;
1852 
1853 	addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
1854 	addr_hi = offset != addr_lo;
1855 	*instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
1856 	*instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
1857 	*instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
1858 }
1859 
1860 /* --- Assembler logic --- */
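/* Resolve branch targets after translation.  Ordinary jumps are pointed at
 * the code emitted for their destination instruction; special branches
 * (goto out / abort) are handled in a second pass against the epilogue
 * targets recorded in tgt_out and tgt_abort.
 */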
1861 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
1862 {
1863 	struct nfp_insn_meta *meta, *jmp_dst;
1864 	u32 idx, br_idx;
1865 
1866 	list_for_each_entry(meta, &nfp_prog->insns, l) {
1867 		if (meta->skip)
1868 			continue;
1869 		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
1870 			continue;
1871 
1872 		if (list_is_last(&meta->l, &nfp_prog->insns))
1873 			idx = nfp_prog->last_bpf_off;
1874 		else
1875 			idx = list_next_entry(meta, l)->off - 1;
1876 
1877 		br_idx = nfp_prog_offset_to_index(nfp_prog, idx);
1878 
1879 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
1880 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
1881 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
1882 			return -ELOOP;
1883 		}
1884 		/* Leave special branches for later */
1885 		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
1886 			continue;
1887 
1888 		if (!meta->jmp_dst) {
1889 			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
1890 			return -ELOOP;
1891 		}
1892 
1893 		jmp_dst = meta->jmp_dst;
1894 
1895 		if (jmp_dst->skip) {
1896 			pr_err("Branch landing on removed instruction!!\n");
1897 			return -ELOOP;
1898 		}
1899 
1900 		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
1901 		     idx <= br_idx; idx++) {
1902 			if (!nfp_is_br(nfp_prog->prog[idx]))
1903 				continue;
1904 			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
1905 		}
1906 	}
1907 
1908 	/* Fix up 'goto out' branches separately, as they can be scattered around */
1909 	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
1910 		enum br_special special;
1911 
1912 		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
1913 			continue;
1914 
1915 		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
1916 		switch (special) {
1917 		case OP_BR_NORMAL:
1918 			break;
1919 		case OP_BR_GO_OUT:
1920 			br_set_offset(&nfp_prog->prog[br_idx],
1921 				      nfp_prog->tgt_out);
1922 			break;
1923 		case OP_BR_GO_ABORT:
1924 			br_set_offset(&nfp_prog->prog[br_idx],
1925 				      nfp_prog->tgt_abort);
1926 			break;
1927 		}
1928 
1929 		nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
1930 	}
1931 
1932 	return 0;
1933 }
1934 
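/* Prologue: load the packet length from the packet vector, masked down to
 * its low 14 bits, into the register used as plen by the rest of the
 * program.
 */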
1935 static void nfp_intro(struct nfp_prog *nfp_prog)
1936 {
1937 	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
1938 	emit_alu(nfp_prog, plen_reg(nfp_prog),
1939 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
1940 }
1941 
1942 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
1943 {
1944 	/* TC direct-action mode:
1945 	 *   0,1   ok        NOT SUPPORTED[1]
1946 	 *   2   drop  0x22 -> drop,  count as stat1
1947 	 *   4,5 nuke  0x02 -> drop
1948 	 *   7  redir  0x44 -> redir, count as stat2
1949 	 *   * unspec  0x11 -> pass,  count as stat0
1950 	 *
1951 	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
1952 	 *     the exact decision made.  We are forced to support UNSPEC
1953 	 *     to handle aborts, so that is the only code we use for passing
1954 	 *     packets up the stack.
1955 	 */
1956 	/* Target for aborts */
1957 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
1958 
1959 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
1960 
1961 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
1962 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
1963 
1964 	/* Target for normal exits */
1965 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
1966 
1967 	/* if R0 > 7 jump to abort */
1968 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
1969 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
1970 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
1971 
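	/* Nibble-per-return-code lookup tables: the shift sequence below
	 * extracts one nibble from each table at bit position R0 * 4 and
	 * combines them into the result byte from the mapping above, which
	 * the final ld_field writes into the ABI flags word.
	 */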
1972 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
1973 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
1974 
1975 	emit_shf(nfp_prog, reg_a(1),
1976 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
1977 
1978 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
1979 	emit_shf(nfp_prog, reg_a(2),
1980 		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
1981 
1982 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
1983 	emit_shf(nfp_prog, reg_b(2),
1984 		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
1985 
1986 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
1987 
1988 	emit_shf(nfp_prog, reg_b(2),
1989 		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
1990 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
1991 }
1992 
1993 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
1994 {
1995 	/* XDP return codes:
1996 	 *   0 aborted  0x82 -> drop,  count as stat3
1997 	 *   1    drop  0x22 -> drop,  count as stat1
1998 	 *   2    pass  0x11 -> pass,  count as stat0
1999 	 *   3      tx  0x44 -> redir, count as stat2
2000 	 *   * unknown  0x82 -> drop,  count as stat3
2001 	 */
2002 	/* Target for aborts */
2003 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2004 
2005 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
2006 
2007 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2008 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2009 
2010 	/* Target for normal exits */
2011 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2012 
2013 	/* if R0 > 3 jump to abort */
2014 	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2015 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2016 
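	/* Byte-per-return-code lookup table: the shift sequence below
	 * extracts byte R0 of 0x44112282, i.e. the result byte from the
	 * mapping above, and the final ld_field writes it into the ABI
	 * flags word.
	 */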
2017 	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
2018 
2019 	emit_shf(nfp_prog, reg_a(1),
2020 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2021 
2022 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2023 	emit_shf(nfp_prog, reg_b(2),
2024 		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2025 
2026 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
2027 
2028 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2029 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2030 }
2031 
2032 static void nfp_outro(struct nfp_prog *nfp_prog)
2033 {
2034 	switch (nfp_prog->type) {
2035 	case BPF_PROG_TYPE_SCHED_CLS:
2036 		nfp_outro_tc_da(nfp_prog);
2037 		break;
2038 	case BPF_PROG_TYPE_XDP:
2039 		nfp_outro_xdp(nfp_prog);
2040 		break;
2041 	default:
2042 		WARN_ON(1);
2043 	}
2044 }
2045 
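/* Main translation pass: emit the prologue, run every instruction through
 * its callback from instr_cb (a preceding instruction may take over the
 * next slot via double_cb), emit the epilogue, pad with nops to cover the
 * ustore prefetch window, and finally resolve branch targets.
 */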
2046 static int nfp_translate(struct nfp_prog *nfp_prog)
2047 {
2048 	struct nfp_insn_meta *meta;
2049 	int err;
2050 
2051 	nfp_intro(nfp_prog);
2052 	if (nfp_prog->error)
2053 		return nfp_prog->error;
2054 
2055 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2056 		instr_cb_t cb = instr_cb[meta->insn.code];
2057 
2058 		meta->off = nfp_prog_current_offset(nfp_prog);
2059 
2060 		if (meta->skip) {
2061 			nfp_prog->n_translated++;
2062 			continue;
2063 		}
2064 
2065 		if (nfp_meta_has_prev(nfp_prog, meta) &&
2066 		    nfp_meta_prev(meta)->double_cb)
2067 			cb = nfp_meta_prev(meta)->double_cb;
2068 		if (!cb)
2069 			return -ENOENT;
2070 		err = cb(nfp_prog, meta);
2071 		if (err)
2072 			return err;
2073 
2074 		nfp_prog->n_translated++;
2075 	}
2076 
2077 	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2078 
2079 	nfp_outro(nfp_prog);
2080 	if (nfp_prog->error)
2081 		return nfp_prog->error;
2082 
2083 	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2084 	if (nfp_prog->error)
2085 		return nfp_prog->error;
2086 
2087 	return nfp_fixup_branches(nfp_prog);
2088 }
2089 
2090 /* --- Optimizations --- */
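/* Scan the program prologue: tolerate the leading self-XOR zeroing that
 * cBPF-converted programs carry, mark the initial R6 = R1 context move as
 * skipped, and stop at the first instruction matching neither pattern.
 */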
2091 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2092 {
2093 	struct nfp_insn_meta *meta;
2094 
2095 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2096 		struct bpf_insn insn = meta->insn;
2097 
2098 		/* Programs converted from cBPF start by XOR-zeroing registers */
2099 		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2100 		    insn.src_reg == insn.dst_reg)
2101 			continue;
2102 
2103 		/* Programs start with R6 = R1 but we ignore the skb pointer */
2104 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2105 		    insn.src_reg == 1 && insn.dst_reg == 6)
2106 			meta->skip = true;
2107 
2108 		/* Return as soon as something doesn't match */
2109 		if (!meta->skip)
2110 			return;
2111 	}
2112 }
2113 
2114 /* Remove the AND mask after a load since our load already zero-extends the value */
2115 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2116 {
2117 	struct nfp_insn_meta *meta1, *meta2;
2118 	const s32 exp_mask[] = {
2119 		[BPF_B] = 0x000000ffU,
2120 		[BPF_H] = 0x0000ffffU,
2121 		[BPF_W] = 0xffffffffU,
2122 	};
2123 
2124 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2125 		struct bpf_insn insn, next;
2126 
2127 		insn = meta1->insn;
2128 		next = meta2->insn;
2129 
2130 		if (BPF_CLASS(insn.code) != BPF_LD)
2131 			continue;
2132 		if (BPF_MODE(insn.code) != BPF_ABS &&
2133 		    BPF_MODE(insn.code) != BPF_IND)
2134 			continue;
2135 
2136 		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2137 			continue;
2138 
2139 		if (!exp_mask[BPF_SIZE(insn.code)])
2140 			continue;
2141 		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2142 			continue;
2143 
2144 		if (next.src_reg || next.dst_reg)
2145 			continue;
2146 
2147 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2148 			continue;
2149 
2150 		meta2->skip = true;
2151 	}
2152 }
2153 
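/* Remove the left-shift-by-32/right-shift-by-32 pair that follows a 32-bit
 * direct packet load in some programs; the load already produces the final
 * value, so the shift pair is redundant.
 */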
2154 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2155 {
2156 	struct nfp_insn_meta *meta1, *meta2, *meta3;
2157 
2158 	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2159 		struct bpf_insn insn, next1, next2;
2160 
2161 		insn = meta1->insn;
2162 		next1 = meta2->insn;
2163 		next2 = meta3->insn;
2164 
2165 		if (BPF_CLASS(insn.code) != BPF_LD)
2166 			continue;
2167 		if (BPF_MODE(insn.code) != BPF_ABS &&
2168 		    BPF_MODE(insn.code) != BPF_IND)
2169 			continue;
2170 		if (BPF_SIZE(insn.code) != BPF_W)
2171 			continue;
2172 
2173 		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2174 		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2175 		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2176 		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2177 			continue;
2178 
2179 		if (next1.src_reg || next1.dst_reg ||
2180 		    next2.src_reg || next2.dst_reg)
2181 			continue;
2182 
2183 		if (next1.imm != 0x20 || next2.imm != 0x20)
2184 			continue;
2185 
2186 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2187 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
2188 			continue;
2189 
2190 		meta2->skip = true;
2191 		meta3->skip = true;
2192 	}
2193 }
2194 
2195 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
2196 {
2197 	nfp_bpf_opt_reg_init(nfp_prog);
2198 
2199 	nfp_bpf_opt_ld_mask(nfp_prog);
2200 	nfp_bpf_opt_ld_shift(nfp_prog);
2201 
2202 	return 0;
2203 }
2204 
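/* Produce the final ustore image: validate every 64-bit instruction word,
 * fill in its ECC bits and store the result in little-endian form.
 */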
2205 static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore)
2206 {
2207 	int i;
2208 
2209 	for (i = 0; i < nfp_prog->prog_len; i++) {
2210 		int err;
2211 
2212 		err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]);
2213 		if (err)
2214 			return err;
2215 
2216 		nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]);
2217 
2218 		ustore[i] = cpu_to_le64(nfp_prog->prog[i]);
2219 	}
2220 
2221 	return 0;
2222 }
2223 
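/* JIT entry point: run the peephole optimizations, translate the program,
 * then compute the ustore image in place over the translated instructions.
 */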
2224 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
2225 {
2226 	int ret;
2227 
2228 	ret = nfp_bpf_optimize(nfp_prog);
2229 	if (ret)
2230 		return ret;
2231 
2232 	ret = nfp_translate(nfp_prog);
2233 	if (ret) {
2234 		pr_err("Translation failed with error %d (translated: %u)\n",
2235 		       ret, nfp_prog->n_translated);
2236 		return -EINVAL;
2237 	}
2238 
2239 	return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog);
2240 }
2241