1 /*
2  * Copyright (C) 2016-2017 Netronome Systems, Inc.
3  *
4  * This software is dual licensed under the GNU General Public License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree or the BSD 2-Clause License provided below.  You have the
7  * option to license this software under the complete terms of either license.
8  *
9  * The BSD 2-Clause License:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      1. Redistributions of source code must retain the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer.
18  *
19  *      2. Redistributions in binary form must reproduce the above
20  *         copyright notice, this list of conditions and the following
21  *         disclaimer in the documentation and/or other materials
22  *         provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt)	"NFP net bpf: " fmt
35 
36 #include <linux/kernel.h>
37 #include <linux/bpf.h>
38 #include <linux/filter.h>
39 #include <linux/pkt_cls.h>
40 #include <linux/unistd.h>
41 
42 #include "main.h"
43 #include "../nfp_asm.h"
44 
45 /* --- NFP prog --- */
46 /* The foreach macros which walk multiple entries provide pos and next<n>
47  * pointers.  It's safe to modify the next pointers (but not pos).
48  */
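/* A minimal usage sketch (want_to_merge() is a hypothetical helper), e.g. an
 * optimization pass scanning adjacent instruction pairs:
 *
 *	struct nfp_insn_meta *meta1, *meta2;
 *
 *	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2)
 *		if (want_to_merge(meta1, meta2))
 *			meta2->skip = true;
 */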
49 #define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
50 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
51 	     next = list_next_entry(pos, l);			\
52 	     &(nfp_prog)->insns != &pos->l &&			\
53 	     &(nfp_prog)->insns != &next->l;			\
54 	     pos = nfp_meta_next(pos),				\
55 	     next = nfp_meta_next(pos))
56 
57 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
58 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
59 	     next = list_next_entry(pos, l),			\
60 	     next2 = list_next_entry(next, l);			\
61 	     &(nfp_prog)->insns != &pos->l &&			\
62 	     &(nfp_prog)->insns != &next->l &&			\
63 	     &(nfp_prog)->insns != &next2->l;			\
64 	     pos = nfp_meta_next(pos),				\
65 	     next = nfp_meta_next(pos),				\
66 	     next2 = nfp_meta_next(next))
67 
68 static bool
69 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
70 {
71 	return meta->l.prev != &nfp_prog->insns;
72 }
73 
74 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
75 {
76 	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
77 		nfp_prog->error = -ENOSPC;
78 		return;
79 	}
80 
81 	nfp_prog->prog[nfp_prog->prog_len] = insn;
82 	nfp_prog->prog_len++;
83 }
84 
85 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
86 {
87 	return nfp_prog->start_off + nfp_prog->prog_len;
88 }
89 
90 static unsigned int
91 nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
92 {
93 	return offset - nfp_prog->start_off;
94 }
95 
96 /* --- Emitters --- */
97 static void
98 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
99 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
100 {
101 	enum cmd_ctx_swap ctx;
102 	u64 insn;
103 
104 	if (sync)
105 		ctx = CMD_CTX_SWAP;
106 	else
107 		ctx = CMD_CTX_NO_SWAP;
108 
109 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
110 		FIELD_PREP(OP_CMD_CTX, ctx) |
111 		FIELD_PREP(OP_CMD_B_SRC, breg) |
112 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
113 		FIELD_PREP(OP_CMD_XFER, xfer) |
114 		FIELD_PREP(OP_CMD_CNT, size) |
115 		FIELD_PREP(OP_CMD_SIG, sync) |
116 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
117 		FIELD_PREP(OP_CMD_MODE, mode);
118 
119 	nfp_prog_push(nfp_prog, insn);
120 }
121 
122 static void
123 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
124 	 u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync)
125 {
126 	struct nfp_insn_re_regs reg;
127 	int err;
128 
129 	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
130 	if (err) {
131 		nfp_prog->error = err;
132 		return;
133 	}
134 	if (reg.swap) {
135 		pr_err("cmd can't swap arguments\n");
136 		nfp_prog->error = -EFAULT;
137 		return;
138 	}
139 	if (reg.dst_lmextn || reg.src_lmextn) {
140 		pr_err("cmd can't use LMextn\n");
141 		nfp_prog->error = -EFAULT;
142 		return;
143 	}
144 
145 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
146 }
147 
148 static void
149 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
150 	  enum br_ctx_signal_state css, u16 addr, u8 defer)
151 {
152 	u16 addr_lo, addr_hi;
153 	u64 insn;
154 
155 	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
156 	addr_hi = addr != addr_lo;
157 
158 	insn = OP_BR_BASE |
159 		FIELD_PREP(OP_BR_MASK, mask) |
160 		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
161 		FIELD_PREP(OP_BR_CSS, css) |
162 		FIELD_PREP(OP_BR_DEFBR, defer) |
163 		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
164 		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
165 
166 	nfp_prog_push(nfp_prog, insn);
167 }
168 
169 static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
170 {
171 	if (defer > 2) {
172 		pr_err("BUG: branch defer out of bounds %d\n", defer);
173 		nfp_prog->error = -EFAULT;
174 		return;
175 	}
176 	__emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
177 }
178 
179 static void
180 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
181 {
182 	__emit_br(nfp_prog, mask,
183 		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
184 		  BR_CSS_NONE, addr, defer);
185 }
186 
187 static void
188 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
189 	     enum immed_width width, bool invert,
190 	     enum immed_shift shift, bool wr_both,
191 	     bool dst_lmextn, bool src_lmextn)
192 {
193 	u64 insn;
194 
195 	insn = OP_IMMED_BASE |
196 		FIELD_PREP(OP_IMMED_A_SRC, areg) |
197 		FIELD_PREP(OP_IMMED_B_SRC, breg) |
198 		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
199 		FIELD_PREP(OP_IMMED_WIDTH, width) |
200 		FIELD_PREP(OP_IMMED_INV, invert) |
201 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
202 		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
203 		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
204 		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
205 
206 	nfp_prog_push(nfp_prog, insn);
207 }
208 
209 static void
210 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
211 	   enum immed_width width, bool invert, enum immed_shift shift)
212 {
213 	struct nfp_insn_ur_regs reg;
214 	int err;
215 
216 	if (swreg_type(dst) == NN_REG_IMM) {
217 		nfp_prog->error = -EFAULT;
218 		return;
219 	}
220 
221 	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
222 	if (err) {
223 		nfp_prog->error = err;
224 		return;
225 	}
226 
227 	__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
228 		     invert, shift, reg.wr_both,
229 		     reg.dst_lmextn, reg.src_lmextn);
230 }
231 
232 static void
233 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
234 	   enum shf_sc sc, u8 shift,
235 	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
236 	   bool dst_lmextn, bool src_lmextn)
237 {
238 	u64 insn;
239 
240 	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
241 		nfp_prog->error = -EFAULT;
242 		return;
243 	}
244 
245 	if (sc == SHF_SC_L_SHF)
246 		shift = 32 - shift;
247 
248 	insn = OP_SHF_BASE |
249 		FIELD_PREP(OP_SHF_A_SRC, areg) |
250 		FIELD_PREP(OP_SHF_SC, sc) |
251 		FIELD_PREP(OP_SHF_B_SRC, breg) |
252 		FIELD_PREP(OP_SHF_I8, i8) |
253 		FIELD_PREP(OP_SHF_SW, sw) |
254 		FIELD_PREP(OP_SHF_DST, dst) |
255 		FIELD_PREP(OP_SHF_SHIFT, shift) |
256 		FIELD_PREP(OP_SHF_OP, op) |
257 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
258 		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
259 		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
260 		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
261 
262 	nfp_prog_push(nfp_prog, insn);
263 }
264 
265 static void
266 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
267 	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
268 {
269 	struct nfp_insn_re_regs reg;
270 	int err;
271 
272 	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
273 	if (err) {
274 		nfp_prog->error = err;
275 		return;
276 	}
277 
278 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
279 		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
280 		   reg.dst_lmextn, reg.src_lmextn);
281 }
282 
283 static void
284 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
285 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
286 	   bool dst_lmextn, bool src_lmextn)
287 {
288 	u64 insn;
289 
290 	insn = OP_ALU_BASE |
291 		FIELD_PREP(OP_ALU_A_SRC, areg) |
292 		FIELD_PREP(OP_ALU_B_SRC, breg) |
293 		FIELD_PREP(OP_ALU_DST, dst) |
294 		FIELD_PREP(OP_ALU_SW, swap) |
295 		FIELD_PREP(OP_ALU_OP, op) |
296 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
297 		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
298 		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
299 		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
300 
301 	nfp_prog_push(nfp_prog, insn);
302 }
303 
304 static void
305 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
306 	 swreg lreg, enum alu_op op, swreg rreg)
307 {
308 	struct nfp_insn_ur_regs reg;
309 	int err;
310 
311 	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
312 	if (err) {
313 		nfp_prog->error = err;
314 		return;
315 	}
316 
317 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
318 		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
319 		   reg.dst_lmextn, reg.src_lmextn);
320 }
321 
322 static void
323 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
324 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
325 		bool zero, bool swap, bool wr_both,
326 		bool dst_lmextn, bool src_lmextn)
327 {
328 	u64 insn;
329 
330 	insn = OP_LDF_BASE |
331 		FIELD_PREP(OP_LDF_A_SRC, areg) |
332 		FIELD_PREP(OP_LDF_SC, sc) |
333 		FIELD_PREP(OP_LDF_B_SRC, breg) |
334 		FIELD_PREP(OP_LDF_I8, imm8) |
335 		FIELD_PREP(OP_LDF_SW, swap) |
336 		FIELD_PREP(OP_LDF_ZF, zero) |
337 		FIELD_PREP(OP_LDF_BMASK, bmask) |
338 		FIELD_PREP(OP_LDF_SHF, shift) |
339 		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
340 		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
341 		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
342 
343 	nfp_prog_push(nfp_prog, insn);
344 }
345 
346 static void
347 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
348 		  enum shf_sc sc, u8 shift, bool zero)
349 {
350 	struct nfp_insn_re_regs reg;
351 	int err;
352 
353 	/* Note: ld_field is special as it uses one of the src regs as dst */
354 	err = swreg_to_restricted(dst, dst, src, &reg, true);
355 	if (err) {
356 		nfp_prog->error = err;
357 		return;
358 	}
359 
360 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
361 			reg.i8, zero, reg.swap, reg.wr_both,
362 			reg.dst_lmextn, reg.src_lmextn);
363 }
364 
365 static void
366 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
367 	      enum shf_sc sc, u8 shift)
368 {
369 	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
370 }
371 
372 static void
373 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
374 	    bool dst_lmextn, bool src_lmextn)
375 {
376 	u64 insn;
377 
378 	insn = OP_LCSR_BASE |
379 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
380 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
381 		FIELD_PREP(OP_LCSR_WRITE, wr) |
382 		FIELD_PREP(OP_LCSR_ADDR, addr) |
383 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
384 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
385 
386 	nfp_prog_push(nfp_prog, insn);
387 }
388 
389 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
390 {
391 	struct nfp_insn_ur_regs reg;
392 	int err;
393 
394 	/* This instruction takes immeds instead of reg_none() for the ignored
395 	 * operand, but we can't encode 2 immeds in one instr with our normal
396 	 * swreg infra, so if the param is an immed we encode it as reg_none()
397 	 * and copy the immed to both operands.
398 	 */
399 	if (swreg_type(src) == NN_REG_IMM) {
400 		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
401 		reg.breg = reg.areg;
402 	} else {
403 		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
404 	}
405 	if (err) {
406 		nfp_prog->error = err;
407 		return;
408 	}
409 
410 	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
411 		    false, reg.src_lmextn);
412 }
413 
414 static void emit_nop(struct nfp_prog *nfp_prog)
415 {
416 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
417 }
418 
419 /* --- Wrappers --- */
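/* pack_immed() - try to express a 32-bit immediate as a 16-bit value plus a
 * whole-byte shift which a single immed instruction can encode.  For example
 * imm == 0x12340000 packs as *val == 0x1234 with *shift == IMMED_SHIFT_2B.
 * Returns false if no such packing exists (e.g. for 0xdeadbeef).
 */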
420 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
421 {
422 	if (!(imm & 0xffff0000)) {
423 		*val = imm;
424 		*shift = IMMED_SHIFT_0B;
425 	} else if (!(imm & 0xff0000ff)) {
426 		*val = imm >> 8;
427 		*shift = IMMED_SHIFT_1B;
428 	} else if (!(imm & 0x0000ffff)) {
429 		*val = imm >> 16;
430 		*shift = IMMED_SHIFT_2B;
431 	} else {
432 		return false;
433 	}
434 
435 	return true;
436 }
437 
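/* wrp_immed() - load an arbitrary 32-bit immediate into @dst.  A single immed
 * instruction is used when @imm or ~@imm packs (see pack_immed()); otherwise
 * two are emitted: the low 16 bits with IMMED_WIDTH_ALL followed by the high
 * 16 bits as a word write shifted up by two bytes.
 */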
438 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
439 {
440 	enum immed_shift shift;
441 	u16 val;
442 
443 	if (pack_immed(imm, &val, &shift)) {
444 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
445 	} else if (pack_immed(~imm, &val, &shift)) {
446 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
447 	} else {
448 		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
449 			   false, IMMED_SHIFT_0B);
450 		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
451 			   false, IMMED_SHIFT_2B);
452 	}
453 }
454 
455 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
456  * If @imm is small enough, encode it directly in the operand and return it;
457  * otherwise load @imm into a spare register and return its encoding.
458  */
459 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
460 {
461 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
462 		return reg_imm(imm);
463 
464 	wrp_immed(nfp_prog, tmp_reg, imm);
465 	return tmp_reg;
466 }
467 
468 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
469  * If @imm is small enough, encode it directly in the operand and return it;
470  * otherwise load @imm into a spare register and return its encoding.
471  */
472 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
473 {
474 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
475 		return reg_imm(imm);
476 
477 	wrp_immed(nfp_prog, tmp_reg, imm);
478 	return tmp_reg;
479 }
480 
481 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
482 {
483 	while (count--)
484 		emit_nop(nfp_prog);
485 }
486 
487 static void
488 wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
489 	       enum br_special special)
490 {
491 	emit_br(nfp_prog, mask, 0, 0);
492 
493 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
494 		FIELD_PREP(OP_BR_SPECIAL, special);
495 }
496 
497 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
498 {
499 	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
500 }
501 
502 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
503 {
504 	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
505 }
506 
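/* data_ld() - emit a read of @size (1, 2 or 4) bytes of packet data at
 * @offset into GPR @dst_gpr.  A full 32-bit word is always fetched; for
 * sub-word sizes the trailing bytes are shifted out, e.g. size == 2 results
 * in a right shift by 16 bits.  The upper GPR of the 64-bit pair is zeroed.
 */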
507 static int
508 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
509 {
510 	unsigned int i;
511 	u16 shift, sz;
512 
513 	/* We load the value from the address indicated in @offset and then
514 	 * shift out the data we don't need.  Note: this is big endian!
515 	 */
516 	sz = max(size, 4);
517 	shift = size < 4 ? 4 - size : 0;
518 
519 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
520 		 pptr_reg(nfp_prog), offset, sz - 1, true);
521 
522 	i = 0;
523 	if (shift)
524 		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
525 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
526 	else
527 		for (; i * 4 < size; i++)
528 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
529 
530 	if (i < 2)
531 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
532 
533 	return 0;
534 }
535 
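/* data_ld_host_order() - like data_ld() but using a 32-bit swapping read so
 * the result lands in host byte order.  For sub-word sizes a byte mask is
 * used instead of a shift, e.g. size == 2 gives mask == 0x3 and ld_field
 * copies just those bytes of the xfer register, zeroing the rest of @dst_gpr.
 */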
536 static int
537 data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
538 		   u8 dst_gpr, int size)
539 {
540 	unsigned int i;
541 	u8 mask, sz;
542 
543 	/* We load the value from the address indicated in @offset and then
544 	 * mask out the data we don't need.  Note: this is little endian!
545 	 */
546 	sz = max(size, 4);
547 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
548 
549 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
550 		 reg_a(src_gpr), offset, sz / 4 - 1, true);
551 
552 	i = 0;
553 	if (mask)
554 		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
555 				  reg_xfer(0), SHF_SC_NONE, 0, true);
556 	else
557 		for (; i * 4 < size; i++)
558 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
559 
560 	if (i < 2)
561 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
562 
563 	return 0;
564 }
565 
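/* The LD_ABS/LD_IND constructors below emit a bounds check before the load:
 * the end of the access (offset + size, plus the source register for the
 * indirect form) is subtracted from the packet length and a branch-if-lower
 * to the abort handler is taken if the access would run past the packet.
 */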
566 static int
567 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
568 {
569 	swreg tmp_reg;
570 
571 	/* Calculate the true offset (src_reg + imm) */
572 	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
573 	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
574 
575 	/* Check packet length (size guaranteed to fit b/c it's u8) */
576 	emit_alu(nfp_prog, imm_a(nfp_prog),
577 		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
578 	emit_alu(nfp_prog, reg_none(),
579 		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
580 	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
581 
582 	/* Load data */
583 	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
584 }
585 
586 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
587 {
588 	swreg tmp_reg;
589 
590 	/* Check packet length */
591 	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
592 	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
593 	wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
594 
595 	/* Load data */
596 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
597 	return data_ld(nfp_prog, tmp_reg, 0, size);
598 }
599 
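/* The store helpers below stage the value in xfer registers (from GPRs or an
 * immediate) and then emit a single byte-swapping write command of @size
 * bytes to the packet at @offset, so the data lands in host byte order.
 */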
600 static int
601 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
602 		    u8 src_gpr, u8 size)
603 {
604 	unsigned int i;
605 
606 	for (i = 0; i * 4 < size; i++)
607 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
608 
609 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
610 		 reg_a(dst_gpr), offset, size - 1, true);
611 
612 	return 0;
613 }
614 
615 static int
616 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
617 		   u64 imm, u8 size)
618 {
619 	wrp_immed(nfp_prog, reg_xfer(0), imm);
620 	if (size == 8)
621 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
622 
623 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
624 		 reg_a(dst_gpr), offset, size - 1, true);
625 
626 	return 0;
627 }
628 
629 typedef int
630 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
631 	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
632 	     bool needs_inc);
633 
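/* wrp_lmem_load() - load one slice (at most 4 bytes, never crossing a 32-bit
 * boundary) of the LMEM word at @off / 4 into GPR @dst starting at byte
 * @dst_byte.  Whole-word slices become a plain move; partial slices use
 * ld_field with a byte mask and a shift, e.g. off == 5, size == 2 and
 * dst_byte == 0 give mask == 0x3 and a right shift by 8 bits.  LMEM words
 * beyond what ld_field can index directly are staged through imm_a().
 */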
634 static int
635 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
636 	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
637 	      bool needs_inc)
638 {
639 	bool should_inc = needs_inc && new_gpr && !last;
640 	u32 idx, src_byte;
641 	enum shf_sc sc;
642 	swreg reg;
643 	int shf;
644 	u8 mask;
645 
646 	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
647 		return -EOPNOTSUPP;
648 
649 	idx = off / 4;
650 
651 	/* Move the entire word */
652 	if (size == 4) {
653 		wrp_mov(nfp_prog, reg_both(dst),
654 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
655 		return 0;
656 	}
657 
658 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
659 		return -EOPNOTSUPP;
660 
661 	src_byte = off % 4;
662 
663 	mask = (1 << size) - 1;
664 	mask <<= dst_byte;
665 
666 	if (WARN_ON_ONCE(mask > 0xf))
667 		return -EOPNOTSUPP;
668 
669 	shf = abs(src_byte - dst_byte) * 8;
670 	if (src_byte == dst_byte) {
671 		sc = SHF_SC_NONE;
672 	} else if (src_byte < dst_byte) {
673 		shf = 32 - shf;
674 		sc = SHF_SC_L_SHF;
675 	} else {
676 		sc = SHF_SC_R_SHF;
677 	}
678 
679 	/* ld_field can address fewer indexes; if the offset is too large do RMW.
680 	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
681 	 */
682 	if (idx <= RE_REG_LM_IDX_MAX) {
683 		reg = reg_lm(lm3 ? 3 : 0, idx);
684 	} else {
685 		reg = imm_a(nfp_prog);
686 		/* If it's not the first part of the load and we start a new GPR
687 		 * that means we are loading a second part of the LMEM word into
688 		 * a new GPR.  IOW we've already looked at that LMEM word and
689 		 * therefore it has been loaded into imm_a().
690 		 */
691 		if (first || !new_gpr)
692 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
693 	}
694 
695 	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
696 
697 	if (should_inc)
698 		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
699 
700 	return 0;
701 }
702 
703 static int
704 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
705 	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
706 	       bool needs_inc)
707 {
708 	bool should_inc = needs_inc && new_gpr && !last;
709 	u32 idx, dst_byte;
710 	enum shf_sc sc;
711 	swreg reg;
712 	int shf;
713 	u8 mask;
714 
715 	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
716 		return -EOPNOTSUPP;
717 
718 	idx = off / 4;
719 
720 	/* Move the entire word */
721 	if (size == 4) {
722 		wrp_mov(nfp_prog,
723 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
724 			reg_b(src));
725 		return 0;
726 	}
727 
728 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
729 		return -EOPNOTSUPP;
730 
731 	dst_byte = off % 4;
732 
733 	mask = (1 << size) - 1;
734 	mask <<= dst_byte;
735 
736 	if (WARN_ON_ONCE(mask > 0xf))
737 		return -EOPNOTSUPP;
738 
739 	shf = abs(src_byte - dst_byte) * 8;
740 	if (src_byte == dst_byte) {
741 		sc = SHF_SC_NONE;
742 	} else if (src_byte < dst_byte) {
743 		shf = 32 - shf;
744 		sc = SHF_SC_L_SHF;
745 	} else {
746 		sc = SHF_SC_R_SHF;
747 	}
748 
749 	/* ld_field can address fewer indexes; if the offset is too large do RMW.
750 	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
751 	 */
752 	if (idx <= RE_REG_LM_IDX_MAX) {
753 		reg = reg_lm(lm3 ? 3 : 0, idx);
754 	} else {
755 		reg = imm_a(nfp_prog);
756 		/* Only the first and last LMEM locations are going to need RMW;
757 		 * the middle locations will be overwritten fully.
758 		 */
759 		if (first || last)
760 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
761 	}
762 
763 	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
764 
765 	if (new_gpr || last) {
766 		if (idx > RE_REG_LM_IDX_MAX)
767 			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
768 		if (should_inc)
769 			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
770 	}
771 
772 	return 0;
773 }
774 
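/* mem_op_stack() - emit a stack (LMEM) access of @size bytes by splitting it
 * into per-word slices and handing each one to @step.  Addressing picks the
 * cheapest option: an access which fits entirely within the bottom 64B uses
 * LMaddr0 directly; a constant offset falling within one 32 byte aligned
 * window sets up LMaddr3 once; everything else (including non-constant
 * pointers) sets up LMaddr3 and post-increments it as the slices advance.
 */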
775 static int
776 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
777 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
778 	     bool clr_gpr, lmem_step step)
779 {
780 	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
781 	bool first = true, last;
782 	bool needs_inc = false;
783 	swreg stack_off_reg;
784 	u8 prev_gpr = 255;
785 	u32 gpr_byte = 0;
786 	bool lm3 = true;
787 	int ret;
788 
789 	if (meta->ptr_not_const) {
790 		/* Use of the last encountered ptr_off is OK; they all have
791 		 * the same alignment.  We depend on the low bits of the value
792 		 * being discarded when written to the LMaddr register.
793 		 */
794 		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
795 						stack_imm(nfp_prog));
796 
797 		emit_alu(nfp_prog, imm_b(nfp_prog),
798 			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
799 
800 		needs_inc = true;
801 	} else if (off + size <= 64) {
802 		/* We can reach bottom 64B with LMaddr0 */
803 		lm3 = false;
804 	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
805 		/* We have to set up a new pointer.  If we know the offset
806 		 * and the entire access falls into a single 32 byte aligned
807 		 * window we won't have to increment the LM pointer.
808 		 * The 32 byte alignment is important because the offset is ORed
809 		 * in, not added, when doing *l$indexN[off].
810 		 */
811 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
812 						stack_imm(nfp_prog));
813 		emit_alu(nfp_prog, imm_b(nfp_prog),
814 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
815 
816 		off %= 32;
817 	} else {
818 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
819 						stack_imm(nfp_prog));
820 
821 		emit_alu(nfp_prog, imm_b(nfp_prog),
822 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
823 
824 		needs_inc = true;
825 	}
826 	if (lm3) {
827 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
828 		/* For size < 4 one slot will be filled by zeroing of upper. */
829 		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
830 	}
831 
832 	if (clr_gpr && size < 8)
833 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
834 
835 	while (size) {
836 		u32 slice_end;
837 		u8 slice_size;
838 
839 		slice_size = min(size, 4 - gpr_byte);
840 		slice_end = min(off + slice_size, round_up(off + 1, 4));
841 		slice_size = slice_end - off;
842 
843 		last = slice_size == size;
844 
845 		if (needs_inc)
846 			off %= 4;
847 
848 		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
849 			   first, gpr != prev_gpr, last, lm3, needs_inc);
850 		if (ret)
851 			return ret;
852 
853 		prev_gpr = gpr;
854 		first = false;
855 
856 		gpr_byte += slice_size;
857 		if (gpr_byte >= 4) {
858 			gpr_byte -= 4;
859 			gpr++;
860 		}
861 
862 		size -= slice_size;
863 		off += slice_size;
864 	}
865 
866 	return 0;
867 }
868 
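/* wrp_alu_imm() - 32-bit ALU operation with an immediate operand.  Trivial
 * cases are strength-reduced: AND with 0 becomes a load of 0, AND with ~0 and
 * OR/XOR with 0 emit nothing, OR with ~0 becomes a load of ~0 and XOR with ~0
 * becomes a NOT.  Otherwise the immediate is materialized (via imm_b() if it
 * does not fit the operand encoding) and a regular ALU instruction is emitted.
 */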
869 static void
870 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
871 {
872 	swreg tmp_reg;
873 
874 	if (alu_op == ALU_OP_AND) {
875 		if (!imm)
876 			wrp_immed(nfp_prog, reg_both(dst), 0);
877 		if (!imm || !~imm)
878 			return;
879 	}
880 	if (alu_op == ALU_OP_OR) {
881 		if (!~imm)
882 			wrp_immed(nfp_prog, reg_both(dst), ~0U);
883 		if (!imm || !~imm)
884 			return;
885 	}
886 	if (alu_op == ALU_OP_XOR) {
887 		if (!~imm)
888 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
889 				 ALU_OP_NOT, reg_b(dst));
890 		if (!imm || !~imm)
891 			return;
892 	}
893 
894 	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
895 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
896 }
897 
898 static int
899 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
900 	      enum alu_op alu_op, bool skip)
901 {
902 	const struct bpf_insn *insn = &meta->insn;
903 	u64 imm = insn->imm; /* sign extend */
904 
905 	if (skip) {
906 		meta->skip = true;
907 		return 0;
908 	}
909 
910 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
911 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
912 
913 	return 0;
914 }
915 
916 static int
917 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
918 	      enum alu_op alu_op)
919 {
920 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
921 
922 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
923 	emit_alu(nfp_prog, reg_both(dst + 1),
924 		 reg_a(dst + 1), alu_op, reg_b(src + 1));
925 
926 	return 0;
927 }
928 
929 static int
930 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
931 	      enum alu_op alu_op, bool skip)
932 {
933 	const struct bpf_insn *insn = &meta->insn;
934 
935 	if (skip) {
936 		meta->skip = true;
937 		return 0;
938 	}
939 
940 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
941 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
942 
943 	return 0;
944 }
945 
946 static int
947 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
948 	      enum alu_op alu_op)
949 {
950 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
951 
952 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
953 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
954 
955 	return 0;
956 }
957 
958 static void
959 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
960 		 enum br_mask br_mask, u16 off)
961 {
962 	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
963 	emit_br(nfp_prog, br_mask, off, 0);
964 }
965 
966 static int
967 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
968 	     enum alu_op alu_op, enum br_mask br_mask)
969 {
970 	const struct bpf_insn *insn = &meta->insn;
971 
972 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
973 			 insn->src_reg * 2, br_mask, insn->off);
974 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
975 			 insn->src_reg * 2 + 1, br_mask, insn->off);
976 
977 	return 0;
978 }
979 
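/* The 64-bit compare helpers below emit a subtract of the low words followed
 * by a subtract-with-carry of the high words (results discarded) and then
 * branch on the resulting condition codes.  @swap reverses the operand order
 * so that > and <= can reuse the same BR_BLO / BR_BHS masks as < and >=.
 */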
980 static int
981 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
982 	    enum br_mask br_mask, bool swap)
983 {
984 	const struct bpf_insn *insn = &meta->insn;
985 	u64 imm = insn->imm; /* sign extend */
986 	u8 reg = insn->dst_reg * 2;
987 	swreg tmp_reg;
988 
989 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
990 	if (!swap)
991 		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
992 	else
993 		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
994 
995 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
996 	if (!swap)
997 		emit_alu(nfp_prog, reg_none(),
998 			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
999 	else
1000 		emit_alu(nfp_prog, reg_none(),
1001 			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
1002 
1003 	emit_br(nfp_prog, br_mask, insn->off, 0);
1004 
1005 	return 0;
1006 }
1007 
1008 static int
1009 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1010 	    enum br_mask br_mask, bool swap)
1011 {
1012 	const struct bpf_insn *insn = &meta->insn;
1013 	u8 areg, breg;
1014 
1015 	areg = insn->dst_reg * 2;
1016 	breg = insn->src_reg * 2;
1017 
1018 	if (swap) {
1019 		areg ^= breg;
1020 		breg ^= areg;
1021 		areg ^= breg;
1022 	}
1023 
1024 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1025 	emit_alu(nfp_prog, reg_none(),
1026 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1027 	emit_br(nfp_prog, br_mask, insn->off, 0);
1028 
1029 	return 0;
1030 }
1031 
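/* wrp_end32() - byte swap a 32-bit value with two ld_field rotates: rotate
 * right by 8 writing all four bytes, then rotate right by 16 writing only the
 * bytes selected by mask 0x5.  The net effect is a full byte swap, e.g.
 * 0xAABBCCDD in @reg_in becomes 0xDDCCBBAA in @gpr_out.
 */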
1032 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1033 {
1034 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1035 		      SHF_SC_R_ROT, 8);
1036 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1037 		      SHF_SC_R_ROT, 16);
1038 }
1039 
1040 /* --- Callbacks --- */
1041 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1042 {
1043 	const struct bpf_insn *insn = &meta->insn;
1044 	u8 dst = insn->dst_reg * 2;
1045 	u8 src = insn->src_reg * 2;
1046 
1047 	if (insn->src_reg == BPF_REG_10) {
1048 		swreg stack_depth_reg;
1049 
1050 		stack_depth_reg = ur_load_imm_any(nfp_prog,
1051 						  nfp_prog->stack_depth,
1052 						  stack_imm(nfp_prog));
1053 		emit_alu(nfp_prog, reg_both(dst),
1054 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1055 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1056 	} else {
1057 		wrp_reg_mov(nfp_prog, dst, src);
1058 		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1059 	}
1060 
1061 	return 0;
1062 }
1063 
1064 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1065 {
1066 	u64 imm = meta->insn.imm; /* sign extend */
1067 
1068 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1069 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1070 
1071 	return 0;
1072 }
1073 
1074 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1075 {
1076 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1077 }
1078 
1079 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1080 {
1081 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1082 }
1083 
1084 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1085 {
1086 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1087 }
1088 
1089 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1090 {
1091 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1092 }
1093 
1094 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1095 {
1096 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1097 }
1098 
1099 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1100 {
1101 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1102 }
1103 
1104 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1105 {
1106 	const struct bpf_insn *insn = &meta->insn;
1107 
1108 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1109 		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1110 		 reg_b(insn->src_reg * 2));
1111 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1112 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1113 		 reg_b(insn->src_reg * 2 + 1));
1114 
1115 	return 0;
1116 }
1117 
1118 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1119 {
1120 	const struct bpf_insn *insn = &meta->insn;
1121 	u64 imm = insn->imm; /* sign extend */
1122 
1123 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1124 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1125 
1126 	return 0;
1127 }
1128 
1129 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1130 {
1131 	const struct bpf_insn *insn = &meta->insn;
1132 
1133 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1134 		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1135 		 reg_b(insn->src_reg * 2));
1136 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1137 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1138 		 reg_b(insn->src_reg * 2 + 1));
1139 
1140 	return 0;
1141 }
1142 
1143 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1144 {
1145 	const struct bpf_insn *insn = &meta->insn;
1146 	u64 imm = insn->imm; /* sign extend */
1147 
1148 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1149 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1150 
1151 	return 0;
1152 }
1153 
1154 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1155 {
1156 	const struct bpf_insn *insn = &meta->insn;
1157 
1158 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1159 		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1160 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1161 		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1162 
1163 	return 0;
1164 }
1165 
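/* shl_imm64() - 64-bit left shift by a constant.  For shifts below 32 the
 * high word is rebuilt with a double-shift which funnels in the top bits of
 * the low word, then the low word is shifted left.  A shift of exactly 32 is
 * a move of low into high plus zeroing of low; shifts above 32 shift the low
 * word by imm - 32 into the high word and zero the low word.
 */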
1166 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1167 {
1168 	const struct bpf_insn *insn = &meta->insn;
1169 	u8 dst = insn->dst_reg * 2;
1170 
1171 	if (insn->imm < 32) {
1172 		emit_shf(nfp_prog, reg_both(dst + 1),
1173 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1174 			 SHF_SC_R_DSHF, 32 - insn->imm);
1175 		emit_shf(nfp_prog, reg_both(dst),
1176 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1177 			 SHF_SC_L_SHF, insn->imm);
1178 	} else if (insn->imm == 32) {
1179 		wrp_reg_mov(nfp_prog, dst + 1, dst);
1180 		wrp_immed(nfp_prog, reg_both(dst), 0);
1181 	} else if (insn->imm > 32) {
1182 		emit_shf(nfp_prog, reg_both(dst + 1),
1183 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1184 			 SHF_SC_L_SHF, insn->imm - 32);
1185 		wrp_immed(nfp_prog, reg_both(dst), 0);
1186 	}
1187 
1188 	return 0;
1189 }
1190 
1191 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1192 {
1193 	const struct bpf_insn *insn = &meta->insn;
1194 	u8 dst = insn->dst_reg * 2;
1195 
1196 	if (insn->imm < 32) {
1197 		emit_shf(nfp_prog, reg_both(dst),
1198 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1199 			 SHF_SC_R_DSHF, insn->imm);
1200 		emit_shf(nfp_prog, reg_both(dst + 1),
1201 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1202 			 SHF_SC_R_SHF, insn->imm);
1203 	} else if (insn->imm == 32) {
1204 		wrp_reg_mov(nfp_prog, dst, dst + 1);
1205 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1206 	} else if (insn->imm > 32) {
1207 		emit_shf(nfp_prog, reg_both(dst),
1208 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1209 			 SHF_SC_R_SHF, insn->imm - 32);
1210 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1211 	}
1212 
1213 	return 0;
1214 }
1215 
1216 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1217 {
1218 	const struct bpf_insn *insn = &meta->insn;
1219 
1220 	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
1221 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1222 
1223 	return 0;
1224 }
1225 
1226 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1227 {
1228 	const struct bpf_insn *insn = &meta->insn;
1229 
1230 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
1231 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1232 
1233 	return 0;
1234 }
1235 
1236 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1237 {
1238 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
1239 }
1240 
1241 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1242 {
1243 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
1244 }
1245 
1246 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1247 {
1248 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
1249 }
1250 
1251 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1252 {
1253 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1254 }
1255 
1256 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1257 {
1258 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
1259 }
1260 
1261 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1262 {
1263 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1264 }
1265 
1266 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1267 {
1268 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
1269 }
1270 
1271 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1272 {
1273 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
1274 }
1275 
1276 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1277 {
1278 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
1279 }
1280 
1281 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1282 {
1283 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
1284 }
1285 
1286 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1287 {
1288 	u8 dst = meta->insn.dst_reg * 2;
1289 
1290 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
1291 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1292 
1293 	return 0;
1294 }
1295 
1296 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1297 {
1298 	const struct bpf_insn *insn = &meta->insn;
1299 
1300 	if (!insn->imm)
1301 		return 1; /* TODO: zero shift means indirect */
1302 
1303 	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
1304 		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
1305 		 SHF_SC_L_SHF, insn->imm);
1306 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1307 
1308 	return 0;
1309 }
1310 
1311 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1312 {
1313 	const struct bpf_insn *insn = &meta->insn;
1314 	u8 gpr = insn->dst_reg * 2;
1315 
1316 	switch (insn->imm) {
1317 	case 16:
1318 		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
1319 			      SHF_SC_R_ROT, 8);
1320 		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
1321 			      SHF_SC_R_SHF, 16);
1322 
1323 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1324 		break;
1325 	case 32:
1326 		wrp_end32(nfp_prog, reg_a(gpr), gpr);
1327 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1328 		break;
1329 	case 64:
1330 		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
1331 
1332 		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
1333 		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
1334 		break;
1335 	}
1336 
1337 	return 0;
1338 }
1339 
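/* BPF_LD | BPF_IMM | BPF_DW occupies two BPF instructions.  The first half
 * only records imm_ld8_part2() as a callback; the second half then writes
 * both 32-bit immediates, reusing a register move when the two halves are
 * equal since a move is always one NFP instruction while a load immediate
 * may take two.
 */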
1340 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1341 {
1342 	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
1343 	u32 imm_lo, imm_hi;
1344 	u8 dst;
1345 
1346 	dst = prev->insn.dst_reg * 2;
1347 	imm_lo = prev->insn.imm;
1348 	imm_hi = meta->insn.imm;
1349 
1350 	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
1351 
1352 	/* mov is always 1 insn, load imm may be two, so try to use mov */
1353 	if (imm_hi == imm_lo)
1354 		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
1355 	else
1356 		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
1357 
1358 	return 0;
1359 }
1360 
1361 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1362 {
1363 	meta->double_cb = imm_ld8_part2;
1364 	return 0;
1365 }
1366 
1367 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1368 {
1369 	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
1370 }
1371 
1372 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1373 {
1374 	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
1375 }
1376 
1377 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1378 {
1379 	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
1380 }
1381 
1382 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1383 {
1384 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1385 				     meta->insn.src_reg * 2, 1);
1386 }
1387 
1388 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1389 {
1390 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1391 				     meta->insn.src_reg * 2, 2);
1392 }
1393 
1394 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1395 {
1396 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1397 				     meta->insn.src_reg * 2, 4);
1398 }
1399 
1400 static int
1401 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1402 	      unsigned int size, unsigned int ptr_off)
1403 {
1404 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1405 			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
1406 			    true, wrp_lmem_load);
1407 }
1408 
1409 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1410 		       u8 size)
1411 {
1412 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1413 
1414 	switch (meta->insn.off) {
1415 	case offsetof(struct __sk_buff, len):
1416 		if (size != FIELD_SIZEOF(struct __sk_buff, len))
1417 			return -EOPNOTSUPP;
1418 		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
1419 		break;
1420 	case offsetof(struct __sk_buff, data):
1421 		if (size != FIELD_SIZEOF(struct __sk_buff, data))
1422 			return -EOPNOTSUPP;
1423 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1424 		break;
1425 	case offsetof(struct __sk_buff, data_end):
1426 		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
1427 			return -EOPNOTSUPP;
1428 		emit_alu(nfp_prog, dst,
1429 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1430 		break;
1431 	default:
1432 		return -EOPNOTSUPP;
1433 	}
1434 
1435 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1436 
1437 	return 0;
1438 }
1439 
1440 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1441 		       u8 size)
1442 {
1443 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1444 
1445 	switch (meta->insn.off) {
1446 	case offsetof(struct xdp_md, data):
1447 		if (size != FIELD_SIZEOF(struct xdp_md, data))
1448 			return -EOPNOTSUPP;
1449 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1450 		break;
1451 	case offsetof(struct xdp_md, data_end):
1452 		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
1453 			return -EOPNOTSUPP;
1454 		emit_alu(nfp_prog, dst,
1455 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1456 		break;
1457 	default:
1458 		return -EOPNOTSUPP;
1459 	}
1460 
1461 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1462 
1463 	return 0;
1464 }
1465 
1466 static int
1467 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1468 	     unsigned int size)
1469 {
1470 	swreg tmp_reg;
1471 
1472 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1473 
1474 	return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
1475 				  meta->insn.dst_reg * 2, size);
1476 }
1477 
1478 static int
1479 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1480 	unsigned int size)
1481 {
1482 	if (meta->ptr.type == PTR_TO_CTX) {
1483 		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
1484 			return mem_ldx_xdp(nfp_prog, meta, size);
1485 		else
1486 			return mem_ldx_skb(nfp_prog, meta, size);
1487 	}
1488 
1489 	if (meta->ptr.type == PTR_TO_PACKET)
1490 		return mem_ldx_data(nfp_prog, meta, size);
1491 
1492 	if (meta->ptr.type == PTR_TO_STACK)
1493 		return mem_ldx_stack(nfp_prog, meta, size,
1494 				     meta->ptr.off + meta->ptr.var_off.value);
1495 
1496 	return -EOPNOTSUPP;
1497 }
1498 
1499 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1500 {
1501 	return mem_ldx(nfp_prog, meta, 1);
1502 }
1503 
1504 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1505 {
1506 	return mem_ldx(nfp_prog, meta, 2);
1507 }
1508 
1509 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1510 {
1511 	return mem_ldx(nfp_prog, meta, 4);
1512 }
1513 
1514 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1515 {
1516 	return mem_ldx(nfp_prog, meta, 8);
1517 }
1518 
1519 static int
1520 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1521 	    unsigned int size)
1522 {
1523 	u64 imm = meta->insn.imm; /* sign extend */
1524 	swreg off_reg;
1525 
1526 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1527 
1528 	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1529 				  imm, size);
1530 }
1531 
1532 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1533 		  unsigned int size)
1534 {
1535 	if (meta->ptr.type == PTR_TO_PACKET)
1536 		return mem_st_data(nfp_prog, meta, size);
1537 
1538 	return -EOPNOTSUPP;
1539 }
1540 
1541 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1542 {
1543 	return mem_st(nfp_prog, meta, 1);
1544 }
1545 
1546 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1547 {
1548 	return mem_st(nfp_prog, meta, 2);
1549 }
1550 
1551 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1552 {
1553 	return mem_st(nfp_prog, meta, 4);
1554 }
1555 
1556 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1557 {
1558 	return mem_st(nfp_prog, meta, 8);
1559 }
1560 
1561 static int
1562 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1563 	     unsigned int size)
1564 {
1565 	swreg off_reg;
1566 
1567 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1568 
1569 	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1570 				   meta->insn.src_reg * 2, size);
1571 }
1572 
1573 static int
1574 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1575 	      unsigned int size, unsigned int ptr_off)
1576 {
1577 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1578 			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
1579 			    false, wrp_lmem_store);
1580 }
1581 
1582 static int
1583 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1584 	unsigned int size)
1585 {
1586 	if (meta->ptr.type == PTR_TO_PACKET)
1587 		return mem_stx_data(nfp_prog, meta, size);
1588 
1589 	if (meta->ptr.type == PTR_TO_STACK)
1590 		return mem_stx_stack(nfp_prog, meta, size,
1591 				     meta->ptr.off + meta->ptr.var_off.value);
1592 
1593 	return -EOPNOTSUPP;
1594 }
1595 
1596 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1597 {
1598 	return mem_stx(nfp_prog, meta, 1);
1599 }
1600 
1601 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1602 {
1603 	return mem_stx(nfp_prog, meta, 2);
1604 }
1605 
1606 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1607 {
1608 	return mem_stx(nfp_prog, meta, 4);
1609 }
1610 
1611 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1612 {
1613 	return mem_stx(nfp_prog, meta, 8);
1614 }
1615 
1616 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1617 {
1618 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
1619 
1620 	return 0;
1621 }
1622 
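/* jeq_imm() - 64-bit equality test against an immediate.  Each register half
 * is XORed with the corresponding immediate half (skipped when that half of
 * the immediate is zero, in which case the register is used as-is), the two
 * results are ORed together and the branch is taken if the OR is zero.
 */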
1623 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1624 {
1625 	const struct bpf_insn *insn = &meta->insn;
1626 	u64 imm = insn->imm; /* sign extend */
1627 	swreg or1, or2, tmp_reg;
1628 
1629 	or1 = reg_a(insn->dst_reg * 2);
1630 	or2 = reg_b(insn->dst_reg * 2 + 1);
1631 
1632 	if (imm & ~0U) {
1633 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1634 		emit_alu(nfp_prog, imm_a(nfp_prog),
1635 			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
1636 		or1 = imm_a(nfp_prog);
1637 	}
1638 
1639 	if (imm >> 32) {
1640 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1641 		emit_alu(nfp_prog, imm_b(nfp_prog),
1642 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
1643 		or2 = imm_b(nfp_prog);
1644 	}
1645 
1646 	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
1647 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
1648 
1649 	return 0;
1650 }
1651 
1652 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1653 {
1654 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
1655 }
1656 
1657 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1658 {
1659 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
1660 }
1661 
1662 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1663 {
1664 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
1665 }
1666 
1667 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1668 {
1669 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
1670 }
1671 
1672 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1673 {
1674 	const struct bpf_insn *insn = &meta->insn;
1675 	u64 imm = insn->imm; /* sign extend */
1676 	swreg tmp_reg;
1677 
1678 	if (!imm) {
1679 		meta->skip = true;
1680 		return 0;
1681 	}
1682 
1683 	if (imm & ~0U) {
1684 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1685 		emit_alu(nfp_prog, reg_none(),
1686 			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
1687 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1688 	}
1689 
1690 	if (imm >> 32) {
1691 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1692 		emit_alu(nfp_prog, reg_none(),
1693 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
1694 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1695 	}
1696 
1697 	return 0;
1698 }
1699 
1700 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1701 {
1702 	const struct bpf_insn *insn = &meta->insn;
1703 	u64 imm = insn->imm; /* sign extend */
1704 	swreg tmp_reg;
1705 
1706 	if (!imm) {
1707 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
1708 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
1709 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
1710 		return 0;
1711 	}
1712 
1713 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1714 	emit_alu(nfp_prog, reg_none(),
1715 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
1716 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
1717 
1718 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1719 	emit_alu(nfp_prog, reg_none(),
1720 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
1721 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
1722 
1723 	return 0;
1724 }
1725 
1726 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1727 {
1728 	const struct bpf_insn *insn = &meta->insn;
1729 
1730 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
1731 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
1732 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
1733 		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
1734 	emit_alu(nfp_prog, reg_none(),
1735 		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
1736 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
1737 
1738 	return 0;
1739 }
1740 
1741 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1742 {
1743 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
1744 }
1745 
1746 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1747 {
1748 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
1749 }
1750 
1751 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1752 {
1753 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
1754 }
1755 
1756 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1757 {
1758 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
1759 }
1760 
1761 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1762 {
1763 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
1764 }
1765 
1766 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1767 {
1768 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
1769 }
1770 
1771 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1772 {
1773 	wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
1774 
1775 	return 0;
1776 }
1777 
1778 static const instr_cb_t instr_cb[256] = {
1779 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
1780 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
1781 	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
1782 	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
1783 	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
1784 	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
1785 	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
1786 	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
1787 	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
1788 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
1789 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
1790 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
1791 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
1792 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
1793 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
1794 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
1795 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
1796 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
1797 	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
1798 	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
1799 	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
1800 	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
1801 	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
1802 	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
1803 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
1804 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
1805 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
1806 	[BPF_ALU | BPF_NEG] =		neg_reg,
1807 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
1808 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
1809 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
1810 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
1811 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
1812 	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
1813 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
1814 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
1815 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
1816 	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
1817 	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
1818 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
1819 	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
1820 	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
1821 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
1822 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
1823 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
1824 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
1825 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
1826 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
1827 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
1828 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
1829 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
1830 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
1831 	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
1832 	[BPF_JMP | BPF_JLT | BPF_K] =	jlt_imm,
1833 	[BPF_JMP | BPF_JLE | BPF_K] =	jle_imm,
1834 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
1835 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
1836 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
1837 	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
1838 	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
1839 	[BPF_JMP | BPF_JLT | BPF_X] =	jlt_reg,
1840 	[BPF_JMP | BPF_JLE | BPF_X] =	jle_reg,
1841 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
1842 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
1843 	[BPF_JMP | BPF_EXIT] =		goto_out,
1844 };
1845 
1846 /* --- Misc code --- */
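/* Write a jump target into an already emitted branch instruction.  The
 * offset is split between the instruction's ADDR_LO field and the ADDR_HI
 * field, which is set to 1 whenever the offset does not fit in ADDR_LO alone.
 */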
1847 static void br_set_offset(u64 *instr, u16 offset)
1848 {
1849 	u16 addr_lo, addr_hi;
1850 
1851 	addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
1852 	addr_hi = offset != addr_lo;
1853 	*instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
1854 	*instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
1855 	*instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
1856 }
1857 
1858 /* --- Assembler logic --- */
1859 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
1860 {
1861 	struct nfp_insn_meta *meta, *jmp_dst;
1862 	u32 idx, br_idx;
1863 
1864 	list_for_each_entry(meta, &nfp_prog->insns, l) {
1865 		if (meta->skip)
1866 			continue;
1867 		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
1868 			continue;
1869 
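		/* The branch to patch is expected to be the last NFP
		 * instruction emitted for this BPF jump, i.e. the word just
		 * before the next instruction's offset (or the last word of
		 * the translated BPF body).
		 */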
1870 		if (list_is_last(&meta->l, &nfp_prog->insns))
1871 			idx = nfp_prog->last_bpf_off;
1872 		else
1873 			idx = list_next_entry(meta, l)->off - 1;
1874 
1875 		br_idx = nfp_prog_offset_to_index(nfp_prog, idx);
1876 
1877 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
1878 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
1879 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
1880 			return -ELOOP;
1881 		}
1882 		/* Leave special branches for later */
1883 		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
1884 			continue;
1885 
1886 		if (!meta->jmp_dst) {
1887 			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
1888 			return -ELOOP;
1889 		}
1890 
1891 		jmp_dst = meta->jmp_dst;
1892 
1893 		if (jmp_dst->skip) {
1894 			pr_err("Branch landing on removed instruction!!\n");
1895 			return -ELOOP;
1896 		}
1897 
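		/* A single BPF jump may have expanded into several NFP
		 * instructions; point every branch in that range at the
		 * destination.
		 */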
1898 		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
1899 		     idx <= br_idx; idx++) {
1900 			if (!nfp_is_br(nfp_prog->prog[idx]))
1901 				continue;
1902 			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
1903 		}
1904 	}
1905 
1906 	/* Fix up 'goto out' branches separately, as they can be scattered around */
1907 	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
1908 		enum br_special special;
1909 
1910 		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
1911 			continue;
1912 
1913 		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
1914 		switch (special) {
1915 		case OP_BR_NORMAL:
1916 			break;
1917 		case OP_BR_GO_OUT:
1918 			br_set_offset(&nfp_prog->prog[br_idx],
1919 				      nfp_prog->tgt_out);
1920 			break;
1921 		case OP_BR_GO_ABORT:
1922 			br_set_offset(&nfp_prog->prog[br_idx],
1923 				      nfp_prog->tgt_abort);
1924 			break;
1925 		}
1926 
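		/* Clear the special-branch marker, leaving a plain branch */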
1927 		nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
1928 	}
1929 
1930 	return 0;
1931 }
1932 
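/* Program prologue: load the packet length register from the packet vector
 * length word, masked down to its low 14 bits.
 */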
1933 static void nfp_intro(struct nfp_prog *nfp_prog)
1934 {
1935 	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
1936 	emit_alu(nfp_prog, plen_reg(nfp_prog),
1937 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
1938 }
1939 
1940 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
1941 {
1942 	/* TC direct-action mode:
1943 	 *   0,1   ok        NOT SUPPORTED[1]
1944 	 *   2   drop  0x22 -> drop,  count as stat1
1945 	 *   4,5 nuke  0x02 -> drop
1946 	 *   7  redir  0x44 -> redir, count as stat2
1947 	 *   * unspec  0x11 -> pass,  count as stat0
1948 	 *
1949 	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
1950 	 *     the exact decision made.  We are forced to support UNSPEC
1951 	 *     to handle aborts, so that's the only code we use when passing
1952 	 *     packets up the stack.
1953 	 */
1954 	/* Target for aborts */
1955 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
1956 
1957 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
1958 
1959 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
1960 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
1961 
1962 	/* Target for normal exits */
1963 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
1964 
1965 	/* if R0 > 7 jump to abort */
1966 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
1967 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
1968 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
1969 
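	/* reg_b(2) and reg_b(3) below are effectively nibble lookup tables
	 * indexed by the BPF return value: nibble R0 of each constant is
	 * extracted (indirect shift right by R0 * 4, masked with 0xf) and
	 * the two nibbles are combined into the result byte from the table
	 * above, e.g. R0 == 2 selects nibble 2 of both constants, giving
	 * the 0x22 drop code.
	 */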
1970 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
1971 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
1972 
1973 	emit_shf(nfp_prog, reg_a(1),
1974 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
1975 
1976 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
1977 	emit_shf(nfp_prog, reg_a(2),
1978 		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
1979 
1980 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
1981 	emit_shf(nfp_prog, reg_b(2),
1982 		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
1983 
1984 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
1985 
1986 	emit_shf(nfp_prog, reg_b(2),
1987 		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
1988 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
1989 }
1990 
1991 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
1992 {
1993 	/* XDP return codes:
1994 	 *   0 aborted  0x82 -> drop,  count as stat3
1995 	 *   1    drop  0x22 -> drop,  count as stat1
1996 	 *   2    pass  0x11 -> pass,  count as stat0
1997 	 *   3      tx  0x44 -> redir, count as stat2
1998 	 *   * unknown  0x82 -> drop,  count as stat3
1999 	 */
2000 	/* Target for aborts */
2001 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2002 
2003 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
2004 
2005 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2006 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2007 
2008 	/* Target for normal exits */
2009 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2010 
2011 	/* if R0 > 3 jump to abort */
2012 	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2013 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2014 
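	/* reg_b(2) is effectively a byte lookup table indexed by the XDP
	 * return code: byte R0 of 0x44112282 (indirect shift right by
	 * R0 * 8, masked with 0xff) becomes the result code from the table
	 * above, e.g. XDP_PASS == 2 selects byte 2, i.e. 0x11.
	 */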
2015 	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
2016 
2017 	emit_shf(nfp_prog, reg_a(1),
2018 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2019 
2020 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2021 	emit_shf(nfp_prog, reg_b(2),
2022 		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2023 
2024 	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
2025 
2026 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2027 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2028 }
2029 
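/* Program epilogue: translate the BPF return value in R0 into the result
 * code the datapath expects for the given program type.
 */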
2030 static void nfp_outro(struct nfp_prog *nfp_prog)
2031 {
2032 	switch (nfp_prog->type) {
2033 	case BPF_PROG_TYPE_SCHED_CLS:
2034 		nfp_outro_tc_da(nfp_prog);
2035 		break;
2036 	case BPF_PROG_TYPE_XDP:
2037 		nfp_outro_xdp(nfp_prog);
2038 		break;
2039 	default:
2040 		WARN_ON(1);
2041 	}
2042 }
2043 
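/* Translate the whole program: emit the prologue, run the per-instruction
 * callbacks, emit the epilogue, pad with nops to cover the ustore prefetch
 * window and finally resolve branch targets.
 */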
2044 static int nfp_translate(struct nfp_prog *nfp_prog)
2045 {
2046 	struct nfp_insn_meta *meta;
2047 	int err;
2048 
2049 	nfp_intro(nfp_prog);
2050 	if (nfp_prog->error)
2051 		return nfp_prog->error;
2052 
2053 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2054 		instr_cb_t cb = instr_cb[meta->insn.code];
2055 
2056 		meta->off = nfp_prog_current_offset(nfp_prog);
2057 
2058 		if (meta->skip) {
2059 			nfp_prog->n_translated++;
2060 			continue;
2061 		}
2062 
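		/* An instruction that occupies two BPF slots (e.g. the
		 * 64-bit immediate load) may install a double_cb on its
		 * first half to take over translation of the second.
		 */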
2063 		if (nfp_meta_has_prev(nfp_prog, meta) &&
2064 		    nfp_meta_prev(meta)->double_cb)
2065 			cb = nfp_meta_prev(meta)->double_cb;
2066 		if (!cb)
2067 			return -ENOENT;
2068 		err = cb(nfp_prog, meta);
2069 		if (err)
2070 			return err;
2071 
2072 		nfp_prog->n_translated++;
2073 	}
2074 
2075 	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2076 
2077 	nfp_outro(nfp_prog);
2078 	if (nfp_prog->error)
2079 		return nfp_prog->error;
2080 
2081 	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2082 	if (nfp_prog->error)
2083 		return nfp_prog->error;
2084 
2085 	return nfp_fixup_branches(nfp_prog);
2086 }
2087 
2088 /* --- Optimizations --- */
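/* Mark the initial "r6 = r1" copy of the context pointer as skippable,
 * since we ignore it, while scanning past the register zeroing that
 * cBPF-converted programs start with.
 */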
2089 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2090 {
2091 	struct nfp_insn_meta *meta;
2092 
2093 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2094 		struct bpf_insn insn = meta->insn;
2095 
2096 		/* Programs converted from cBPF start with register xoring */
2097 		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2098 		    insn.src_reg == insn.dst_reg)
2099 			continue;
2100 
2101 		/* Programs start with R6 = R1 but we ignore the skb pointer */
2102 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2103 		    insn.src_reg == 1 && insn.dst_reg == 6)
2104 			meta->skip = true;
2105 
2106 		/* Return as soon as something doesn't match */
2107 		if (!meta->skip)
2108 			return;
2109 	}
2110 }
2111 
2112 /* Remove masking after load since our loads already zero the unused high bits */
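/* e.g. after a BPF_B packet load a following "r0 &= 0xff" is a no-op and can go */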
2113 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2114 {
2115 	struct nfp_insn_meta *meta1, *meta2;
2116 	const s32 exp_mask[] = {
2117 		[BPF_B] = 0x000000ffU,
2118 		[BPF_H] = 0x0000ffffU,
2119 		[BPF_W] = 0xffffffffU,
2120 	};
2121 
2122 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2123 		struct bpf_insn insn, next;
2124 
2125 		insn = meta1->insn;
2126 		next = meta2->insn;
2127 
2128 		if (BPF_CLASS(insn.code) != BPF_LD)
2129 			continue;
2130 		if (BPF_MODE(insn.code) != BPF_ABS &&
2131 		    BPF_MODE(insn.code) != BPF_IND)
2132 			continue;
2133 
2134 		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2135 			continue;
2136 
2137 		if (!exp_mask[BPF_SIZE(insn.code)])
2138 			continue;
2139 		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2140 			continue;
2141 
2142 		if (next.src_reg || next.dst_reg)
2143 			continue;
2144 
2145 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2146 			continue;
2147 
2148 		meta2->skip = true;
2149 	}
2150 }
2151 
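/* Remove the pair of shift-by-32 instructions that cBPF conversion emits
 * after a 32-bit packet load to zero-extend the result; our loads already
 * guarantee the zero extension.
 */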
2152 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2153 {
2154 	struct nfp_insn_meta *meta1, *meta2, *meta3;
2155 
2156 	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2157 		struct bpf_insn insn, next1, next2;
2158 
2159 		insn = meta1->insn;
2160 		next1 = meta2->insn;
2161 		next2 = meta3->insn;
2162 
2163 		if (BPF_CLASS(insn.code) != BPF_LD)
2164 			continue;
2165 		if (BPF_MODE(insn.code) != BPF_ABS &&
2166 		    BPF_MODE(insn.code) != BPF_IND)
2167 			continue;
2168 		if (BPF_SIZE(insn.code) != BPF_W)
2169 			continue;
2170 
2171 		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2172 		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2173 		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2174 		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2175 			continue;
2176 
2177 		if (next1.src_reg || next1.dst_reg ||
2178 		    next2.src_reg || next2.dst_reg)
2179 			continue;
2180 
2181 		if (next1.imm != 0x20 || next2.imm != 0x20)
2182 			continue;
2183 
2184 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2185 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
2186 			continue;
2187 
2188 		meta2->skip = true;
2189 		meta3->skip = true;
2190 	}
2191 }
2192 
2193 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
2194 {
2195 	nfp_bpf_opt_reg_init(nfp_prog);
2196 
2197 	nfp_bpf_opt_ld_mask(nfp_prog);
2198 	nfp_bpf_opt_ld_shift(nfp_prog);
2199 
2200 	return 0;
2201 }
2202 
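/* Check every instruction is valid for the ustore, fold in its ECC bits
 * and write the program out in little-endian form.
 */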
2203 static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore)
2204 {
2205 	int i;
2206 
2207 	for (i = 0; i < nfp_prog->prog_len; i++) {
2208 		int err;
2209 
2210 		err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]);
2211 		if (err)
2212 			return err;
2213 
2214 		nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]);
2215 
2216 		ustore[i] = cpu_to_le64(nfp_prog->prog[i]);
2217 	}
2218 
2219 	return 0;
2220 }
2221 
2222 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
2223 {
2224 	int ret;
2225 
2226 	ret = nfp_bpf_optimize(nfp_prog);
2227 	if (ret)
2228 		return ret;
2229 
2230 	ret = nfp_translate(nfp_prog);
2231 	if (ret) {
2232 		pr_err("Translation failed with error %d (translated: %u)\n",
2233 		       ret, nfp_prog->n_translated);
2234 		return -EINVAL;
2235 	}
2236 
2237 	return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog);
2238 }
2239