1 /*
2  * Copyright (C) 2016-2017 Netronome Systems, Inc.
3  *
 * This software is dual licensed under the GNU General Public License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree or the BSD 2-Clause License provided below.  You have the
7  * option to license this software under the complete terms of either license.
8  *
9  * The BSD 2-Clause License:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      1. Redistributions of source code must retain the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer.
18  *
19  *      2. Redistributions in binary form must reproduce the above
20  *         copyright notice, this list of conditions and the following
21  *         disclaimer in the documentation and/or other materials
22  *         provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #define pr_fmt(fmt)	"NFP net bpf: " fmt
35 
36 #include <linux/bug.h>
37 #include <linux/kernel.h>
38 #include <linux/bpf.h>
39 #include <linux/filter.h>
40 #include <linux/pkt_cls.h>
41 #include <linux/unistd.h>
42 
43 #include "main.h"
44 #include "../nfp_asm.h"
45 
46 /* --- NFP prog --- */
/* The for-each "multiple entries" macros provide pos and next<n> pointers.
48  * It's safe to modify the next pointers (but not pos).
49  */
50 #define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
51 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
52 	     next = list_next_entry(pos, l);			\
53 	     &(nfp_prog)->insns != &pos->l &&			\
54 	     &(nfp_prog)->insns != &next->l;			\
55 	     pos = nfp_meta_next(pos),				\
56 	     next = nfp_meta_next(pos))
57 
58 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
59 	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
60 	     next = list_next_entry(pos, l),			\
61 	     next2 = list_next_entry(next, l);			\
62 	     &(nfp_prog)->insns != &pos->l &&			\
63 	     &(nfp_prog)->insns != &next->l &&			\
64 	     &(nfp_prog)->insns != &next2->l;			\
65 	     pos = nfp_meta_next(pos),				\
66 	     next = nfp_meta_next(pos),				\
67 	     next2 = nfp_meta_next(next))
68 
69 static bool
70 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
71 {
72 	return meta->l.prev != &nfp_prog->insns;
73 }
74 
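/* Append a single 64-bit instruction word to the program buffer, recording
 * -ENOSPC instead of writing past the end when the preallocated buffer is
 * full.
 */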
75 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
76 {
77 	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
78 		pr_warn("instruction limit reached (%u NFP instructions)\n",
79 			nfp_prog->prog_len);
80 		nfp_prog->error = -ENOSPC;
81 		return;
82 	}
83 
84 	nfp_prog->prog[nfp_prog->prog_len] = insn;
85 	nfp_prog->prog_len++;
86 }
87 
88 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
89 {
90 	return nfp_prog->prog_len;
91 }
92 
93 static bool
94 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
95 {
96 	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the translation
98 	 * will fail anyway, so just return OK.
99 	 */
100 	if (nfp_prog->error)
101 		return true;
102 	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
103 }
104 
105 /* --- Emitters --- */
106 static void
107 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
108 	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
109 {
110 	enum cmd_ctx_swap ctx;
111 	u64 insn;
112 
113 	if (sync)
114 		ctx = CMD_CTX_SWAP;
115 	else
116 		ctx = CMD_CTX_NO_SWAP;
117 
118 	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
119 		FIELD_PREP(OP_CMD_CTX, ctx) |
120 		FIELD_PREP(OP_CMD_B_SRC, breg) |
121 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
122 		FIELD_PREP(OP_CMD_XFER, xfer) |
123 		FIELD_PREP(OP_CMD_CNT, size) |
124 		FIELD_PREP(OP_CMD_SIG, sync) |
125 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
126 		FIELD_PREP(OP_CMD_INDIR, indir) |
127 		FIELD_PREP(OP_CMD_MODE, mode);
128 
129 	nfp_prog_push(nfp_prog, insn);
130 }
131 
132 static void
133 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
134 	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
135 {
136 	struct nfp_insn_re_regs reg;
137 	int err;
138 
139 	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
140 	if (err) {
141 		nfp_prog->error = err;
142 		return;
143 	}
144 	if (reg.swap) {
145 		pr_err("cmd can't swap arguments\n");
146 		nfp_prog->error = -EFAULT;
147 		return;
148 	}
149 	if (reg.dst_lmextn || reg.src_lmextn) {
150 		pr_err("cmd can't use LMextn\n");
151 		nfp_prog->error = -EFAULT;
152 		return;
153 	}
154 
155 	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
156 		   indir);
157 }
158 
159 static void
160 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
161 	 swreg lreg, swreg rreg, u8 size, bool sync)
162 {
163 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
164 }
165 
166 static void
167 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
168 	       swreg lreg, swreg rreg, u8 size, bool sync)
169 {
170 	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
171 }
172 
173 static void
174 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
175 	  enum br_ctx_signal_state css, u16 addr, u8 defer)
176 {
177 	u16 addr_lo, addr_hi;
178 	u64 insn;
179 
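	/* Only the low bits of the address fit in the LO field, addr_hi is a
	 * single flag recording whether any higher bits were set.
	 */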
180 	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
181 	addr_hi = addr != addr_lo;
182 
183 	insn = OP_BR_BASE |
184 		FIELD_PREP(OP_BR_MASK, mask) |
185 		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
186 		FIELD_PREP(OP_BR_CSS, css) |
187 		FIELD_PREP(OP_BR_DEFBR, defer) |
188 		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
189 		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
190 
191 	nfp_prog_push(nfp_prog, insn);
192 }
193 
194 static void
195 emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
196 	     enum nfp_relo_type relo)
197 {
198 	if (mask == BR_UNC && defer > 2) {
199 		pr_err("BUG: branch defer out of bounds %d\n", defer);
200 		nfp_prog->error = -EFAULT;
201 		return;
202 	}
203 
204 	__emit_br(nfp_prog, mask,
205 		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
206 		  BR_CSS_NONE, addr, defer);
207 
208 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
209 		FIELD_PREP(OP_RELO_TYPE, relo);
210 }
211 
212 static void
213 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
214 {
215 	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
216 }
217 
218 static void
219 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
220 	     enum immed_width width, bool invert,
221 	     enum immed_shift shift, bool wr_both,
222 	     bool dst_lmextn, bool src_lmextn)
223 {
224 	u64 insn;
225 
226 	insn = OP_IMMED_BASE |
227 		FIELD_PREP(OP_IMMED_A_SRC, areg) |
228 		FIELD_PREP(OP_IMMED_B_SRC, breg) |
229 		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
230 		FIELD_PREP(OP_IMMED_WIDTH, width) |
231 		FIELD_PREP(OP_IMMED_INV, invert) |
232 		FIELD_PREP(OP_IMMED_SHIFT, shift) |
233 		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
234 		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
235 		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
236 
237 	nfp_prog_push(nfp_prog, insn);
238 }
239 
240 static void
241 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
242 	   enum immed_width width, bool invert, enum immed_shift shift)
243 {
244 	struct nfp_insn_ur_regs reg;
245 	int err;
246 
247 	if (swreg_type(dst) == NN_REG_IMM) {
248 		nfp_prog->error = -EFAULT;
249 		return;
250 	}
251 
252 	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
253 	if (err) {
254 		nfp_prog->error = err;
255 		return;
256 	}
257 
258 	/* Use reg.dst when destination is No-Dest. */
259 	__emit_immed(nfp_prog,
260 		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
261 		     reg.breg, imm >> 8, width, invert, shift,
262 		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
263 }
264 
265 static void
266 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
267 	   enum shf_sc sc, u8 shift,
268 	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
269 	   bool dst_lmextn, bool src_lmextn)
270 {
271 	u64 insn;
272 
273 	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
274 		nfp_prog->error = -EFAULT;
275 		return;
276 	}
277 
278 	if (sc == SHF_SC_L_SHF)
279 		shift = 32 - shift;
280 
281 	insn = OP_SHF_BASE |
282 		FIELD_PREP(OP_SHF_A_SRC, areg) |
283 		FIELD_PREP(OP_SHF_SC, sc) |
284 		FIELD_PREP(OP_SHF_B_SRC, breg) |
285 		FIELD_PREP(OP_SHF_I8, i8) |
286 		FIELD_PREP(OP_SHF_SW, sw) |
287 		FIELD_PREP(OP_SHF_DST, dst) |
288 		FIELD_PREP(OP_SHF_SHIFT, shift) |
289 		FIELD_PREP(OP_SHF_OP, op) |
290 		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
291 		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
292 		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
293 		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
294 
295 	nfp_prog_push(nfp_prog, insn);
296 }
297 
298 static void
299 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
300 	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
301 {
302 	struct nfp_insn_re_regs reg;
303 	int err;
304 
305 	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
306 	if (err) {
307 		nfp_prog->error = err;
308 		return;
309 	}
310 
311 	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
312 		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
313 		   reg.dst_lmextn, reg.src_lmextn);
314 }
315 
316 static void
317 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
318 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
319 	   bool dst_lmextn, bool src_lmextn)
320 {
321 	u64 insn;
322 
323 	insn = OP_ALU_BASE |
324 		FIELD_PREP(OP_ALU_A_SRC, areg) |
325 		FIELD_PREP(OP_ALU_B_SRC, breg) |
326 		FIELD_PREP(OP_ALU_DST, dst) |
327 		FIELD_PREP(OP_ALU_SW, swap) |
328 		FIELD_PREP(OP_ALU_OP, op) |
329 		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
330 		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
331 		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
332 		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
333 
334 	nfp_prog_push(nfp_prog, insn);
335 }
336 
337 static void
338 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
339 	 swreg lreg, enum alu_op op, swreg rreg)
340 {
341 	struct nfp_insn_ur_regs reg;
342 	int err;
343 
344 	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
345 	if (err) {
346 		nfp_prog->error = err;
347 		return;
348 	}
349 
350 	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
351 		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
352 		   reg.dst_lmextn, reg.src_lmextn);
353 }
354 
355 static void
356 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
357 		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
358 		bool zero, bool swap, bool wr_both,
359 		bool dst_lmextn, bool src_lmextn)
360 {
361 	u64 insn;
362 
363 	insn = OP_LDF_BASE |
364 		FIELD_PREP(OP_LDF_A_SRC, areg) |
365 		FIELD_PREP(OP_LDF_SC, sc) |
366 		FIELD_PREP(OP_LDF_B_SRC, breg) |
367 		FIELD_PREP(OP_LDF_I8, imm8) |
368 		FIELD_PREP(OP_LDF_SW, swap) |
369 		FIELD_PREP(OP_LDF_ZF, zero) |
370 		FIELD_PREP(OP_LDF_BMASK, bmask) |
371 		FIELD_PREP(OP_LDF_SHF, shift) |
372 		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
373 		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
374 		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
375 
376 	nfp_prog_push(nfp_prog, insn);
377 }
378 
379 static void
380 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
381 		  enum shf_sc sc, u8 shift, bool zero)
382 {
383 	struct nfp_insn_re_regs reg;
384 	int err;
385 
386 	/* Note: ld_field is special as it uses one of the src regs as dst */
387 	err = swreg_to_restricted(dst, dst, src, &reg, true);
388 	if (err) {
389 		nfp_prog->error = err;
390 		return;
391 	}
392 
393 	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
394 			reg.i8, zero, reg.swap, reg.wr_both,
395 			reg.dst_lmextn, reg.src_lmextn);
396 }
397 
398 static void
399 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
400 	      enum shf_sc sc, u8 shift)
401 {
402 	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
403 }
404 
405 static void
406 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
407 	    bool dst_lmextn, bool src_lmextn)
408 {
409 	u64 insn;
410 
411 	insn = OP_LCSR_BASE |
412 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
413 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
414 		FIELD_PREP(OP_LCSR_WRITE, wr) |
415 		FIELD_PREP(OP_LCSR_ADDR, addr) |
416 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
417 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
418 
419 	nfp_prog_push(nfp_prog, insn);
420 }
421 
422 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
423 {
424 	struct nfp_insn_ur_regs reg;
425 	int err;
426 
427 	/* This instruction takes immeds instead of reg_none() for the ignored
428 	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra, so if the param is an immed, we encode it as reg_none() and
430 	 * copy the immed to both operands.
431 	 */
432 	if (swreg_type(src) == NN_REG_IMM) {
433 		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
434 		reg.breg = reg.areg;
435 	} else {
436 		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
437 	}
438 	if (err) {
439 		nfp_prog->error = err;
440 		return;
441 	}
442 
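	/* The local CSR address is encoded in 32-bit words, hence addr / 4. */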
443 	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
444 		    false, reg.src_lmextn);
445 }
446 
447 static void emit_nop(struct nfp_prog *nfp_prog)
448 {
449 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
450 }
451 
452 /* --- Wrappers --- */
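/* Try to fit a 32-bit immediate into the 16-bit value plus byte shift
 * accepted by a single immed[] instruction.  Returns false if the immediate
 * cannot be represented that way.
 */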
453 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
454 {
455 	if (!(imm & 0xffff0000)) {
456 		*val = imm;
457 		*shift = IMMED_SHIFT_0B;
458 	} else if (!(imm & 0xff0000ff)) {
459 		*val = imm >> 8;
460 		*shift = IMMED_SHIFT_1B;
461 	} else if (!(imm & 0x0000ffff)) {
462 		*val = imm >> 16;
463 		*shift = IMMED_SHIFT_2B;
464 	} else {
465 		return false;
466 	}
467 
468 	return true;
469 }
470 
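/* Load an arbitrary 32-bit immediate into @dst.  Use a single immed[] when
 * the value (or its bitwise inverse) packs into 16 bits with a byte shift,
 * otherwise emit two instructions writing the two halves separately.
 */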
471 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
472 {
473 	enum immed_shift shift;
474 	u16 val;
475 
476 	if (pack_immed(imm, &val, &shift)) {
477 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
478 	} else if (pack_immed(~imm, &val, &shift)) {
479 		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
480 	} else {
481 		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
482 			   false, IMMED_SHIFT_0B);
483 		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
484 			   false, IMMED_SHIFT_2B);
485 	}
486 }
487 
488 static void
489 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
490 	       enum nfp_relo_type relo)
491 {
492 	if (imm > 0xffff) {
493 		pr_err("relocation of a large immediate!\n");
494 		nfp_prog->error = -EFAULT;
495 		return;
496 	}
497 	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
498 
499 	nfp_prog->prog[nfp_prog->prog_len - 1] |=
500 		FIELD_PREP(OP_RELO_TYPE, relo);
501 }
502 
503 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return it,
 * otherwise load @imm into @tmp_reg and return @tmp_reg.
506  */
507 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
508 {
509 	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
510 		return reg_imm(imm);
511 
512 	wrp_immed(nfp_prog, tmp_reg, imm);
513 	return tmp_reg;
514 }
515 
516 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return it,
 * otherwise load @imm into @tmp_reg and return @tmp_reg.
519  */
520 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
521 {
522 	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
523 		return reg_imm(imm);
524 
525 	wrp_immed(nfp_prog, tmp_reg, imm);
526 	return tmp_reg;
527 }
528 
529 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
530 {
531 	while (count--)
532 		emit_nop(nfp_prog);
533 }
534 
535 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
536 {
537 	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
538 }
539 
540 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
541 {
542 	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
543 }
544 
545 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
546  * result to @dst from low end.
547  */
548 static void
549 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
550 		u8 offset)
551 {
552 	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
553 	u8 mask = (1 << field_len) - 1;
554 
555 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
556 }
557 
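/* Split a 40-bit address into the A/B operand halves.  The low 32 bits live
 * in @src_gpr and the high bits in @src_gpr + 1; a non-zero @offset is added
 * to the low half with the carry propagated into the high half.
 */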
558 static void
559 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
560 	      swreg *rega, swreg *regb)
561 {
562 	if (offset == reg_imm(0)) {
563 		*rega = reg_a(src_gpr);
564 		*regb = reg_b(src_gpr + 1);
565 		return;
566 	}
567 
568 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
569 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
570 		 reg_imm(0));
571 	*rega = imm_a(nfp_prog);
572 	*regb = imm_b(nfp_prog);
573 }
574 
/* NFP has a Command Push Pull bus which supports bulk memory operations. */
576 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
577 {
578 	bool descending_seq = meta->ldst_gather_len < 0;
579 	s16 len = abs(meta->ldst_gather_len);
580 	swreg src_base, off;
581 	bool src_40bit_addr;
582 	unsigned int i;
583 	u8 xfer_num;
584 
585 	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
586 	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
587 	src_base = reg_a(meta->insn.src_reg * 2);
588 	xfer_num = round_up(len, 4) / 4;
589 
590 	if (src_40bit_addr)
591 		addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
592 			      &off);
593 
	/* Set up PREV_ALU fields to override the memory read length. */
595 	if (len > 32)
596 		wrp_immed(nfp_prog, reg_none(),
597 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
598 
599 	/* Memory read from source addr into transfer-in registers. */
600 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
601 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
602 		     src_base, off, xfer_num - 1, true, len > 32);
603 
604 	/* Move from transfer-in to transfer-out. */
605 	for (i = 0; i < xfer_num; i++)
606 		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
607 
608 	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
609 
610 	if (len <= 8) {
611 		/* Use single direct_ref write8. */
612 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
613 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
614 			 true);
615 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
616 		/* Use single direct_ref write32. */
617 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
618 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
619 			 true);
620 	} else if (len <= 32) {
621 		/* Use single indirect_ref write8. */
622 		wrp_immed(nfp_prog, reg_none(),
623 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
624 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
625 			       reg_a(meta->paired_st->dst_reg * 2), off,
626 			       len - 1, true);
627 	} else if (IS_ALIGNED(len, 4)) {
628 		/* Use single indirect_ref write32. */
629 		wrp_immed(nfp_prog, reg_none(),
630 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
631 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
632 			       reg_a(meta->paired_st->dst_reg * 2), off,
633 			       xfer_num - 1, true);
634 	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
636 		 * another direct_ref write8 to write the remaining bytes.
637 		 */
638 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
639 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
640 			 true);
641 
642 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
643 				      imm_b(nfp_prog));
644 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
645 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
646 			 true);
647 	} else {
		/* Use one indirect_ref write32 to write the 4-byte aligned length,
649 		 * then another direct_ref write8 to write the remaining bytes.
650 		 */
651 		u8 new_off;
652 
653 		wrp_immed(nfp_prog, reg_none(),
654 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
655 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
656 			       reg_a(meta->paired_st->dst_reg * 2), off,
657 			       xfer_num - 2, true);
658 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
659 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
660 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
661 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
662 			 (len & 0x3) - 1, true);
663 	}
664 
	/* TODO: The following extra load is to make sure the data flow is
	 *  identical before and after we do the memory copy optimization.
	 *
	 *  The load destination register is not guaranteed to be dead, so we
	 *  need to make sure it is loaded with the same value as before
	 *  this transformation.
671 	 *
672 	 *  These extra loads could be removed once we have accurate register
673 	 *  usage information.
674 	 */
675 	if (descending_seq)
676 		xfer_num = 0;
677 	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
678 		xfer_num = xfer_num - 1;
679 	else
680 		xfer_num = xfer_num - 2;
681 
682 	switch (BPF_SIZE(meta->insn.code)) {
683 	case BPF_B:
684 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
685 				reg_xfer(xfer_num), 1,
686 				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
687 		break;
688 	case BPF_H:
689 		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
690 				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
691 		break;
692 	case BPF_W:
693 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
694 			reg_xfer(0));
695 		break;
696 	case BPF_DW:
697 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
698 			reg_xfer(xfer_num));
699 		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
700 			reg_xfer(xfer_num + 1));
701 		break;
702 	}
703 
704 	if (BPF_SIZE(meta->insn.code) != BPF_DW)
705 		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
706 
707 	return 0;
708 }
709 
710 static int
711 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
712 {
713 	unsigned int i;
714 	u16 shift, sz;
715 
716 	/* We load the value from the address indicated in @offset and then
717 	 * shift out the data we don't need.  Note: this is big endian!
718 	 */
719 	sz = max(size, 4);
720 	shift = size < 4 ? 4 - size : 0;
721 
722 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
723 		 pptr_reg(nfp_prog), offset, sz - 1, true);
724 
725 	i = 0;
726 	if (shift)
727 		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
728 			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
729 	else
730 		for (; i * 4 < size; i++)
731 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
732 
733 	if (i < 2)
734 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
735 
736 	return 0;
737 }
738 
739 static int
740 data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
741 		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
742 {
743 	unsigned int i;
744 	u8 mask, sz;
745 
746 	/* We load the value from the address indicated in rreg + lreg and then
747 	 * mask out the data we don't need.  Note: this is little endian!
748 	 */
749 	sz = max(size, 4);
750 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
751 
752 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
753 		 lreg, rreg, sz / 4 - 1, true);
754 
755 	i = 0;
756 	if (mask)
757 		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
758 				  reg_xfer(0), SHF_SC_NONE, 0, true);
759 	else
760 		for (; i * 4 < size; i++)
761 			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
762 
763 	if (i < 2)
764 		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
765 
766 	return 0;
767 }
768 
769 static int
770 data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
771 			  u8 dst_gpr, u8 size)
772 {
773 	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
774 				  size, CMD_MODE_32b);
775 }
776 
777 static int
778 data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
779 			  u8 dst_gpr, u8 size)
780 {
781 	swreg rega, regb;
782 
783 	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
784 
785 	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
786 				  size, CMD_MODE_40b_BA);
787 }
788 
789 static int
790 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
791 {
792 	swreg tmp_reg;
793 
794 	/* Calculate the true offset (src_reg + imm) */
795 	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
796 	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
797 
	/* Check packet length (size guaranteed to fit because it's a u8) */
799 	emit_alu(nfp_prog, imm_a(nfp_prog),
800 		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
801 	emit_alu(nfp_prog, reg_none(),
802 		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
803 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
804 
805 	/* Load data */
806 	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
807 }
808 
809 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
810 {
811 	swreg tmp_reg;
812 
813 	/* Check packet length */
814 	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
815 	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
816 	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
817 
818 	/* Load data */
819 	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
820 	return data_ld(nfp_prog, tmp_reg, 0, size);
821 }
822 
823 static int
824 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
825 		    u8 src_gpr, u8 size)
826 {
827 	unsigned int i;
828 
829 	for (i = 0; i * 4 < size; i++)
830 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
831 
832 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
833 		 reg_a(dst_gpr), offset, size - 1, true);
834 
835 	return 0;
836 }
837 
838 static int
839 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
840 		   u64 imm, u8 size)
841 {
842 	wrp_immed(nfp_prog, reg_xfer(0), imm);
843 	if (size == 8)
844 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
845 
846 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
847 		 reg_a(dst_gpr), offset, size - 1, true);
848 
849 	return 0;
850 }
851 
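/* Callback used by mem_op_stack() for each sub-word slice of a stack access.
 * @off is the byte offset from the LM pointer base, @first/@last mark the
 * slice's position within the access, @new_gpr is set when the slice starts a
 * new GPR, @lm3 selects LM pointer 3 and @needs_inc requests post-increment
 * addressing.
 */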
852 typedef int
853 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
854 	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
855 	     bool needs_inc);
856 
857 static int
858 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
859 	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
860 	      bool needs_inc)
861 {
862 	bool should_inc = needs_inc && new_gpr && !last;
863 	u32 idx, src_byte;
864 	enum shf_sc sc;
865 	swreg reg;
866 	int shf;
867 	u8 mask;
868 
869 	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
870 		return -EOPNOTSUPP;
871 
872 	idx = off / 4;
873 
874 	/* Move the entire word */
875 	if (size == 4) {
876 		wrp_mov(nfp_prog, reg_both(dst),
877 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
878 		return 0;
879 	}
880 
881 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
882 		return -EOPNOTSUPP;
883 
884 	src_byte = off % 4;
885 
886 	mask = (1 << size) - 1;
887 	mask <<= dst_byte;
888 
889 	if (WARN_ON_ONCE(mask > 0xf))
890 		return -EOPNOTSUPP;
891 
892 	shf = abs(src_byte - dst_byte) * 8;
893 	if (src_byte == dst_byte) {
894 		sc = SHF_SC_NONE;
895 	} else if (src_byte < dst_byte) {
896 		shf = 32 - shf;
897 		sc = SHF_SC_L_SHF;
898 	} else {
899 		sc = SHF_SC_R_SHF;
900 	}
901 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
904 	 */
905 	if (idx <= RE_REG_LM_IDX_MAX) {
906 		reg = reg_lm(lm3 ? 3 : 0, idx);
907 	} else {
908 		reg = imm_a(nfp_prog);
909 		/* If it's not the first part of the load and we start a new GPR
		 * that means we are loading the second part of the LMEM word into
		 * a new GPR.  IOW we've already looked at that LMEM word and
912 		 * therefore it has been loaded into imm_a().
913 		 */
914 		if (first || !new_gpr)
915 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
916 	}
917 
918 	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
919 
920 	if (should_inc)
921 		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
922 
923 	return 0;
924 }
925 
926 static int
927 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
928 	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
929 	       bool needs_inc)
930 {
931 	bool should_inc = needs_inc && new_gpr && !last;
932 	u32 idx, dst_byte;
933 	enum shf_sc sc;
934 	swreg reg;
935 	int shf;
936 	u8 mask;
937 
938 	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
939 		return -EOPNOTSUPP;
940 
941 	idx = off / 4;
942 
943 	/* Move the entire word */
944 	if (size == 4) {
945 		wrp_mov(nfp_prog,
946 			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
947 			reg_b(src));
948 		return 0;
949 	}
950 
951 	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
952 		return -EOPNOTSUPP;
953 
954 	dst_byte = off % 4;
955 
956 	mask = (1 << size) - 1;
957 	mask <<= dst_byte;
958 
959 	if (WARN_ON_ONCE(mask > 0xf))
960 		return -EOPNOTSUPP;
961 
962 	shf = abs(src_byte - dst_byte) * 8;
963 	if (src_byte == dst_byte) {
964 		sc = SHF_SC_NONE;
965 	} else if (src_byte < dst_byte) {
966 		shf = 32 - shf;
967 		sc = SHF_SC_L_SHF;
968 	} else {
969 		sc = SHF_SC_R_SHF;
970 	}
971 
	/* ld_field can address fewer indexes; if the offset is too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
974 	 */
975 	if (idx <= RE_REG_LM_IDX_MAX) {
976 		reg = reg_lm(lm3 ? 3 : 0, idx);
977 	} else {
978 		reg = imm_a(nfp_prog);
		/* Only the first and last LMEM locations are going to need RMW;
		 * the middle locations will be overwritten fully.
981 		 */
982 		if (first || last)
983 			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
984 	}
985 
986 	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
987 
988 	if (new_gpr || last) {
989 		if (idx > RE_REG_LM_IDX_MAX)
990 			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
991 		if (should_inc)
992 			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
993 	}
994 
995 	return 0;
996 }
997 
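/* Split a stack access into sub-word slices and emit each one via @step.
 * Accesses which fit into the bottom 64B reachable through LMaddr0 need no
 * setup; otherwise LM pointer 3 is pointed at the access, with post-increment
 * addressing when the pointer is not constant or the access may cross a
 * 32 byte window.
 */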
998 static int
999 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1000 	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
1001 	     bool clr_gpr, lmem_step step)
1002 {
1003 	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
1004 	bool first = true, last;
1005 	bool needs_inc = false;
1006 	swreg stack_off_reg;
1007 	u8 prev_gpr = 255;
1008 	u32 gpr_byte = 0;
1009 	bool lm3 = true;
1010 	int ret;
1011 
1012 	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK; they all have
		 * the same alignment.  Depend on the low bits of the value
		 * being discarded when written to the LMaddr register.
1016 		 */
1017 		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
1018 						stack_imm(nfp_prog));
1019 
1020 		emit_alu(nfp_prog, imm_b(nfp_prog),
1021 			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
1022 
1023 		needs_inc = true;
1024 	} else if (off + size <= 64) {
1025 		/* We can reach bottom 64B with LMaddr0 */
1026 		lm3 = false;
1027 	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
1028 		/* We have to set up a new pointer.  If we know the offset
1029 		 * and the entire access falls into a single 32 byte aligned
1030 		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because the offset is
		 * ORed in, not added, when doing *l$indexN[off].
1033 		 */
1034 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
1035 						stack_imm(nfp_prog));
1036 		emit_alu(nfp_prog, imm_b(nfp_prog),
1037 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1038 
1039 		off %= 32;
1040 	} else {
1041 		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
1042 						stack_imm(nfp_prog));
1043 
1044 		emit_alu(nfp_prog, imm_b(nfp_prog),
1045 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1046 
1047 		needs_inc = true;
1048 	}
1049 	if (lm3) {
1050 		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by the zeroing of the upper word. */
1052 		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1053 	}
1054 
1055 	if (clr_gpr && size < 8)
1056 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1057 
1058 	while (size) {
1059 		u32 slice_end;
1060 		u8 slice_size;
1061 
1062 		slice_size = min(size, 4 - gpr_byte);
1063 		slice_end = min(off + slice_size, round_up(off + 1, 4));
1064 		slice_size = slice_end - off;
1065 
1066 		last = slice_size == size;
1067 
1068 		if (needs_inc)
1069 			off %= 4;
1070 
1071 		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
1072 			   first, gpr != prev_gpr, last, lm3, needs_inc);
1073 		if (ret)
1074 			return ret;
1075 
1076 		prev_gpr = gpr;
1077 		first = false;
1078 
1079 		gpr_byte += slice_size;
1080 		if (gpr_byte >= 4) {
1081 			gpr_byte -= 4;
1082 			gpr++;
1083 		}
1084 
1085 		size -= slice_size;
1086 		off += slice_size;
1087 	}
1088 
1089 	return 0;
1090 }
1091 
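/* 32-bit ALU operation with an immediate operand.  AND/OR/XOR with all-zeros
 * or all-ones immediates are folded into an immediate load, a complement or
 * no instruction at all.
 */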
1092 static void
1093 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
1094 {
1095 	swreg tmp_reg;
1096 
1097 	if (alu_op == ALU_OP_AND) {
1098 		if (!imm)
1099 			wrp_immed(nfp_prog, reg_both(dst), 0);
1100 		if (!imm || !~imm)
1101 			return;
1102 	}
1103 	if (alu_op == ALU_OP_OR) {
1104 		if (!~imm)
1105 			wrp_immed(nfp_prog, reg_both(dst), ~0U);
1106 		if (!imm || !~imm)
1107 			return;
1108 	}
1109 	if (alu_op == ALU_OP_XOR) {
1110 		if (!~imm)
1111 			emit_alu(nfp_prog, reg_both(dst), reg_none(),
1112 				 ALU_OP_NOT, reg_b(dst));
1113 		if (!imm || !~imm)
1114 			return;
1115 	}
1116 
1117 	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1118 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
1119 }
1120 
1121 static int
1122 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1123 	      enum alu_op alu_op, bool skip)
1124 {
1125 	const struct bpf_insn *insn = &meta->insn;
1126 	u64 imm = insn->imm; /* sign extend */
1127 
1128 	if (skip) {
1129 		meta->skip = true;
1130 		return 0;
1131 	}
1132 
1133 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
1134 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
1135 
1136 	return 0;
1137 }
1138 
1139 static int
1140 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1141 	      enum alu_op alu_op)
1142 {
1143 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1144 
1145 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1146 	emit_alu(nfp_prog, reg_both(dst + 1),
1147 		 reg_a(dst + 1), alu_op, reg_b(src + 1));
1148 
1149 	return 0;
1150 }
1151 
1152 static int
1153 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1154 	      enum alu_op alu_op, bool skip)
1155 {
1156 	const struct bpf_insn *insn = &meta->insn;
1157 
1158 	if (skip) {
1159 		meta->skip = true;
1160 		return 0;
1161 	}
1162 
1163 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1164 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1165 
1166 	return 0;
1167 }
1168 
1169 static int
1170 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1171 	      enum alu_op alu_op)
1172 {
1173 	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1174 
1175 	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1176 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1177 
1178 	return 0;
1179 }
1180 
1181 static void
1182 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
1183 		 enum br_mask br_mask, u16 off)
1184 {
1185 	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
1186 	emit_br(nfp_prog, br_mask, off, 0);
1187 }
1188 
1189 static int
1190 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1191 	     enum alu_op alu_op, enum br_mask br_mask)
1192 {
1193 	const struct bpf_insn *insn = &meta->insn;
1194 
1195 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
1196 			 insn->src_reg * 2, br_mask, insn->off);
1197 	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1198 			 insn->src_reg * 2 + 1, br_mask, insn->off);
1199 
1200 	return 0;
1201 }
1202 
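/* 64-bit compare against an immediate: subtract the two halves (the second
 * with carry) into reg_none() so only the condition codes are updated, then
 * branch on @br_mask.  @swap reverses the operand order.
 */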
1203 static int
1204 wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1205 	    enum br_mask br_mask, bool swap)
1206 {
1207 	const struct bpf_insn *insn = &meta->insn;
1208 	u64 imm = insn->imm; /* sign extend */
1209 	u8 reg = insn->dst_reg * 2;
1210 	swreg tmp_reg;
1211 
1212 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1213 	if (!swap)
1214 		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
1215 	else
1216 		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
1217 
1218 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1219 	if (!swap)
1220 		emit_alu(nfp_prog, reg_none(),
1221 			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
1222 	else
1223 		emit_alu(nfp_prog, reg_none(),
1224 			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
1225 
1226 	emit_br(nfp_prog, br_mask, insn->off, 0);
1227 
1228 	return 0;
1229 }
1230 
1231 static int
1232 wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1233 	    enum br_mask br_mask, bool swap)
1234 {
1235 	const struct bpf_insn *insn = &meta->insn;
1236 	u8 areg, breg;
1237 
1238 	areg = insn->dst_reg * 2;
1239 	breg = insn->src_reg * 2;
1240 
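	/* Swap the operand registers in place using the XOR swap trick. */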
1241 	if (swap) {
1242 		areg ^= breg;
1243 		breg ^= areg;
1244 		areg ^= breg;
1245 	}
1246 
1247 	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1248 	emit_alu(nfp_prog, reg_none(),
1249 		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1250 	emit_br(nfp_prog, br_mask, insn->off, 0);
1251 
1252 	return 0;
1253 }
1254 
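/* Byte swap the low 32 bits of @reg_in into @gpr_out: rotate by 8 writing
 * all four bytes, then rotate by 16 rewriting bytes 0 and 2 (mask 0x5).
 */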
1255 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1256 {
1257 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1258 		      SHF_SC_R_ROT, 8);
1259 	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1260 		      SHF_SC_R_ROT, 16);
1261 }
1262 
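/* Open-coded bpf_xdp_adjust_head(): validate the new packet start against
 * the FW's off_min/off_max limits and ETH_HLEN, then update the packet
 * pointer and length, setting r0 to 0 on success or -EINVAL on failure.
 */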
1263 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1264 {
1265 	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1266 	struct nfp_bpf_cap_adjust_head *adjust_head;
1267 	u32 ret_einval, end;
1268 
1269 	adjust_head = &nfp_prog->bpf->adjust_head;
1270 
1271 	/* Optimized version - 5 vs 14 cycles */
1272 	if (nfp_prog->adjust_head_location != UINT_MAX) {
1273 		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1274 			return -EINVAL;
1275 
1276 		emit_alu(nfp_prog, pptr_reg(nfp_prog),
1277 			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1278 		emit_alu(nfp_prog, plen_reg(nfp_prog),
1279 			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1280 		emit_alu(nfp_prog, pv_len(nfp_prog),
1281 			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1282 
1283 		wrp_immed(nfp_prog, reg_both(0), 0);
1284 		wrp_immed(nfp_prog, reg_both(1), 0);
1285 
1286 		/* TODO: when adjust head is guaranteed to succeed we can
1287 		 * also eliminate the following if (r0 == 0) branch.
1288 		 */
1289 
1290 		return 0;
1291 	}
1292 
1293 	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1294 	end = ret_einval + 2;
1295 
1296 	/* We need to use a temp because offset is just a part of the pkt ptr */
1297 	emit_alu(nfp_prog, tmp,
1298 		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1299 
1300 	/* Validate result will fit within FW datapath constraints */
1301 	emit_alu(nfp_prog, reg_none(),
1302 		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1303 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1304 	emit_alu(nfp_prog, reg_none(),
1305 		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1306 	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1307 
1308 	/* Validate the length is at least ETH_HLEN */
1309 	emit_alu(nfp_prog, tmp_len,
1310 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1311 	emit_alu(nfp_prog, reg_none(),
1312 		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1313 	emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1314 
1315 	/* Load the ret code */
1316 	wrp_immed(nfp_prog, reg_both(0), 0);
1317 	wrp_immed(nfp_prog, reg_both(1), 0);
1318 
1319 	/* Modify the packet metadata */
1320 	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1321 
1322 	/* Skip over the -EINVAL ret code (defer 2) */
1323 	emit_br(nfp_prog, BR_UNC, end, 2);
1324 
1325 	emit_alu(nfp_prog, plen_reg(nfp_prog),
1326 		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1327 	emit_alu(nfp_prog, pv_len(nfp_prog),
1328 		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1329 
1330 	/* return -EINVAL target */
1331 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1332 		return -EINVAL;
1333 
1334 	wrp_immed(nfp_prog, reg_both(0), -22);
1335 	wrp_immed(nfp_prog, reg_both(1), ~0);
1336 
1337 	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1338 		return -EINVAL;
1339 
1340 	return 0;
1341 }
1342 
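/* Call the firmware's map lookup helper: point LM0 at the key on the stack
 * if needed, then branch to the helper via a relocation, loading the map ID
 * into A0 and the return address into B0 in the two delay slots.
 */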
1343 static int
1344 map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1345 {
1346 	struct bpf_offloaded_map *offmap;
1347 	struct nfp_bpf_map *nfp_map;
1348 	bool load_lm_ptr;
1349 	u32 ret_tgt;
1350 	s64 lm_off;
1351 	swreg tid;
1352 
1353 	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
1354 	nfp_map = offmap->dev_priv;
1355 
	/* We only have to reload LM0 if the key is not at the start of the stack */
1357 	lm_off = nfp_prog->stack_depth;
1358 	lm_off += meta->arg2.var_off.value + meta->arg2.off;
1359 	load_lm_ptr = meta->arg2_var_off || lm_off;
1360 
1361 	/* Set LM0 to start of key */
1362 	if (load_lm_ptr)
1363 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1364 
	/* Load the map ID into a register; it should actually fit as an
	 * immediate, but in case it doesn't, deal with it here rather than in
	 * the delay slots.
1367 	 */
1368 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
1369 
1370 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
1371 		     2, RELO_BR_HELPER);
1372 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1373 
1374 	/* Load map ID into A0 */
1375 	wrp_mov(nfp_prog, reg_a(0), tid);
1376 
1377 	/* Load the return address into B0 */
1378 	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1379 
1380 	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1381 		return -EINVAL;
1382 
1383 	/* Reset the LM0 pointer */
1384 	if (!load_lm_ptr)
1385 		return 0;
1386 
	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1388 	wrp_nops(nfp_prog, 3);
1389 
1390 	return 0;
1391 }
1392 
1393 /* --- Callbacks --- */
1394 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1395 {
1396 	const struct bpf_insn *insn = &meta->insn;
1397 	u8 dst = insn->dst_reg * 2;
1398 	u8 src = insn->src_reg * 2;
1399 
1400 	if (insn->src_reg == BPF_REG_10) {
1401 		swreg stack_depth_reg;
1402 
1403 		stack_depth_reg = ur_load_imm_any(nfp_prog,
1404 						  nfp_prog->stack_depth,
1405 						  stack_imm(nfp_prog));
1406 		emit_alu(nfp_prog, reg_both(dst),
1407 			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1408 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1409 	} else {
1410 		wrp_reg_mov(nfp_prog, dst, src);
1411 		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1412 	}
1413 
1414 	return 0;
1415 }
1416 
1417 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1418 {
1419 	u64 imm = meta->insn.imm; /* sign extend */
1420 
1421 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1422 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1423 
1424 	return 0;
1425 }
1426 
1427 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1428 {
1429 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1430 }
1431 
1432 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1433 {
1434 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1435 }
1436 
1437 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1438 {
1439 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1440 }
1441 
1442 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1443 {
1444 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1445 }
1446 
1447 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1448 {
1449 	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1450 }
1451 
1452 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1453 {
1454 	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1455 }
1456 
1457 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1458 {
1459 	const struct bpf_insn *insn = &meta->insn;
1460 
1461 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1462 		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1463 		 reg_b(insn->src_reg * 2));
1464 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1465 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1466 		 reg_b(insn->src_reg * 2 + 1));
1467 
1468 	return 0;
1469 }
1470 
1471 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1472 {
1473 	const struct bpf_insn *insn = &meta->insn;
1474 	u64 imm = insn->imm; /* sign extend */
1475 
1476 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1477 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1478 
1479 	return 0;
1480 }
1481 
1482 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1483 {
1484 	const struct bpf_insn *insn = &meta->insn;
1485 
1486 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1487 		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1488 		 reg_b(insn->src_reg * 2));
1489 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1490 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1491 		 reg_b(insn->src_reg * 2 + 1));
1492 
1493 	return 0;
1494 }
1495 
1496 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1497 {
1498 	const struct bpf_insn *insn = &meta->insn;
1499 	u64 imm = insn->imm; /* sign extend */
1500 
1501 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1502 	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1503 
1504 	return 0;
1505 }
1506 
1507 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1508 {
1509 	const struct bpf_insn *insn = &meta->insn;
1510 
1511 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1512 		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1513 	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1514 		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1515 
1516 	return 0;
1517 }
1518 
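/* 64-bit left shift by immediate.  Shifts below 32 use a double shift to
 * pull the top bits of the low word into the high word before shifting the
 * low word; shifts of 32 or more move/shift the low word into the high word
 * and zero the low word.
 */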
1519 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1520 {
1521 	const struct bpf_insn *insn = &meta->insn;
1522 	u8 dst = insn->dst_reg * 2;
1523 
1524 	if (insn->imm < 32) {
1525 		emit_shf(nfp_prog, reg_both(dst + 1),
1526 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1527 			 SHF_SC_R_DSHF, 32 - insn->imm);
1528 		emit_shf(nfp_prog, reg_both(dst),
1529 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1530 			 SHF_SC_L_SHF, insn->imm);
1531 	} else if (insn->imm == 32) {
1532 		wrp_reg_mov(nfp_prog, dst + 1, dst);
1533 		wrp_immed(nfp_prog, reg_both(dst), 0);
1534 	} else if (insn->imm > 32) {
1535 		emit_shf(nfp_prog, reg_both(dst + 1),
1536 			 reg_none(), SHF_OP_NONE, reg_b(dst),
1537 			 SHF_SC_L_SHF, insn->imm - 32);
1538 		wrp_immed(nfp_prog, reg_both(dst), 0);
1539 	}
1540 
1541 	return 0;
1542 }
1543 
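/* 64-bit logical right shift by immediate, mirroring shl_imm64(): for shifts
 * below 32 the low word takes bits shifted down from the high word via a
 * double shift and the high word is shifted right; larger shifts reduce to a
 * move/shift from the high word and zeroing of the high word.
 */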
1544 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1545 {
1546 	const struct bpf_insn *insn = &meta->insn;
1547 	u8 dst = insn->dst_reg * 2;
1548 
1549 	if (insn->imm < 32) {
1550 		emit_shf(nfp_prog, reg_both(dst),
1551 			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
1552 			 SHF_SC_R_DSHF, insn->imm);
1553 		emit_shf(nfp_prog, reg_both(dst + 1),
1554 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1555 			 SHF_SC_R_SHF, insn->imm);
1556 	} else if (insn->imm == 32) {
1557 		wrp_reg_mov(nfp_prog, dst, dst + 1);
1558 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1559 	} else if (insn->imm > 32) {
1560 		emit_shf(nfp_prog, reg_both(dst),
1561 			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
1562 			 SHF_SC_R_SHF, insn->imm - 32);
1563 		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1564 	}
1565 
1566 	return 0;
1567 }
1568 
1569 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1570 {
1571 	const struct bpf_insn *insn = &meta->insn;
1572 
	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
1574 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1575 
1576 	return 0;
1577 }
1578 
1579 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1580 {
1581 	const struct bpf_insn *insn = &meta->insn;
1582 
1583 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
1584 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1585 
1586 	return 0;
1587 }
1588 
1589 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1590 {
1591 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
1592 }
1593 
1594 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1595 {
1596 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
1597 }
1598 
1599 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1600 {
1601 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
1602 }
1603 
1604 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1605 {
1606 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1607 }
1608 
1609 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1610 {
1611 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
1612 }
1613 
1614 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1615 {
1616 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1617 }
1618 
1619 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1620 {
1621 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
1622 }
1623 
1624 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1625 {
1626 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
1627 }
1628 
1629 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1630 {
1631 	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
1632 }
1633 
1634 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1635 {
1636 	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
1637 }
1638 
1639 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1640 {
1641 	u8 dst = meta->insn.dst_reg * 2;
1642 
1643 	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
1644 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1645 
1646 	return 0;
1647 }
1648 
1649 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1650 {
1651 	const struct bpf_insn *insn = &meta->insn;
1652 
1653 	if (!insn->imm)
1654 		return 1; /* TODO: zero shift means indirect */
1655 
1656 	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
1657 		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
1658 		 SHF_SC_L_SHF, insn->imm);
1659 	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1660 
1661 	return 0;
1662 }
1663 
1664 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1665 {
1666 	const struct bpf_insn *insn = &meta->insn;
1667 	u8 gpr = insn->dst_reg * 2;
1668 
1669 	switch (insn->imm) {
1670 	case 16:
1671 		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
1672 			      SHF_SC_R_ROT, 8);
1673 		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
1674 			      SHF_SC_R_SHF, 16);
1675 
1676 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1677 		break;
1678 	case 32:
1679 		wrp_end32(nfp_prog, reg_a(gpr), gpr);
1680 		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1681 		break;
1682 	case 64:
1683 		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
1684 
1685 		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
1686 		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
1687 		break;
1688 	}
1689 
1690 	return 0;
1691 }
1692 
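/* BPF_LD | BPF_IMM | BPF_DW spans two BPF instructions; the second one
 * carries the upper 32 bits of the immediate and is handled here as the
 * double_cb of the first.
 */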
1693 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1694 {
1695 	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
1696 	u32 imm_lo, imm_hi;
1697 	u8 dst;
1698 
1699 	dst = prev->insn.dst_reg * 2;
1700 	imm_lo = prev->insn.imm;
1701 	imm_hi = meta->insn.imm;
1702 
1703 	wrp_immed(nfp_prog, reg_both(dst), imm_lo);
1704 
1705 	/* mov is always 1 insn, load imm may be two, so try to use mov */
1706 	if (imm_hi == imm_lo)
1707 		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
1708 	else
1709 		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
1710 
1711 	return 0;
1712 }
1713 
1714 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1715 {
1716 	meta->double_cb = imm_ld8_part2;
1717 	return 0;
1718 }
1719 
1720 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1721 {
1722 	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
1723 }
1724 
1725 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1726 {
1727 	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
1728 }
1729 
1730 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1731 {
1732 	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
1733 }
1734 
1735 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1736 {
1737 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1738 				     meta->insn.src_reg * 2, 1);
1739 }
1740 
1741 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1742 {
1743 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1744 				     meta->insn.src_reg * 2, 2);
1745 }
1746 
1747 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1748 {
1749 	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
1750 				     meta->insn.src_reg * 2, 4);
1751 }
1752 
1753 static int
1754 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1755 	      unsigned int size, unsigned int ptr_off)
1756 {
1757 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1758 			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
1759 			    true, wrp_lmem_load);
1760 }
1761 
1762 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1763 		       u8 size)
1764 {
1765 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1766 
1767 	switch (meta->insn.off) {
1768 	case offsetof(struct __sk_buff, len):
1769 		if (size != FIELD_SIZEOF(struct __sk_buff, len))
1770 			return -EOPNOTSUPP;
1771 		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
1772 		break;
1773 	case offsetof(struct __sk_buff, data):
1774 		if (size != FIELD_SIZEOF(struct __sk_buff, data))
1775 			return -EOPNOTSUPP;
1776 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1777 		break;
1778 	case offsetof(struct __sk_buff, data_end):
1779 		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
1780 			return -EOPNOTSUPP;
1781 		emit_alu(nfp_prog, dst,
1782 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1783 		break;
1784 	default:
1785 		return -EOPNOTSUPP;
1786 	}
1787 
1788 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1789 
1790 	return 0;
1791 }
1792 
1793 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1794 		       u8 size)
1795 {
1796 	swreg dst = reg_both(meta->insn.dst_reg * 2);
1797 
1798 	switch (meta->insn.off) {
1799 	case offsetof(struct xdp_md, data):
1800 		if (size != FIELD_SIZEOF(struct xdp_md, data))
1801 			return -EOPNOTSUPP;
1802 		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
1803 		break;
1804 	case offsetof(struct xdp_md, data_end):
1805 		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
1806 			return -EOPNOTSUPP;
1807 		emit_alu(nfp_prog, dst,
1808 			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
1809 		break;
1810 	default:
1811 		return -EOPNOTSUPP;
1812 	}
1813 
1814 	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1815 
1816 	return 0;
1817 }
1818 
1819 static int
1820 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1821 	     unsigned int size)
1822 {
1823 	swreg tmp_reg;
1824 
1825 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1826 
1827 	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
1828 					 tmp_reg, meta->insn.dst_reg * 2, size);
1829 }
1830 
1831 static int
1832 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1833 	     unsigned int size)
1834 {
1835 	swreg tmp_reg;
1836 
1837 	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1838 
1839 	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
1840 					 tmp_reg, meta->insn.dst_reg * 2, size);
1841 }
1842 
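/* Dispatch LDX based on the pointer type recorded by the verifier: context
 * accesses are synthesized from driver state, packet reads go through the
 * 32 bit address helpers, stack accesses are served from local memory, and
 * map values are read through the 40 bit emem address helpers.  Load/store
 * sequences merged by the ldst_gather optimization are emitted as a single
 * CPP memcpy instead.
 */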
1843 static int
1844 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1845 	unsigned int size)
1846 {
1847 	if (meta->ldst_gather_len)
1848 		return nfp_cpp_memcpy(nfp_prog, meta);
1849 
1850 	if (meta->ptr.type == PTR_TO_CTX) {
1851 		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
1852 			return mem_ldx_xdp(nfp_prog, meta, size);
1853 		else
1854 			return mem_ldx_skb(nfp_prog, meta, size);
1855 	}
1856 
1857 	if (meta->ptr.type == PTR_TO_PACKET)
1858 		return mem_ldx_data(nfp_prog, meta, size);
1859 
1860 	if (meta->ptr.type == PTR_TO_STACK)
1861 		return mem_ldx_stack(nfp_prog, meta, size,
1862 				     meta->ptr.off + meta->ptr.var_off.value);
1863 
1864 	if (meta->ptr.type == PTR_TO_MAP_VALUE)
1865 		return mem_ldx_emem(nfp_prog, meta, size);
1866 
1867 	return -EOPNOTSUPP;
1868 }
1869 
1870 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1871 {
1872 	return mem_ldx(nfp_prog, meta, 1);
1873 }
1874 
1875 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1876 {
1877 	return mem_ldx(nfp_prog, meta, 2);
1878 }
1879 
1880 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1881 {
1882 	return mem_ldx(nfp_prog, meta, 4);
1883 }
1884 
1885 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1886 {
1887 	return mem_ldx(nfp_prog, meta, 8);
1888 }
1889 
1890 static int
1891 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1892 	    unsigned int size)
1893 {
1894 	u64 imm = meta->insn.imm; /* sign extend */
1895 	swreg off_reg;
1896 
1897 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1898 
1899 	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1900 				  imm, size);
1901 }
1902 
1903 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1904 		  unsigned int size)
1905 {
1906 	if (meta->ptr.type == PTR_TO_PACKET)
1907 		return mem_st_data(nfp_prog, meta, size);
1908 
1909 	return -EOPNOTSUPP;
1910 }
1911 
1912 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1913 {
1914 	return mem_st(nfp_prog, meta, 1);
1915 }
1916 
1917 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1918 {
1919 	return mem_st(nfp_prog, meta, 2);
1920 }
1921 
1922 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1923 {
1924 	return mem_st(nfp_prog, meta, 4);
1925 }
1926 
1927 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1928 {
1929 	return mem_st(nfp_prog, meta, 8);
1930 }
1931 
1932 static int
1933 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1934 	     unsigned int size)
1935 {
1936 	swreg off_reg;
1937 
1938 	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
1939 
1940 	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
1941 				   meta->insn.src_reg * 2, size);
1942 }
1943 
1944 static int
1945 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1946 	      unsigned int size, unsigned int ptr_off)
1947 {
1948 	return mem_op_stack(nfp_prog, meta, size, ptr_off,
1949 			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
1950 			    false, wrp_lmem_store);
1951 }
1952 
1953 static int
1954 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1955 	unsigned int size)
1956 {
1957 	if (meta->ptr.type == PTR_TO_PACKET)
1958 		return mem_stx_data(nfp_prog, meta, size);
1959 
1960 	if (meta->ptr.type == PTR_TO_STACK)
1961 		return mem_stx_stack(nfp_prog, meta, size,
1962 				     meta->ptr.off + meta->ptr.var_off.value);
1963 
1964 	return -EOPNOTSUPP;
1965 }
1966 
1967 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1968 {
1969 	return mem_stx(nfp_prog, meta, 1);
1970 }
1971 
1972 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1973 {
1974 	return mem_stx(nfp_prog, meta, 2);
1975 }
1976 
1977 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1978 {
1979 	return mem_stx(nfp_prog, meta, 4);
1980 }
1981 
1982 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1983 {
1984 	return mem_stx(nfp_prog, meta, 8);
1985 }
1986 
1987 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1988 {
1989 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
1990 
1991 	return 0;
1992 }
1993 
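/* JEQ against a 64 bit immediate: XOR each 32 bit half of the register with
 * the corresponding half of the immediate (halves where the immediate is
 * zero can be tested directly), OR the two results together and branch if
 * the outcome is zero.
 */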
1994 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1995 {
1996 	const struct bpf_insn *insn = &meta->insn;
1997 	u64 imm = insn->imm; /* sign extend */
1998 	swreg or1, or2, tmp_reg;
1999 
2000 	or1 = reg_a(insn->dst_reg * 2);
2001 	or2 = reg_b(insn->dst_reg * 2 + 1);
2002 
2003 	if (imm & ~0U) {
2004 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2005 		emit_alu(nfp_prog, imm_a(nfp_prog),
2006 			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2007 		or1 = imm_a(nfp_prog);
2008 	}
2009 
2010 	if (imm >> 32) {
2011 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2012 		emit_alu(nfp_prog, imm_b(nfp_prog),
2013 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2014 		or2 = imm_b(nfp_prog);
2015 	}
2016 
2017 	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
2018 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2019 
2020 	return 0;
2021 }
2022 
2023 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2024 {
2025 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
2026 }
2027 
2028 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2029 {
2030 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
2031 }
2032 
2033 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2034 {
2035 	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
2036 }
2037 
2038 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2039 {
2040 	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
2041 }
2042 
2043 static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2044 {
2045 	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
2046 }
2047 
2048 static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2049 {
2050 	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
2051 }
2052 
2053 static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2054 {
2055 	return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
2056 }
2057 
2058 static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2059 {
2060 	return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
2061 }
2062 
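/* JSET with an immediate: branch if (dst & imm) has any bit set.  The low
 * and high 32 bit halves are tested separately, each followed by its own
 * conditional branch, since a hit in either half is sufficient.  A zero
 * immediate can never match, so the instruction is skipped entirely.
 */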
2063 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2064 {
2065 	const struct bpf_insn *insn = &meta->insn;
2066 	u64 imm = insn->imm; /* sign extend */
2067 	swreg tmp_reg;
2068 
2069 	if (!imm) {
2070 		meta->skip = true;
2071 		return 0;
2072 	}
2073 
2074 	if (imm & ~0U) {
2075 		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2076 		emit_alu(nfp_prog, reg_none(),
2077 			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
2078 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2079 	}
2080 
2081 	if (imm >> 32) {
2082 		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2083 		emit_alu(nfp_prog, reg_none(),
2084 			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
2085 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2086 	}
2087 
2088 	return 0;
2089 }
2090 
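/* JNE with an immediate: a mismatch in either 32 bit half proves inequality,
 * so each half is XORed against its part of the immediate with a branch
 * emitted after each test.  A zero immediate reduces to checking whether the
 * 64 bit register is non-zero by ORing its halves.
 */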
2091 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2092 {
2093 	const struct bpf_insn *insn = &meta->insn;
2094 	u64 imm = insn->imm; /* sign extend */
2095 	swreg tmp_reg;
2096 
2097 	if (!imm) {
2098 		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
2099 			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
2100 		emit_br(nfp_prog, BR_BNE, insn->off, 0);
2101 		return 0;
2102 	}
2103 
2104 	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2105 	emit_alu(nfp_prog, reg_none(),
2106 		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2107 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2108 
2109 	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2110 	emit_alu(nfp_prog, reg_none(),
2111 		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2112 	emit_br(nfp_prog, BR_BNE, insn->off, 0);
2113 
2114 	return 0;
2115 }
2116 
2117 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2118 {
2119 	const struct bpf_insn *insn = &meta->insn;
2120 
2121 	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
2122 		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
2123 	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
2124 		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
2125 	emit_alu(nfp_prog, reg_none(),
2126 		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
2127 	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2128 
2129 	return 0;
2130 }
2131 
2132 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2133 {
2134 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
2135 }
2136 
2137 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2138 {
2139 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
2140 }
2141 
2142 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2143 {
2144 	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
2145 }
2146 
2147 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2148 {
2149 	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
2150 }
2151 
2152 static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2153 {
2154 	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
2155 }
2156 
2157 static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2158 {
2159 	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
2160 }
2161 
2162 static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2163 {
2164 	return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
2165 }
2166 
2167 static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2168 {
2169 	return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
2170 }
2171 
2172 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2173 {
2174 	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
2175 }
2176 
2177 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2178 {
2179 	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
2180 }
2181 
2182 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2183 {
2184 	switch (meta->insn.imm) {
2185 	case BPF_FUNC_xdp_adjust_head:
2186 		return adjust_head(nfp_prog, meta);
2187 	case BPF_FUNC_map_lookup_elem:
2188 		return map_lookup_stack(nfp_prog, meta);
2189 	default:
2190 		WARN_ONCE(1, "verifier allowed unsupported function\n");
2191 		return -EOPNOTSUPP;
2192 	}
2193 }
2194 
2195 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2196 {
2197 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
2198 
2199 	return 0;
2200 }
2201 
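/* Translation callbacks indexed by the full 8 bit BPF opcode (class, op and
 * source bits).  A NULL entry marks an opcode this JIT does not support
 * (see nfp_bpf_supported_opcode()).
 */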
2202 static const instr_cb_t instr_cb[256] = {
2203 	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
2204 	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
2205 	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
2206 	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
2207 	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
2208 	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
2209 	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
2210 	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
2211 	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
2212 	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
2213 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
2214 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
2215 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
2216 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
2217 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
2218 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
2219 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
2220 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
2221 	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
2222 	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
2223 	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
2224 	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
2225 	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
2226 	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
2227 	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
2228 	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
2229 	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
2230 	[BPF_ALU | BPF_NEG] =		neg_reg,
2231 	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
2232 	[BPF_ALU | BPF_END | BPF_X] =	end_reg32,
2233 	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
2234 	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
2235 	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
2236 	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
2237 	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
2238 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
2239 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
2240 	[BPF_LDX | BPF_MEM | BPF_B] =	mem_ldx1,
2241 	[BPF_LDX | BPF_MEM | BPF_H] =	mem_ldx2,
2242 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
2243 	[BPF_LDX | BPF_MEM | BPF_DW] =	mem_ldx8,
2244 	[BPF_STX | BPF_MEM | BPF_B] =	mem_stx1,
2245 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
2246 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
2247 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
2248 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
2249 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
2250 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
2251 	[BPF_ST | BPF_MEM | BPF_DW] =	mem_st8,
2252 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
2253 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
2254 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
2255 	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
2256 	[BPF_JMP | BPF_JLT | BPF_K] =	jlt_imm,
2257 	[BPF_JMP | BPF_JLE | BPF_K] =	jle_imm,
2258 	[BPF_JMP | BPF_JSGT | BPF_K] =  jsgt_imm,
2259 	[BPF_JMP | BPF_JSGE | BPF_K] =  jsge_imm,
2260 	[BPF_JMP | BPF_JSLT | BPF_K] =  jslt_imm,
2261 	[BPF_JMP | BPF_JSLE | BPF_K] =  jsle_imm,
2262 	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
2263 	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
2264 	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
2265 	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
2266 	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
2267 	[BPF_JMP | BPF_JLT | BPF_X] =	jlt_reg,
2268 	[BPF_JMP | BPF_JLE | BPF_X] =	jle_reg,
2269 	[BPF_JMP | BPF_JSGT | BPF_X] =  jsgt_reg,
2270 	[BPF_JMP | BPF_JSGE | BPF_X] =  jsge_reg,
2271 	[BPF_JMP | BPF_JSLT | BPF_X] =  jslt_reg,
2272 	[BPF_JMP | BPF_JSLE | BPF_X] =  jsle_reg,
2273 	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
2274 	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
2275 	[BPF_JMP | BPF_CALL] =		call,
2276 	[BPF_JMP | BPF_EXIT] =		goto_out,
2277 };
2278 
2279 /* --- Assembler logic --- */
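/* While translating, jumps are emitted with placeholder offsets because the
 * NFP offset of the destination is not known yet.  This pass walks the
 * program again: for every BPF jump it locates the branch ending the
 * generated block and patches its offset with the destination's recorded
 * ->off.  Branches carrying a relocation type other than RELO_BR_REL are
 * left alone and resolved later in nfp_bpf_relo_for_vnic().
 */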
2280 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
2281 {
2282 	struct nfp_insn_meta *meta, *jmp_dst;
2283 	u32 idx, br_idx;
2284 
2285 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2286 		if (meta->skip)
2287 			continue;
2288 		if (meta->insn.code == (BPF_JMP | BPF_CALL))
2289 			continue;
2290 		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
2291 			continue;
2292 
2293 		if (list_is_last(&meta->l, &nfp_prog->insns))
2294 			br_idx = nfp_prog->last_bpf_off;
2295 		else
2296 			br_idx = list_next_entry(meta, l)->off - 1;
2297 
2298 		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
2299 			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
2300 			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
2301 			return -ELOOP;
2302 		}
2303 		/* Leave special branches for later */
2304 		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
2305 		    RELO_BR_REL)
2306 			continue;
2307 
2308 		if (!meta->jmp_dst) {
2309 			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
2310 			return -ELOOP;
2311 		}
2312 
2313 		jmp_dst = meta->jmp_dst;
2314 
2315 		if (jmp_dst->skip) {
2316 			pr_err("Branch landing on removed instruction!!\n");
2317 			return -ELOOP;
2318 		}
2319 
2320 		for (idx = meta->off; idx <= br_idx; idx++) {
2321 			if (!nfp_is_br(nfp_prog->prog[idx]))
2322 				continue;
2323 			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
2324 		}
2325 	}
2326 
2327 	return 0;
2328 }
2329 
2330 static void nfp_intro(struct nfp_prog *nfp_prog)
2331 {
2332 	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
2333 	emit_alu(nfp_prog, plen_reg(nfp_prog),
2334 		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
2335 }
2336 
2337 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
2338 {
2339 	/* TC direct-action mode:
2340 	 *   0,1   ok        NOT SUPPORTED[1]
2341 	 *   2   drop  0x22 -> drop,  count as stat1
2342 	 *   4,5 nuke  0x02 -> drop
2343 	 *   7  redir  0x44 -> redir, count as stat2
2344 	 *   * unspec  0x11 -> pass,  count as stat0
2345 	 *
2346 	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
2347 	 *     the exact decision made.  We are forced to support UNSPEC
2348 	 *     to handle aborts so that's the only one we handle for passing
2349 	 *     packets up the stack.
2350 	 */
2351 	/* Target for aborts */
2352 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2353 
2354 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2355 
2356 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2357 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
2358 
2359 	/* Target for normal exits */
2360 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2361 
2362 	/* if R0 > 7 jump to abort */
2363 	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
2364 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2365 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2366 
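	/* The per-verdict result codes from the table above are packed as
	 * nibbles into two immediates: reg_b(2) holds the low nibbles and
	 * reg_b(3) the high nibbles, indexed by the TC verdict in R0.
	 * R0 << 2 turns the verdict into a bit offset; the preceding ALU ops
	 * feed that amount into the indirect shifts which extract the
	 * matching nibble from each word before the result is merged and
	 * written into the ABI flags.
	 */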
2367 	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
2368 	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
2369 
2370 	emit_shf(nfp_prog, reg_a(1),
2371 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
2372 
2373 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2374 	emit_shf(nfp_prog, reg_a(2),
2375 		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2376 
2377 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2378 	emit_shf(nfp_prog, reg_b(2),
2379 		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
2380 
2381 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2382 
2383 	emit_shf(nfp_prog, reg_b(2),
2384 		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
2385 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2386 }
2387 
2388 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
2389 {
2390 	/* XDP return codes:
2391 	 *   0 aborted  0x82 -> drop,  count as stat3
2392 	 *   1    drop  0x22 -> drop,  count as stat1
2393 	 *   2    pass  0x11 -> pass,  count as stat0
2394 	 *   3      tx  0x44 -> redir, count as stat2
2395 	 *   * unknown  0x82 -> drop,  count as stat3
2396 	 */
2397 	/* Target for aborts */
2398 	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2399 
2400 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2401 
2402 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2403 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2404 
2405 	/* Target for normal exits */
2406 	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2407 
2408 	/* if R0 > 3 jump to abort */
2409 	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2410 	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2411 
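	/* The four XDP return codes map to byte sized result codes packed
	 * into a single immediate (0x44112282: verdict 0 -> 0x82, 1 -> 0x22,
	 * 2 -> 0x11, 3 -> 0x44).  R0 << 3 turns the verdict into a bit
	 * offset, the indirect shift extracts the selected byte, and the
	 * byte is then written into the ABI flags word.
	 */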
2412 	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
2413 
2414 	emit_shf(nfp_prog, reg_a(1),
2415 		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2416 
2417 	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2418 	emit_shf(nfp_prog, reg_b(2),
2419 		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2420 
2421 	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2422 
2423 	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2424 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2425 }
2426 
2427 static void nfp_outro(struct nfp_prog *nfp_prog)
2428 {
2429 	switch (nfp_prog->type) {
2430 	case BPF_PROG_TYPE_SCHED_CLS:
2431 		nfp_outro_tc_da(nfp_prog);
2432 		break;
2433 	case BPF_PROG_TYPE_XDP:
2434 		nfp_outro_xdp(nfp_prog);
2435 		break;
2436 	default:
2437 		WARN_ON(1);
2438 	}
2439 }
2440 
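/* Main translation loop: record the current NFP offset of every BPF
 * instruction in meta->off (the branch fixup pass relies on it), honour the
 * skip flags set by the optimizer, and dispatch through instr_cb[] or, for
 * the second half of a two-slot instruction, through the previous entry's
 * double_cb.  The exit sequence and a NOP prefetch window are appended
 * before branch offsets are fixed up.
 */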
2441 static int nfp_translate(struct nfp_prog *nfp_prog)
2442 {
2443 	struct nfp_insn_meta *meta;
2444 	int err;
2445 
2446 	nfp_intro(nfp_prog);
2447 	if (nfp_prog->error)
2448 		return nfp_prog->error;
2449 
2450 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2451 		instr_cb_t cb = instr_cb[meta->insn.code];
2452 
2453 		meta->off = nfp_prog_current_offset(nfp_prog);
2454 
2455 		if (meta->skip) {
2456 			nfp_prog->n_translated++;
2457 			continue;
2458 		}
2459 
2460 		if (nfp_meta_has_prev(nfp_prog, meta) &&
2461 		    nfp_meta_prev(meta)->double_cb)
2462 			cb = nfp_meta_prev(meta)->double_cb;
2463 		if (!cb)
2464 			return -ENOENT;
2465 		err = cb(nfp_prog, meta);
2466 		if (err)
2467 			return err;
2468 		if (nfp_prog->error)
2469 			return nfp_prog->error;
2470 
2471 		nfp_prog->n_translated++;
2472 	}
2473 
2474 	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2475 
2476 	nfp_outro(nfp_prog);
2477 	if (nfp_prog->error)
2478 		return nfp_prog->error;
2479 
2480 	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2481 	if (nfp_prog->error)
2482 		return nfp_prog->error;
2483 
2484 	return nfp_fixup_branches(nfp_prog);
2485 }
2486 
2487 /* --- Optimizations --- */
2488 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2489 {
2490 	struct nfp_insn_meta *meta;
2491 
2492 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2493 		struct bpf_insn insn = meta->insn;
2494 
2495 		/* Programs converted from cBPF start with register xoring */
2496 		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2497 		    insn.src_reg == insn.dst_reg)
2498 			continue;
2499 
2500 		/* Programs start with R6 = R1 but we ignore the skb pointer */
2501 		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2502 		    insn.src_reg == 1 && insn.dst_reg == 6)
2503 			meta->skip = true;
2504 
2505 		/* Return as soon as something doesn't match */
2506 		if (!meta->skip)
2507 			return;
2508 	}
2509 }
2510 
2511 /* Remove masking after load since our load guarantees this is not needed */
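/* The pattern matched here is a direct packet load followed by a mask to
 * the load width, e.g.:
 *
 *   r0 = *(u8 *)skb[off]      BPF_LD | BPF_ABS / BPF_IND
 *   r0 &= 0xff                BPF_ALU64 | BPF_AND | BPF_K
 *
 * The AND is redundant because the load already zero extends into r0, so
 * the second instruction is marked as skipped (unless it is a jump
 * destination).
 */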
2512 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2513 {
2514 	struct nfp_insn_meta *meta1, *meta2;
2515 	const s32 exp_mask[] = {
2516 		[BPF_B] = 0x000000ffU,
2517 		[BPF_H] = 0x0000ffffU,
2518 		[BPF_W] = 0xffffffffU,
2519 	};
2520 
2521 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2522 		struct bpf_insn insn, next;
2523 
2524 		insn = meta1->insn;
2525 		next = meta2->insn;
2526 
2527 		if (BPF_CLASS(insn.code) != BPF_LD)
2528 			continue;
2529 		if (BPF_MODE(insn.code) != BPF_ABS &&
2530 		    BPF_MODE(insn.code) != BPF_IND)
2531 			continue;
2532 
2533 		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2534 			continue;
2535 
2536 		if (!exp_mask[BPF_SIZE(insn.code)])
2537 			continue;
2538 		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2539 			continue;
2540 
2541 		if (next.src_reg || next.dst_reg)
2542 			continue;
2543 
2544 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2545 			continue;
2546 
2547 		meta2->skip = true;
2548 	}
2549 }
2550 
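/* Companion to the masking optimization above: a 32 bit direct packet load
 * followed by a pair of shifts by 32 on r0 (left then right, or right then
 * left) is matched and both shift instructions are marked as skipped,
 * relying on the load already producing the value in the expected 32 bit
 * form.  Jump destinations are never removed.
 */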
2551 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2552 {
2553 	struct nfp_insn_meta *meta1, *meta2, *meta3;
2554 
2555 	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2556 		struct bpf_insn insn, next1, next2;
2557 
2558 		insn = meta1->insn;
2559 		next1 = meta2->insn;
2560 		next2 = meta3->insn;
2561 
2562 		if (BPF_CLASS(insn.code) != BPF_LD)
2563 			continue;
2564 		if (BPF_MODE(insn.code) != BPF_ABS &&
2565 		    BPF_MODE(insn.code) != BPF_IND)
2566 			continue;
2567 		if (BPF_SIZE(insn.code) != BPF_W)
2568 			continue;
2569 
2570 		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2571 		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2572 		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2573 		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2574 			continue;
2575 
2576 		if (next1.src_reg || next1.dst_reg ||
2577 		    next2.src_reg || next2.dst_reg)
2578 			continue;
2579 
2580 		if (next1.imm != 0x20 || next2.imm != 0x20)
2581 			continue;
2582 
2583 		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2584 		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
2585 			continue;
2586 
2587 		meta2->skip = true;
2588 		meta3->skip = true;
2589 	}
2590 }
2591 
2592 /* A load/store pair that forms a memory copy should look like the following:
2593  *
2594  *   ld_width R, [addr_src + offset_src]
2595  *   st_width [addr_dest + offset_dest], R
2596  *
2597  * The destination register of the load and the source register of the store
2598  * must be the same, and the load and store must use the same width.
2599  * If either addr_src or addr_dest is the stack pointer, we don't do the
2600  * CPP optimization, as the stack is modelled with registers on the NFP.
2601  */
2602 static bool
2603 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
2604 		    struct nfp_insn_meta *st_meta)
2605 {
2606 	struct bpf_insn *ld = &ld_meta->insn;
2607 	struct bpf_insn *st = &st_meta->insn;
2608 
2609 	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
2610 		return false;
2611 
2612 	if (ld_meta->ptr.type != PTR_TO_PACKET)
2613 		return false;
2614 
2615 	if (st_meta->ptr.type != PTR_TO_PACKET)
2616 		return false;
2617 
2618 	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
2619 		return false;
2620 
2621 	if (ld->dst_reg != st->src_reg)
2622 		return false;
2623 
2624 	/* There is a jump to the store insn in this pair. */
2625 	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
2626 		return false;
2627 
2628 	return true;
2629 }
2630 
2631 /* Currently, we only support chaining load/store pairs if:
2632  *
2633  *  - Their address base registers are the same.
2634  *  - Their address offsets are in the same order.
2635  *  - They operate at the same memory width.
2636  *  - There is no jump into the middle of them.
2637  */
2638 static bool
2639 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
2640 			      struct nfp_insn_meta *st_meta,
2641 			      struct bpf_insn *prev_ld,
2642 			      struct bpf_insn *prev_st)
2643 {
2644 	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
2645 	struct bpf_insn *ld = &ld_meta->insn;
2646 	struct bpf_insn *st = &st_meta->insn;
2647 	s16 prev_ld_off, prev_st_off;
2648 
2649 	/* This pair is the start pair. */
2650 	if (!prev_ld)
2651 		return true;
2652 
2653 	prev_size = BPF_LDST_BYTES(prev_ld);
2654 	curr_size = BPF_LDST_BYTES(ld);
2655 	prev_ld_base = prev_ld->src_reg;
2656 	prev_st_base = prev_st->dst_reg;
2657 	prev_ld_dst = prev_ld->dst_reg;
2658 	prev_ld_off = prev_ld->off;
2659 	prev_st_off = prev_st->off;
2660 
2661 	if (ld->dst_reg != prev_ld_dst)
2662 		return false;
2663 
2664 	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
2665 		return false;
2666 
2667 	if (curr_size != prev_size)
2668 		return false;
2669 
2670 	/* There is a jump to the head of this pair. */
2671 	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
2672 		return false;
2673 
2674 	/* Both in ascending order. */
2675 	if (prev_ld_off + prev_size == ld->off &&
2676 	    prev_st_off + prev_size == st->off)
2677 		return true;
2678 
2679 	/* Both in descending order. */
2680 	if (ld->off + curr_size == prev_ld_off &&
2681 	    st->off + curr_size == prev_st_off)
2682 		return true;
2683 
2684 	return false;
2685 }
2686 
2687 /* Return TRUE if a cross memory access happens.  A cross memory access means
2688  * the store area overlaps with the load area, so a later load might read back
2689  * the value written by a previous store; in that case we can't treat the
2690  * sequence as a memory copy.
2691  */
2692 static bool
2693 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
2694 		 struct nfp_insn_meta *head_st_meta)
2695 {
2696 	s16 head_ld_off, head_st_off, ld_off;
2697 
2698 	/* Different pointer types do not overlap. */
2699 	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
2700 		return false;
2701 
2702 	/* load and store are both PTR_TO_PACKET, check ID info.  */
2703 	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
2704 		return true;
2705 
2706 	/* Canonicalize the offsets: express all of them relative to the
2707 	 * original base register.
2708 	 */
2709 	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
2710 	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
2711 	ld_off = ld->off + head_ld_meta->ptr.off;
2712 
2713 	/* Ascending order cross. */
2714 	if (ld_off > head_ld_off &&
2715 	    head_ld_off < head_st_off && ld_off >= head_st_off)
2716 		return true;
2717 
2718 	/* Descending order cross. */
2719 	if (ld_off < head_ld_off &&
2720 	    head_ld_off > head_st_off && ld_off <= head_st_off)
2721 		return true;
2722 
2723 	return false;
2724 }
2725 
2726 /* This pass tries to identify the following instruction sequences:
2727  *
2728  *   load R, [regA + offA]
2729  *   store [regB + offB], R
2730  *   load R, [regA + offA + const_imm_A]
2731  *   store [regB + offB + const_imm_A], R
2732  *   load R, [regA + offA + 2 * const_imm_A]
2733  *   store [regB + offB + 2 * const_imm_A], R
2734  *   ...
2735  *
2736  * The sequence above is typically generated by the compiler when lowering
2737  * memcpy.  The NFP prefers using CPP instructions to accelerate it.
2738  */
2739 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
2740 {
2741 	struct nfp_insn_meta *head_ld_meta = NULL;
2742 	struct nfp_insn_meta *head_st_meta = NULL;
2743 	struct nfp_insn_meta *meta1, *meta2;
2744 	struct bpf_insn *prev_ld = NULL;
2745 	struct bpf_insn *prev_st = NULL;
2746 	u8 count = 0;
2747 
2748 	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2749 		struct bpf_insn *ld = &meta1->insn;
2750 		struct bpf_insn *st = &meta2->insn;
2751 
2752 		/* Reset record status if any of the following is true:
2753 		 *   - The current insn pair is not a load/store pair.
2754 		 *   - The load/store pair doesn't chain with the previous one.
2755 		 *   - The chained load/store pair crosses the previous pair.
2756 		 *   - The chained load/store pair has a total memory copy size
2757 		 *     beyond 128 bytes, which is the maximum length a single
2758 		 *     NFP CPP command can transfer.
2759 		 */
2760 		if (!curr_pair_is_memcpy(meta1, meta2) ||
2761 		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
2762 						   prev_st) ||
2763 		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
2764 						       head_st_meta) ||
2765 				      head_ld_meta->ldst_gather_len >= 128))) {
2766 			if (!count)
2767 				continue;
2768 
2769 			if (count > 1) {
2770 				s16 prev_ld_off = prev_ld->off;
2771 				s16 prev_st_off = prev_st->off;
2772 				s16 head_ld_off = head_ld_meta->insn.off;
2773 
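				/* The chain ran in descending order: move the
				 * head offsets down to the lowest address seen
				 * and negate the gather length to record that
				 * the copy runs downwards.
				 */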
2774 				if (prev_ld_off < head_ld_off) {
2775 					head_ld_meta->insn.off = prev_ld_off;
2776 					head_st_meta->insn.off = prev_st_off;
2777 					head_ld_meta->ldst_gather_len =
2778 						-head_ld_meta->ldst_gather_len;
2779 				}
2780 
2781 				head_ld_meta->paired_st = &head_st_meta->insn;
2782 				head_st_meta->skip = true;
2783 			} else {
2784 				head_ld_meta->ldst_gather_len = 0;
2785 			}
2786 
2787 			/* If the chain is ended by a load/store pair then this
2788 			 * could serve as the new head of the next chain.
2789 			 */
2790 			if (curr_pair_is_memcpy(meta1, meta2)) {
2791 				head_ld_meta = meta1;
2792 				head_st_meta = meta2;
2793 				head_ld_meta->ldst_gather_len =
2794 					BPF_LDST_BYTES(ld);
2795 				meta1 = nfp_meta_next(meta1);
2796 				meta2 = nfp_meta_next(meta2);
2797 				prev_ld = ld;
2798 				prev_st = st;
2799 				count = 1;
2800 			} else {
2801 				head_ld_meta = NULL;
2802 				head_st_meta = NULL;
2803 				prev_ld = NULL;
2804 				prev_st = NULL;
2805 				count = 0;
2806 			}
2807 
2808 			continue;
2809 		}
2810 
2811 		if (!head_ld_meta) {
2812 			head_ld_meta = meta1;
2813 			head_st_meta = meta2;
2814 		} else {
2815 			meta1->skip = true;
2816 			meta2->skip = true;
2817 		}
2818 
2819 		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
2820 		meta1 = nfp_meta_next(meta1);
2821 		meta2 = nfp_meta_next(meta2);
2822 		prev_ld = ld;
2823 		prev_st = st;
2824 		count++;
2825 	}
2826 }
2827 
2828 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
2829 {
2830 	nfp_bpf_opt_reg_init(nfp_prog);
2831 
2832 	nfp_bpf_opt_ld_mask(nfp_prog);
2833 	nfp_bpf_opt_ld_shift(nfp_prog);
2834 	nfp_bpf_opt_ldst_gather(nfp_prog);
2835 
2836 	return 0;
2837 }
2838 
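/* Convert the translated program, in place, into the format expected by the
 * microengine code store: every 64 bit instruction is validity checked, has
 * its ECC bits computed and is stored as little endian.
 */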
2839 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
2840 {
2841 	__le64 *ustore = (__force __le64 *)prog;
2842 	int i;
2843 
2844 	for (i = 0; i < len; i++) {
2845 		int err;
2846 
2847 		err = nfp_ustore_check_valid_no_ecc(prog[i]);
2848 		if (err)
2849 			return err;
2850 
2851 		ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
2852 	}
2853 
2854 	return 0;
2855 }
2856 
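/* The instruction buffer may be larger than the final program; once the
 * final length is known, reallocate it to the exact size.  This is best
 * effort: on allocation failure the oversized buffer is simply kept.
 */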
2857 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
2858 {
2859 	void *prog;
2860 
2861 	prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
2862 	if (!prog)
2863 		return;
2864 
2865 	nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
2866 	memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
2867 	kvfree(nfp_prog->prog);
2868 	nfp_prog->prog = prog;
2869 }
2870 
2871 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
2872 {
2873 	int ret;
2874 
2875 	ret = nfp_bpf_optimize(nfp_prog);
2876 	if (ret)
2877 		return ret;
2878 
2879 	ret = nfp_translate(nfp_prog);
2880 	if (ret) {
2881 		pr_err("Translation failed with error %d (translated: %u)\n",
2882 		       ret, nfp_prog->n_translated);
2883 		return -EINVAL;
2884 	}
2885 
2886 	nfp_bpf_prog_trim(nfp_prog);
2887 
2888 	return ret;
2889 }
2890 
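/* Pre-translation pass: for every jump (other than calls and exit) resolve
 * the destination instruction, remember it in meta->jmp_dst and flag the
 * destination with FLAG_INSN_IS_JUMP_DST.  The optimizations above use that
 * flag to avoid removing or merging instructions something jumps to.
 */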
2891 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
2892 {
2893 	struct nfp_insn_meta *meta;
2894 
2895 	/* Another pass to record jump information. */
2896 	list_for_each_entry(meta, &nfp_prog->insns, l) {
2897 		u64 code = meta->insn.code;
2898 
2899 		if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
2900 		    BPF_OP(code) != BPF_CALL) {
2901 			struct nfp_insn_meta *dst_meta;
2902 			unsigned short dst_indx;
2903 
2904 			dst_indx = meta->n + 1 + meta->insn.off;
2905 			dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
2906 						     cnt);
2907 
2908 			meta->jmp_dst = dst_meta;
2909 			dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
2910 		}
2911 	}
2912 }
2913 
2914 bool nfp_bpf_supported_opcode(u8 code)
2915 {
2916 	return !!instr_cb[code];
2917 }
2918 
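/* Produce a per-vNIC copy of the translated program with all relocations
 * resolved: relative branches and immediates get the vNIC's load offset
 * added, exit and abort branches are pointed at the generated outro, the
 * "next packet" branches at the firmware's packet-done target, and helper
 * calls at the helper address advertised by the firmware.  The relocation
 * bits are then cleared and ECC is computed for the final image.
 */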
2919 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
2920 {
2921 	unsigned int i;
2922 	u64 *prog;
2923 	int err;
2924 
2925 	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
2926 		       GFP_KERNEL);
2927 	if (!prog)
2928 		return ERR_PTR(-ENOMEM);
2929 
2930 	for (i = 0; i < nfp_prog->prog_len; i++) {
2931 		enum nfp_relo_type special;
2932 		u32 val;
2933 
2934 		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
2935 		switch (special) {
2936 		case RELO_NONE:
2937 			continue;
2938 		case RELO_BR_REL:
2939 			br_add_offset(&prog[i], bv->start_off);
2940 			break;
2941 		case RELO_BR_GO_OUT:
2942 			br_set_offset(&prog[i],
2943 				      nfp_prog->tgt_out + bv->start_off);
2944 			break;
2945 		case RELO_BR_GO_ABORT:
2946 			br_set_offset(&prog[i],
2947 				      nfp_prog->tgt_abort + bv->start_off);
2948 			break;
2949 		case RELO_BR_NEXT_PKT:
2950 			br_set_offset(&prog[i], bv->tgt_done);
2951 			break;
2952 		case RELO_BR_HELPER:
2953 			val = br_get_offset(prog[i]);
2954 			val -= BR_OFF_RELO;
2955 			switch (val) {
2956 			case BPF_FUNC_map_lookup_elem:
2957 				val = nfp_prog->bpf->helpers.map_lookup;
2958 				break;
2959 			default:
2960 				pr_err("relocation of unknown helper %d\n",
2961 				       val);
2962 				err = -EINVAL;
2963 				goto err_free_prog;
2964 			}
2965 			br_set_offset(&prog[i], val);
2966 			break;
2967 		case RELO_IMMED_REL:
2968 			immed_add_value(&prog[i], bv->start_off);
2969 			break;
2970 		}
2971 
2972 		prog[i] &= ~OP_RELO_TYPE;
2973 	}
2974 
2975 	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
2976 	if (err)
2977 		goto err_free_prog;
2978 
2979 	return prog;
2980 
2981 err_free_prog:
2982 	kfree(prog);
2983 	return ERR_PTR(err);
2984 }
2985