xref: /openbmc/linux/drivers/net/ethernet/intel/ice/ice_xsk.c (revision 6246ed09111fbb17168619006b4380103c6673c3)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019, Intel Corporation. */
3 
4 #include <linux/bpf_trace.h>
5 #include <net/xdp_sock_drv.h>
6 #include <net/xdp.h>
7 #include "ice.h"
8 #include "ice_base.h"
9 #include "ice_type.h"
10 #include "ice_xsk.h"
11 #include "ice_txrx.h"
12 #include "ice_txrx_lib.h"
13 #include "ice_lib.h"
14 
15 static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
16 {
17 	return &rx_ring->xdp_buf[idx];
18 }
19 
20 /**
21  * ice_qp_reset_stats - Resets all stats for rings of given index
22  * @vsi: VSI that contains rings of interest
23  * @q_idx: ring index in array
24  */
25 static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
26 {
27 	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
28 	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
29 	memset(&vsi->tx_rings[q_idx]->stats, 0,
30 	       sizeof(vsi->tx_rings[q_idx]->stats));
31 	if (ice_is_xdp_ena_vsi(vsi))
32 		memset(&vsi->xdp_rings[q_idx]->stats, 0,
33 		       sizeof(vsi->xdp_rings[q_idx]->stats));
34 }
35 
36 /**
37  * ice_qp_clean_rings - Cleans all the rings of a given index
38  * @vsi: VSI that contains rings of interest
39  * @q_idx: ring index in array
40  */
41 static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
42 {
43 	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
44 	if (ice_is_xdp_ena_vsi(vsi)) {
45 		synchronize_rcu();
46 		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
47 	}
48 	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
49 }
50 
51 /**
52  * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
53  * @vsi: VSI that has netdev
54  * @q_vector: q_vector that has NAPI context
55  * @enable: true for enable, false for disable
56  */
57 static void
58 ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
59 		     bool enable)
60 {
61 	if (!vsi->netdev || !q_vector)
62 		return;
63 
64 	if (enable)
65 		napi_enable(&q_vector->napi);
66 	else
67 		napi_disable(&q_vector->napi);
68 }
69 
70 /**
71  * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
72  * @vsi: the VSI that contains queue vector being un-configured
73  * @rx_ring: Rx ring that will have its IRQ disabled
74  * @q_vector: queue vector
75  */
76 static void
77 ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
78 		 struct ice_q_vector *q_vector)
79 {
80 	struct ice_pf *pf = vsi->back;
81 	struct ice_hw *hw = &pf->hw;
82 	int base = vsi->base_vector;
83 	u16 reg;
84 	u32 val;
85 
86 	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
87 	 * here only QINT_RQCTL
88 	 */
89 	reg = rx_ring->reg_idx;
90 	val = rd32(hw, QINT_RQCTL(reg));
91 	val &= ~QINT_RQCTL_CAUSE_ENA_M;
92 	wr32(hw, QINT_RQCTL(reg), val);
93 
94 	if (q_vector) {
95 		u16 v_idx = q_vector->v_idx;
96 
97 		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
98 		ice_flush(hw);
99 		synchronize_irq(pf->msix_entries[v_idx + base].vector);
100 	}
101 }
102 
103 /**
104  * ice_qvec_cfg_msix - Enable IRQ for given queue vector
105  * @vsi: the VSI that contains queue vector
106  * @q_vector: queue vector
107  */
108 static void
109 ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
110 {
111 	u16 reg_idx = q_vector->reg_idx;
112 	struct ice_pf *pf = vsi->back;
113 	struct ice_hw *hw = &pf->hw;
114 	struct ice_tx_ring *tx_ring;
115 	struct ice_rx_ring *rx_ring;
116 
117 	ice_cfg_itr(hw, q_vector);
118 
119 	ice_for_each_tx_ring(tx_ring, q_vector->tx)
120 		ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
121 				      q_vector->tx.itr_idx);
122 
123 	ice_for_each_rx_ring(rx_ring, q_vector->rx)
124 		ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
125 				      q_vector->rx.itr_idx);
126 
127 	ice_flush(hw);
128 }
129 
130 /**
131  * ice_qvec_ena_irq - Enable IRQ for given queue vector
132  * @vsi: the VSI that contains queue vector
133  * @q_vector: queue vector
134  */
135 static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
136 {
137 	struct ice_pf *pf = vsi->back;
138 	struct ice_hw *hw = &pf->hw;
139 
140 	ice_irq_dynamic_ena(hw, vsi, q_vector);
141 
142 	ice_flush(hw);
143 }
144 
145 /**
146  * ice_qp_dis - Disables a queue pair
147  * @vsi: VSI of interest
148  * @q_idx: ring index in array
149  *
150  * Returns 0 on success, negative on failure.
151  */
152 static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
153 {
154 	struct ice_txq_meta txq_meta = { };
155 	struct ice_q_vector *q_vector;
156 	struct ice_tx_ring *tx_ring;
157 	struct ice_rx_ring *rx_ring;
158 	int timeout = 50;
159 	int err;
160 
161 	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
162 		return -EINVAL;
163 
164 	tx_ring = vsi->tx_rings[q_idx];
165 	rx_ring = vsi->rx_rings[q_idx];
166 	q_vector = rx_ring->q_vector;
167 
168 	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
169 		timeout--;
170 		if (!timeout)
171 			return -EBUSY;
172 		usleep_range(1000, 2000);
173 	}
174 	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
175 
176 	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
177 
178 	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
179 	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
180 	if (err)
181 		return err;
182 	if (ice_is_xdp_ena_vsi(vsi)) {
183 		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
184 
185 		memset(&txq_meta, 0, sizeof(txq_meta));
186 		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
187 		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
188 					   &txq_meta);
189 		if (err)
190 			return err;
191 	}
192 	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
193 	if (err)
194 		return err;
195 	ice_clean_rx_ring(rx_ring);
196 
197 	ice_qvec_toggle_napi(vsi, q_vector, false);
198 	ice_qp_clean_rings(vsi, q_idx);
199 	ice_qp_reset_stats(vsi, q_idx);
200 
201 	return 0;
202 }
203 
204 /**
205  * ice_qp_ena - Enables a queue pair
206  * @vsi: VSI of interest
207  * @q_idx: ring index in array
208  *
209  * Returns 0 on success, negative on failure.
210  */
211 static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
212 {
213 	struct ice_aqc_add_tx_qgrp *qg_buf;
214 	struct ice_q_vector *q_vector;
215 	struct ice_tx_ring *tx_ring;
216 	struct ice_rx_ring *rx_ring;
217 	u16 size;
218 	int err;
219 
220 	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
221 		return -EINVAL;
222 
223 	size = struct_size(qg_buf, txqs, 1);
224 	qg_buf = kzalloc(size, GFP_KERNEL);
225 	if (!qg_buf)
226 		return -ENOMEM;
227 
228 	qg_buf->num_txqs = 1;
229 
230 	tx_ring = vsi->tx_rings[q_idx];
231 	rx_ring = vsi->rx_rings[q_idx];
232 	q_vector = rx_ring->q_vector;
233 
234 	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
235 	if (err)
236 		goto free_buf;
237 
238 	if (ice_is_xdp_ena_vsi(vsi)) {
239 		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
240 
241 		memset(qg_buf, 0, size);
242 		qg_buf->num_txqs = 1;
243 		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
244 		if (err)
245 			goto free_buf;
246 		ice_set_ring_xdp(xdp_ring);
247 		ice_tx_xsk_pool(vsi, q_idx);
248 	}
249 
250 	err = ice_vsi_cfg_rxq(rx_ring);
251 	if (err)
252 		goto free_buf;
253 
254 	ice_qvec_cfg_msix(vsi, q_vector);
255 
256 	err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
257 	if (err)
258 		goto free_buf;
259 
260 	clear_bit(ICE_CFG_BUSY, vsi->state);
261 	ice_qvec_toggle_napi(vsi, q_vector, true);
262 	ice_qvec_ena_irq(vsi, q_vector);
263 
264 	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
265 free_buf:
266 	kfree(qg_buf);
267 	return err;
268 }
269 
270 /**
271  * ice_xsk_pool_disable - disable a buffer pool region
272  * @vsi: Current VSI
273  * @qid: queue ID
274  *
275  * Returns 0 on success, negative on failure
276  */
277 static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
278 {
279 	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
280 
281 	if (!pool)
282 		return -EINVAL;
283 
284 	clear_bit(qid, vsi->af_xdp_zc_qps);
285 	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
286 
287 	return 0;
288 }
289 
290 /**
291  * ice_xsk_pool_enable - enable a buffer pool region
292  * @vsi: Current VSI
293  * @pool: pointer to a requested buffer pool region
294  * @qid: queue ID
295  *
296  * Returns 0 on success, negative on failure
297  */
298 static int
299 ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
300 {
301 	int err;
302 
303 	if (vsi->type != ICE_VSI_PF)
304 		return -EINVAL;
305 
306 	if (qid >= vsi->netdev->real_num_rx_queues ||
307 	    qid >= vsi->netdev->real_num_tx_queues)
308 		return -EINVAL;
309 
310 	err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
311 			       ICE_RX_DMA_ATTR);
312 	if (err)
313 		return err;
314 
315 	set_bit(qid, vsi->af_xdp_zc_qps);
316 
317 	return 0;
318 }
319 
320 /**
321  * ice_realloc_rx_xdp_bufs - reallocate for either XSK or normal buffer
322  * @rx_ring: Rx ring
323  * @pool_present: is pool for XSK present
324  *
325  * Try allocating memory and return ENOMEM, if failed to allocate.
326  * If allocation was successful, substitute buffer with allocated one.
327  * Returns 0 on success, negative on failure
328  */
329 static int
330 ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
331 {
332 	size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
333 					  sizeof(*rx_ring->rx_buf);
334 	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
335 
336 	if (!sw_ring)
337 		return -ENOMEM;
338 
339 	if (pool_present) {
340 		kfree(rx_ring->rx_buf);
341 		rx_ring->rx_buf = NULL;
342 		rx_ring->xdp_buf = sw_ring;
343 	} else {
344 		kfree(rx_ring->xdp_buf);
345 		rx_ring->xdp_buf = NULL;
346 		rx_ring->rx_buf = sw_ring;
347 	}
348 
349 	return 0;
350 }
351 
352 /**
353  * ice_realloc_zc_buf - reallocate XDP ZC queue pairs
354  * @vsi: Current VSI
355  * @zc: is zero copy set
356  *
357  * Reallocate buffer for rx_rings that might be used by XSK.
358  * XDP requires more memory, than rx_buf provides.
359  * Returns 0 on success, negative on failure
360  */
361 int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
362 {
363 	struct ice_rx_ring *rx_ring;
364 	unsigned long q;
365 
366 	for_each_set_bit(q, vsi->af_xdp_zc_qps,
367 			 max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
368 		rx_ring = vsi->rx_rings[q];
369 		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
370 			return -ENOMEM;
371 	}
372 
373 	return 0;
374 }
375 
376 /**
377  * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
378  * @vsi: Current VSI
379  * @pool: buffer pool to enable/associate to a ring, NULL to disable
380  * @qid: queue ID
381  *
382  * Returns 0 on success, negative on failure
383  */
384 int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
385 {
386 	bool if_running, pool_present = !!pool;
387 	int ret = 0, pool_failure = 0;
388 
389 	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
390 		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
391 		pool_failure = -EINVAL;
392 		goto failure;
393 	}
394 
395 	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
396 
397 	if (if_running) {
398 		struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
399 
400 		ret = ice_qp_dis(vsi, qid);
401 		if (ret) {
402 			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
403 			goto xsk_pool_if_up;
404 		}
405 
406 		ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
407 		if (ret)
408 			goto xsk_pool_if_up;
409 	}
410 
411 	pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
412 				      ice_xsk_pool_disable(vsi, qid);
413 
414 xsk_pool_if_up:
415 	if (if_running) {
416 		ret = ice_qp_ena(vsi, qid);
417 		if (!ret && pool_present)
418 			napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
419 		else if (ret)
420 			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
421 	}
422 
423 failure:
424 	if (pool_failure) {
425 		netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
426 			   pool_present ? "en" : "dis", pool_failure);
427 		return pool_failure;
428 	}
429 
430 	return ret;
431 }
432 
433 /**
434  * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it
435  * @pool: XSK Buffer pool to pull the buffers from
436  * @xdp: SW ring of xdp_buff that will hold the buffers
437  * @rx_desc: Pointer to Rx descriptors that will be filled
438  * @count: The number of buffers to allocate
439  *
440  * This function allocates a number of Rx buffers from the fill ring
441  * or the internal recycle mechanism and places them on the Rx ring.
442  *
443  * Note that ring wrap should be handled by caller of this function.
444  *
445  * Returns the amount of allocated Rx descriptors
446  */
447 static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
448 			     union ice_32b_rx_flex_desc *rx_desc, u16 count)
449 {
450 	dma_addr_t dma;
451 	u16 buffs;
452 	int i;
453 
454 	buffs = xsk_buff_alloc_batch(pool, xdp, count);
455 	for (i = 0; i < buffs; i++) {
456 		dma = xsk_buff_xdp_get_dma(*xdp);
457 		rx_desc->read.pkt_addr = cpu_to_le64(dma);
458 		rx_desc->wb.status_error0 = 0;
459 
460 		rx_desc++;
461 		xdp++;
462 	}
463 
464 	return buffs;
465 }
466 
467 /**
468  * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
469  * @rx_ring: Rx ring
470  * @count: The number of buffers to allocate
471  *
472  * Place the @count of descriptors onto Rx ring. Handle the ring wrap
473  * for case where space from next_to_use up to the end of ring is less
474  * than @count. Finally do a tail bump.
475  *
476  * Returns true if all allocations were successful, false if any fail.
477  */
478 static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
479 {
480 	u32 nb_buffs_extra = 0, nb_buffs = 0;
481 	union ice_32b_rx_flex_desc *rx_desc;
482 	u16 ntu = rx_ring->next_to_use;
483 	u16 total_count = count;
484 	struct xdp_buff **xdp;
485 
486 	rx_desc = ICE_RX_DESC(rx_ring, ntu);
487 	xdp = ice_xdp_buf(rx_ring, ntu);
488 
489 	if (ntu + count >= rx_ring->count) {
490 		nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
491 						   rx_desc,
492 						   rx_ring->count - ntu);
493 		if (nb_buffs_extra != rx_ring->count - ntu) {
494 			ntu += nb_buffs_extra;
495 			goto exit;
496 		}
497 		rx_desc = ICE_RX_DESC(rx_ring, 0);
498 		xdp = ice_xdp_buf(rx_ring, 0);
499 		ntu = 0;
500 		count -= nb_buffs_extra;
501 		ice_release_rx_desc(rx_ring, 0);
502 	}
503 
504 	nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
505 
506 	ntu += nb_buffs;
507 	if (ntu == rx_ring->count)
508 		ntu = 0;
509 
510 exit:
511 	if (rx_ring->next_to_use != ntu)
512 		ice_release_rx_desc(rx_ring, ntu);
513 
514 	return total_count == (nb_buffs_extra + nb_buffs);
515 }
516 
517 /**
518  * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
519  * @rx_ring: Rx ring
520  * @count: The number of buffers to allocate
521  *
522  * Wrapper for internal allocation routine; figure out how many tail
523  * bumps should take place based on the given threshold
524  *
525  * Returns true if all calls to internal alloc routine succeeded
526  */
527 bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
528 {
529 	u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
530 	u16 leftover, i, tail_bumps;
531 
532 	tail_bumps = count / rx_thresh;
533 	leftover = count - (tail_bumps * rx_thresh);
534 
535 	for (i = 0; i < tail_bumps; i++)
536 		if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
537 			return false;
538 	return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
539 }
540 
541 /**
542  * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
543  * @rx_ring: Rx ring
544  */
545 static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
546 {
547 	int ntc = rx_ring->next_to_clean + 1;
548 
549 	ntc = (ntc < rx_ring->count) ? ntc : 0;
550 	rx_ring->next_to_clean = ntc;
551 	prefetch(ICE_RX_DESC(rx_ring, ntc));
552 }
553 
554 /**
555  * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
556  * @rx_ring: Rx ring
557  * @xdp: Pointer to XDP buffer
558  *
559  * This function allocates a new skb from a zero-copy Rx buffer.
560  *
561  * Returns the skb on success, NULL on failure.
562  */
563 static struct sk_buff *
564 ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
565 {
566 	unsigned int totalsize = xdp->data_end - xdp->data_meta;
567 	unsigned int metasize = xdp->data - xdp->data_meta;
568 	struct sk_buff *skb;
569 
570 	net_prefetch(xdp->data_meta);
571 
572 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
573 			       GFP_ATOMIC | __GFP_NOWARN);
574 	if (unlikely(!skb))
575 		return NULL;
576 
577 	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
578 	       ALIGN(totalsize, sizeof(long)));
579 
580 	if (metasize) {
581 		skb_metadata_set(skb, metasize);
582 		__skb_pull(skb, metasize);
583 	}
584 
585 	xsk_buff_free(xdp);
586 	return skb;
587 }
588 
589 /**
590  * ice_run_xdp_zc - Executes an XDP program in zero-copy path
591  * @rx_ring: Rx ring
592  * @xdp: xdp_buff used as input to the XDP program
593  * @xdp_prog: XDP program to run
594  * @xdp_ring: ring to be used for XDP_TX action
595  *
596  * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
597  */
598 static int
599 ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
600 	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
601 {
602 	int err, result = ICE_XDP_PASS;
603 	u32 act;
604 
605 	act = bpf_prog_run_xdp(xdp_prog, xdp);
606 
607 	if (likely(act == XDP_REDIRECT)) {
608 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
609 		if (!err)
610 			return ICE_XDP_REDIR;
611 		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
612 			result = ICE_XDP_EXIT;
613 		else
614 			result = ICE_XDP_CONSUMED;
615 		goto out_failure;
616 	}
617 
618 	switch (act) {
619 	case XDP_PASS:
620 		break;
621 	case XDP_TX:
622 		result = ice_xmit_xdp_buff(xdp, xdp_ring);
623 		if (result == ICE_XDP_CONSUMED)
624 			goto out_failure;
625 		break;
626 	case XDP_DROP:
627 		result = ICE_XDP_CONSUMED;
628 		break;
629 	default:
630 		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
631 		fallthrough;
632 	case XDP_ABORTED:
633 		result = ICE_XDP_CONSUMED;
634 out_failure:
635 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
636 		break;
637 	}
638 
639 	return result;
640 }
641 
642 /**
643  * ice_clean_rx_irq_zc - consumes packets from the hardware ring
644  * @rx_ring: AF_XDP Rx ring
645  * @budget: NAPI budget
646  *
647  * Returns number of processed packets on success, remaining budget on failure.
648  */
649 int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
650 {
651 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
652 	struct ice_tx_ring *xdp_ring;
653 	unsigned int xdp_xmit = 0;
654 	struct bpf_prog *xdp_prog;
655 	bool failure = false;
656 	int entries_to_alloc;
657 
658 	/* ZC patch is enabled only when XDP program is set,
659 	 * so here it can not be NULL
660 	 */
661 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
662 	xdp_ring = rx_ring->xdp_ring;
663 
664 	while (likely(total_rx_packets < (unsigned int)budget)) {
665 		union ice_32b_rx_flex_desc *rx_desc;
666 		unsigned int size, xdp_res = 0;
667 		struct xdp_buff *xdp;
668 		struct sk_buff *skb;
669 		u16 stat_err_bits;
670 		u16 vlan_tag = 0;
671 		u16 rx_ptype;
672 
673 		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
674 
675 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
676 		if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
677 			break;
678 
679 		/* This memory barrier is needed to keep us from reading
680 		 * any other fields out of the rx_desc until we have
681 		 * verified the descriptor has been written back.
682 		 */
683 		dma_rmb();
684 
685 		if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
686 			break;
687 
688 		xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
689 
690 		size = le16_to_cpu(rx_desc->wb.pkt_len) &
691 				   ICE_RX_FLX_DESC_PKT_LEN_M;
692 		if (!size) {
693 			xdp->data = NULL;
694 			xdp->data_end = NULL;
695 			xdp->data_hard_start = NULL;
696 			xdp->data_meta = NULL;
697 			goto construct_skb;
698 		}
699 
700 		xsk_buff_set_size(xdp, size);
701 		xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
702 
703 		xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
704 		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
705 			xdp_xmit |= xdp_res;
706 		} else if (xdp_res == ICE_XDP_EXIT) {
707 			failure = true;
708 			break;
709 		} else if (xdp_res == ICE_XDP_CONSUMED) {
710 			xsk_buff_free(xdp);
711 		} else if (xdp_res == ICE_XDP_PASS) {
712 			goto construct_skb;
713 		}
714 
715 		total_rx_bytes += size;
716 		total_rx_packets++;
717 
718 		ice_bump_ntc(rx_ring);
719 		continue;
720 
721 construct_skb:
722 		/* XDP_PASS path */
723 		skb = ice_construct_skb_zc(rx_ring, xdp);
724 		if (!skb) {
725 			rx_ring->rx_stats.alloc_buf_failed++;
726 			break;
727 		}
728 
729 		ice_bump_ntc(rx_ring);
730 
731 		if (eth_skb_pad(skb)) {
732 			skb = NULL;
733 			continue;
734 		}
735 
736 		total_rx_bytes += skb->len;
737 		total_rx_packets++;
738 
739 		vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
740 
741 		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
742 				       ICE_RX_FLEX_DESC_PTYPE_M;
743 
744 		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
745 		ice_receive_skb(rx_ring, skb, vlan_tag);
746 	}
747 
748 	entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
749 	if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
750 		failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
751 
752 	ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
753 	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
754 
755 	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
756 		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
757 			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
758 		else
759 			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
760 
761 		return (int)total_rx_packets;
762 	}
763 
764 	return failure ? budget : (int)total_rx_packets;
765 }
766 
767 /**
768  * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
769  * @xdp_ring: XDP Tx ring
770  * @tx_buf: Tx buffer to clean
771  */
772 static void
773 ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
774 {
775 	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
776 	xdp_ring->xdp_tx_active--;
777 	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
778 			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
779 	dma_unmap_len_set(tx_buf, len, 0);
780 }
781 
782 /**
783  * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
784  * @xdp_ring: XDP Tx ring
785  */
786 static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
787 {
788 	u16 ntc = xdp_ring->next_to_clean;
789 	struct ice_tx_desc *tx_desc;
790 	u16 cnt = xdp_ring->count;
791 	struct ice_tx_buf *tx_buf;
792 	u16 xsk_frames = 0;
793 	u16 last_rs;
794 	int i;
795 
796 	last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
797 	tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
798 	if ((tx_desc->cmd_type_offset_bsz &
799 	    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
800 		if (last_rs >= ntc)
801 			xsk_frames = last_rs - ntc + 1;
802 		else
803 			xsk_frames = last_rs + cnt - ntc + 1;
804 	}
805 
806 	if (!xsk_frames)
807 		return;
808 
809 	if (likely(!xdp_ring->xdp_tx_active))
810 		goto skip;
811 
812 	ntc = xdp_ring->next_to_clean;
813 	for (i = 0; i < xsk_frames; i++) {
814 		tx_buf = &xdp_ring->tx_buf[ntc];
815 
816 		if (tx_buf->raw_buf) {
817 			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
818 			tx_buf->raw_buf = NULL;
819 		} else {
820 			xsk_frames++;
821 		}
822 
823 		ntc++;
824 		if (ntc >= xdp_ring->count)
825 			ntc = 0;
826 	}
827 skip:
828 	tx_desc->cmd_type_offset_bsz = 0;
829 	xdp_ring->next_to_clean += xsk_frames;
830 	if (xdp_ring->next_to_clean >= cnt)
831 		xdp_ring->next_to_clean -= cnt;
832 	if (xsk_frames)
833 		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
834 }
835 
836 /**
837  * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
838  * @xdp_ring: XDP ring to produce the HW Tx descriptor on
839  * @desc: AF_XDP descriptor to pull the DMA address and length from
840  * @total_bytes: bytes accumulator that will be used for stats update
841  */
842 static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
843 			 unsigned int *total_bytes)
844 {
845 	struct ice_tx_desc *tx_desc;
846 	dma_addr_t dma;
847 
848 	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
849 	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
850 
851 	tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
852 	tx_desc->buf_addr = cpu_to_le64(dma);
853 	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
854 						      0, desc->len, 0);
855 
856 	*total_bytes += desc->len;
857 }
858 
859 /**
860  * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
861  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
862  * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
863  * @total_bytes: bytes accumulator that will be used for stats update
864  */
865 static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
866 			       unsigned int *total_bytes)
867 {
868 	u16 ntu = xdp_ring->next_to_use;
869 	struct ice_tx_desc *tx_desc;
870 	u32 i;
871 
872 	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
873 		dma_addr_t dma;
874 
875 		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
876 		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
877 
878 		tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
879 		tx_desc->buf_addr = cpu_to_le64(dma);
880 		tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
881 							      0, descs[i].len, 0);
882 
883 		*total_bytes += descs[i].len;
884 	}
885 
886 	xdp_ring->next_to_use = ntu;
887 }
888 
889 /**
890  * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
891  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
892  * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
893  * @nb_pkts: count of packets to be send
894  * @total_bytes: bytes accumulator that will be used for stats update
895  */
896 static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
897 				u32 nb_pkts, unsigned int *total_bytes)
898 {
899 	u32 batched, leftover, i;
900 
901 	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
902 	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
903 	for (i = 0; i < batched; i += PKTS_PER_BATCH)
904 		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
905 	for (; i < batched + leftover; i++)
906 		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
907 }
908 
909 /**
910  * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
911  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
912  */
913 static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
914 {
915 	u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
916 	struct ice_tx_desc *tx_desc;
917 
918 	tx_desc = ICE_TX_DESC(xdp_ring, ntu);
919 	tx_desc->cmd_type_offset_bsz |=
920 		cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
921 }
922 
923 /**
924  * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
925  * @xdp_ring: XDP ring to produce the HW Tx descriptors on
926  *
927  * Returns true if there is no more work that needs to be done, false otherwise
928  */
929 bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
930 {
931 	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
932 	u32 nb_pkts, nb_processed = 0;
933 	unsigned int total_bytes = 0;
934 	int budget;
935 
936 	ice_clean_xdp_irq_zc(xdp_ring);
937 
938 	budget = ICE_DESC_UNUSED(xdp_ring);
939 	budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
940 
941 	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
942 	if (!nb_pkts)
943 		return true;
944 
945 	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
946 		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
947 		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
948 		xdp_ring->next_to_use = 0;
949 	}
950 
951 	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
952 			    &total_bytes);
953 
954 	ice_set_rs_bit(xdp_ring);
955 	ice_xdp_ring_update_tail(xdp_ring);
956 	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
957 
958 	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
959 		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
960 
961 	return nb_pkts < budget;
962 }
963 
964 /**
965  * ice_xsk_wakeup - Implements ndo_xsk_wakeup
966  * @netdev: net_device
967  * @queue_id: queue to wake up
968  * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
969  *
970  * Returns negative on error, zero otherwise.
971  */
972 int
973 ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
974 	       u32 __always_unused flags)
975 {
976 	struct ice_netdev_priv *np = netdev_priv(netdev);
977 	struct ice_q_vector *q_vector;
978 	struct ice_vsi *vsi = np->vsi;
979 	struct ice_tx_ring *ring;
980 
981 	if (test_bit(ICE_VSI_DOWN, vsi->state))
982 		return -ENETDOWN;
983 
984 	if (!ice_is_xdp_ena_vsi(vsi))
985 		return -EINVAL;
986 
987 	if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
988 		return -EINVAL;
989 
990 	ring = vsi->rx_rings[queue_id]->xdp_ring;
991 
992 	if (!ring->xsk_pool)
993 		return -EINVAL;
994 
995 	/* The idea here is that if NAPI is running, mark a miss, so
996 	 * it will run again. If not, trigger an interrupt and
997 	 * schedule the NAPI from interrupt context. If NAPI would be
998 	 * scheduled here, the interrupt affinity would not be
999 	 * honored.
1000 	 */
1001 	q_vector = ring->q_vector;
1002 	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
1003 		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
1004 
1005 	return 0;
1006 }
1007 
1008 /**
1009  * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
1010  * @vsi: VSI to be checked
1011  *
1012  * Returns true if any of the Rx rings has an AF_XDP buff pool attached
1013  */
1014 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
1015 {
1016 	int i;
1017 
1018 	ice_for_each_rxq(vsi, i) {
1019 		if (xsk_get_pool_from_qid(vsi->netdev, i))
1020 			return true;
1021 	}
1022 
1023 	return false;
1024 }
1025 
1026 /**
1027  * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
1028  * @rx_ring: ring to be cleaned
1029  */
1030 void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
1031 {
1032 	u16 ntc = rx_ring->next_to_clean;
1033 	u16 ntu = rx_ring->next_to_use;
1034 
1035 	while (ntc != ntu) {
1036 		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
1037 
1038 		xsk_buff_free(xdp);
1039 		ntc++;
1040 		if (ntc >= rx_ring->count)
1041 			ntc = 0;
1042 	}
1043 }
1044 
1045 /**
1046  * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
1047  * @xdp_ring: XDP_Tx ring
1048  */
1049 void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
1050 {
1051 	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
1052 	u32 xsk_frames = 0;
1053 
1054 	while (ntc != ntu) {
1055 		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
1056 
1057 		if (tx_buf->raw_buf)
1058 			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
1059 		else
1060 			xsk_frames++;
1061 
1062 		tx_buf->raw_buf = NULL;
1063 
1064 		ntc++;
1065 		if (ntc >= xdp_ring->count)
1066 			ntc = 0;
1067 	}
1068 
1069 	if (xsk_frames)
1070 		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
1071 }
1072