// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Intel Corporation. */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ice.h"
#include "ice_base.h"
#include "ice_type.h"
#include "ice_xsk.h"
#include "ice_txrx.h"
#include "ice_txrx_lib.h"
#include "ice_lib.h"

static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
{
	return &rx_ring->xdp_buf[idx];
}

/**
 * ice_qp_reset_stats - Resets all stats for rings of given index
 * @vsi: VSI that contains rings of interest
 * @q_idx: ring index in array
 */
static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
{
	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
	memset(&vsi->tx_rings[q_idx]->stats, 0,
	       sizeof(vsi->tx_rings[q_idx]->stats));
	if (ice_is_xdp_ena_vsi(vsi))
		memset(&vsi->xdp_rings[q_idx]->stats, 0,
		       sizeof(vsi->xdp_rings[q_idx]->stats));
}

/**
 * ice_qp_clean_rings - Cleans all the rings of a given index
 * @vsi: VSI that contains rings of interest
 * @q_idx: ring index in array
 */
static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
{
	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
	if (ice_is_xdp_ena_vsi(vsi)) {
		synchronize_rcu();
		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
	}
	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
}

/**
 * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
 * @vsi: VSI that has netdev
 * @q_vector: q_vector that has NAPI context
 * @enable: true for enable, false for disable
 */
static void
ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
		     bool enable)
{
	if (!vsi->netdev || !q_vector)
		return;

	if (enable)
		napi_enable(&q_vector->napi);
	else
		napi_disable(&q_vector->napi);
}

/**
 * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
 * @vsi: the VSI that contains the queue vector being un-configured
 * @rx_ring: Rx ring that will have its IRQ disabled
 * @q_vector: queue vector
 */
static void
ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
		 struct ice_q_vector *q_vector)
{
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	int base = vsi->base_vector;
	u16 reg;
	u32 val;

	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so only
	 * QINT_RQCTL needs to be handled here
	 */
	reg = rx_ring->reg_idx;
	val = rd32(hw, QINT_RQCTL(reg));
	val &= ~QINT_RQCTL_CAUSE_ENA_M;
	wr32(hw, QINT_RQCTL(reg), val);

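	/* Disable the vector itself and wait for any in-flight interrupt
	 * handler to finish before the queue pair is disabled.
	 */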
	if (q_vector) {
		u16 v_idx = q_vector->v_idx;

		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
		ice_flush(hw);
		synchronize_irq(pf->msix_entries[v_idx + base].vector);
	}
}

/**
 * ice_qvec_cfg_msix - Configure MSI-X for given queue vector
 * @vsi: the VSI that contains the queue vector
 * @q_vector: queue vector
 */
static void
ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
{
	u16 reg_idx = q_vector->reg_idx;
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;
	struct ice_tx_ring *tx_ring;
	struct ice_rx_ring *rx_ring;

	ice_cfg_itr(hw, q_vector);

	ice_for_each_tx_ring(tx_ring, q_vector->tx)
		ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
				      q_vector->tx.itr_idx);

	ice_for_each_rx_ring(rx_ring, q_vector->rx)
		ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
				      q_vector->rx.itr_idx);

	ice_flush(hw);
}

/**
 * ice_qvec_ena_irq - Enable IRQ for given queue vector
 * @vsi: the VSI that contains the queue vector
 * @q_vector: queue vector
 */
static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
{
	struct ice_pf *pf = vsi->back;
	struct ice_hw *hw = &pf->hw;

	ice_irq_dynamic_ena(hw, vsi, q_vector);

	ice_flush(hw);
}

/**
 * ice_qp_dis - Disables a queue pair
 * @vsi: VSI of interest
 * @q_idx: ring index in array
 *
 * Returns 0 on success, negative on failure.
 */
static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
{
	struct ice_txq_meta txq_meta = { };
	struct ice_q_vector *q_vector;
	struct ice_tx_ring *tx_ring;
	struct ice_rx_ring *rx_ring;
	int timeout = 50;
	int err;

	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
		return -EINVAL;

	tx_ring = vsi->tx_rings[q_idx];
	rx_ring = vsi->rx_rings[q_idx];
	q_vector = rx_ring->q_vector;

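	/* Serialize against other queue (re)configuration; the bit is
	 * cleared again in ice_qp_ena() when the queue pair is brought
	 * back up.
	 */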
	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
		timeout--;
		if (!timeout)
			return -EBUSY;
		usleep_range(1000, 2000);
	}
	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));

	ice_qvec_dis_irq(vsi, rx_ring, q_vector);

	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
	if (err)
		return err;
	if (ice_is_xdp_ena_vsi(vsi)) {
		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];

		memset(&txq_meta, 0, sizeof(txq_meta));
		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
					   &txq_meta);
		if (err)
			return err;
	}
	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
	if (err)
		return err;
	ice_clean_rx_ring(rx_ring);

	ice_qvec_toggle_napi(vsi, q_vector, false);
	ice_qp_clean_rings(vsi, q_idx);
	ice_qp_reset_stats(vsi, q_idx);

	return 0;
}

/**
 * ice_qp_ena - Enables a queue pair
 * @vsi: VSI of interest
 * @q_idx: ring index in array
 *
 * Returns 0 on success, negative on failure.
 */
static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
{
	struct ice_aqc_add_tx_qgrp *qg_buf;
	struct ice_q_vector *q_vector;
	struct ice_tx_ring *tx_ring;
	struct ice_rx_ring *rx_ring;
	u16 size;
	int err;

	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
		return -EINVAL;

	size = struct_size(qg_buf, txqs, 1);
	qg_buf = kzalloc(size, GFP_KERNEL);
	if (!qg_buf)
		return -ENOMEM;

	qg_buf->num_txqs = 1;

	tx_ring = vsi->tx_rings[q_idx];
	rx_ring = vsi->rx_rings[q_idx];
	q_vector = rx_ring->q_vector;

	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
	if (err)
		goto free_buf;

	if (ice_is_xdp_ena_vsi(vsi)) {
		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];

		memset(qg_buf, 0, size);
		qg_buf->num_txqs = 1;
		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
		if (err)
			goto free_buf;
		ice_set_ring_xdp(xdp_ring);
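		/* Point the XDP ring at the XSK pool attached to this queue
		 * (or clear it again if none is attached).
		 */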
		ice_tx_xsk_pool(vsi, q_idx);
	}

	err = ice_vsi_cfg_rxq(rx_ring);
	if (err)
		goto free_buf;

	ice_qvec_cfg_msix(vsi, q_vector);

	err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
	if (err)
		goto free_buf;

	clear_bit(ICE_CFG_BUSY, vsi->state);
	ice_qvec_toggle_napi(vsi, q_vector, true);
	ice_qvec_ena_irq(vsi, q_vector);

	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
free_buf:
	kfree(qg_buf);
	return err;
}

/**
 * ice_xsk_pool_disable - disable a buffer pool region
 * @vsi: Current VSI
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
{
	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);

	if (!pool)
		return -EINVAL;

	clear_bit(qid, vsi->af_xdp_zc_qps);
	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);

	return 0;
}

/**
 * ice_xsk_pool_enable - enable a buffer pool region
 * @vsi: Current VSI
 * @pool: pointer to a requested buffer pool region
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
static int
ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
	int err;

	if (vsi->type != ICE_VSI_PF)
		return -EINVAL;

	if (qid >= vsi->netdev->real_num_rx_queues ||
	    qid >= vsi->netdev->real_num_tx_queues)
		return -EINVAL;

	err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
			       ICE_RX_DMA_ATTR);
	if (err)
		return err;

	set_bit(qid, vsi->af_xdp_zc_qps);

	return 0;
}

/**
 * ice_realloc_rx_xdp_bufs - reallocate for either XSK or normal buffer
 * @rx_ring: Rx ring
 * @pool_present: is pool for XSK present
 *
 * Try to allocate the new SW ring and return -ENOMEM if the allocation
 * fails. On success, substitute the current buffer array with the newly
 * allocated one.
 * Returns 0 on success, negative on failure
 */
static int
ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
{
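	/* The two Rx paths use differently sized SW ring entries: the
	 * zero-copy path keeps an array of xdp_buff pointers, the regular
	 * path an array of struct ice_rx_buf, so the SW ring is reallocated
	 * when switching between them.
	 */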
	size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
					  sizeof(*rx_ring->rx_buf);
	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);

	if (!sw_ring)
		return -ENOMEM;

	if (pool_present) {
		kfree(rx_ring->rx_buf);
		rx_ring->rx_buf = NULL;
		rx_ring->xdp_buf = sw_ring;
	} else {
		kfree(rx_ring->xdp_buf);
		rx_ring->xdp_buf = NULL;
		rx_ring->rx_buf = sw_ring;
	}

	return 0;
}

/**
 * ice_realloc_zc_buf - reallocate XDP ZC queue pairs
 * @vsi: Current VSI
 * @zc: is zero copy set
 *
 * Reallocate buffers for the rx_rings that might be used by XSK.
 * XDP requires more memory than rx_buf provides.
 * Returns 0 on success, negative on failure
 */
int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
{
	struct ice_rx_ring *rx_ring;
	unsigned long q;

	for_each_set_bit(q, vsi->af_xdp_zc_qps,
			 max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
		rx_ring = vsi->rx_rings[q];
		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
			return -ENOMEM;
	}

	return 0;
}

/**
 * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
 * @vsi: Current VSI
 * @pool: buffer pool to enable/associate to a ring, NULL to disable
 * @qid: queue ID
 *
 * Returns 0 on success, negative on failure
 */
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
	bool if_running, pool_present = !!pool;
	int ret = 0, pool_failure = 0;

	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
		pool_failure = -EINVAL;
		goto failure;
	}

	if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
	    !is_power_of_2(vsi->tx_rings[qid]->count)) {
		netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
		pool_failure = -EINVAL;
		goto failure;
	}

	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);

	if (if_running) {
		struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];

		ret = ice_qp_dis(vsi, qid);
		if (ret) {
			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
			goto xsk_pool_if_up;
		}

		ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
		if (ret)
			goto xsk_pool_if_up;
	}

	pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
				      ice_xsk_pool_disable(vsi, qid);

xsk_pool_if_up:
	if (if_running) {
		ret = ice_qp_ena(vsi, qid);
		if (!ret && pool_present)
			napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
		else if (ret)
			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
	}

failure:
	if (pool_failure) {
		netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
			   pool_present ? "en" : "dis", pool_failure);
		return pool_failure;
	}

	return ret;
}
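
/* For reference, ice_xsk_pool_setup() is reached from the core XDP socket
 * code through the driver's ndo_bpf handler; a minimal sketch (the exact
 * handler lives in ice_main.c, the names below are illustrative only):
 *
 *	case XDP_SETUP_XSK_POOL:
 *		return ice_xsk_pool_setup(np->vsi, xdp->xsk.pool,
 *					  xdp->xsk.queue_id);
 */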

/**
 * ice_fill_rx_descs - pick buffers from XSK buffer pool and use them
 * @pool: XSK Buffer pool to pull the buffers from
 * @xdp: SW ring of xdp_buff that will hold the buffers
 * @rx_desc: Pointer to Rx descriptors that will be filled
 * @count: The number of buffers to allocate
 *
 * This function allocates a number of Rx buffers from the fill ring
 * or the internal recycle mechanism and places them on the Rx ring.
 *
 * Note that ring wrap should be handled by caller of this function.
 *
 * Returns the number of allocated Rx descriptors
 */
static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
			     union ice_32b_rx_flex_desc *rx_desc, u16 count)
{
	dma_addr_t dma;
	u16 buffs;
	int i;

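	/* xsk_buff_alloc_batch() may return fewer buffers than requested
	 * when the fill queue runs short; only that many descriptors get
	 * written below.
	 */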
	buffs = xsk_buff_alloc_batch(pool, xdp, count);
	for (i = 0; i < buffs; i++) {
		dma = xsk_buff_xdp_get_dma(*xdp);
		rx_desc->read.pkt_addr = cpu_to_le64(dma);
		rx_desc->wb.status_error0 = 0;

		rx_desc++;
		xdp++;
	}

	return buffs;
}

/**
 * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Place @count descriptors onto the Rx ring. Handle the ring wrap for the
 * case where the space from next_to_use up to the end of the ring is less
 * than @count. Finally do a tail bump.
 *
 * Returns true if all allocations were successful, false if any fail.
 */
static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
{
	u32 nb_buffs_extra = 0, nb_buffs = 0;
	union ice_32b_rx_flex_desc *rx_desc;
	u16 ntu = rx_ring->next_to_use;
	u16 total_count = count;
	struct xdp_buff **xdp;

	rx_desc = ICE_RX_DESC(rx_ring, ntu);
	xdp = ice_xdp_buf(rx_ring, ntu);

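	/* When the request crosses the end of the ring, fill the tail part
	 * first; a short allocation there means the pool ran dry, so stop
	 * and bump the tail only for what was actually filled.
	 */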
	if (ntu + count >= rx_ring->count) {
		nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
						   rx_desc,
						   rx_ring->count - ntu);
		if (nb_buffs_extra != rx_ring->count - ntu) {
			ntu += nb_buffs_extra;
			goto exit;
		}
		rx_desc = ICE_RX_DESC(rx_ring, 0);
		xdp = ice_xdp_buf(rx_ring, 0);
		ntu = 0;
		count -= nb_buffs_extra;
		ice_release_rx_desc(rx_ring, 0);
	}

	nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);

	ntu += nb_buffs;
	if (ntu == rx_ring->count)
		ntu = 0;

exit:
	if (rx_ring->next_to_use != ntu)
		ice_release_rx_desc(rx_ring, ntu);

	return total_count == (nb_buffs_extra + nb_buffs);
}

/**
 * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
 * @rx_ring: Rx ring
 * @count: The number of buffers to allocate
 *
 * Wrapper for internal allocation routine; figure out how many tail
 * bumps should take place based on the given threshold
 *
 * Returns true if all calls to internal alloc routine succeeded
 */
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
{
	u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
	u16 batched, leftover, i, tail_bumps;

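	/* Ring sizes are required to be a power of 2 (see
	 * ice_xsk_pool_setup()), so rx_thresh is one too and the AND below
	 * is equivalent to count % rx_thresh.
	 */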
	batched = ALIGN_DOWN(count, rx_thresh);
	tail_bumps = batched / rx_thresh;
	leftover = count & (rx_thresh - 1);

	for (i = 0; i < tail_bumps; i++)
		if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
			return false;
	return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
}

/**
 * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
 * @rx_ring: Rx ring
 */
static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
{
	int ntc = rx_ring->next_to_clean + 1;

	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;
	prefetch(ICE_RX_DESC(rx_ring, ntc));
}

/**
 * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
 * @rx_ring: Rx ring
 * @xdp: Pointer to XDP buffer
 *
 * This function allocates a new skb from a zero-copy Rx buffer.
 *
 * Returns the skb on success, NULL on failure.
 */
static struct sk_buff *
ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
	unsigned int totalsize = xdp->data_end - xdp->data_meta;
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	net_prefetch(xdp->data_meta);

	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

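	/* Copy metadata and packet data out of the zero-copy buffer so the
	 * buffer itself can be handed back to the pool below.
	 */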
	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
	       ALIGN(totalsize, sizeof(long)));

	if (metasize) {
		skb_metadata_set(skb, metasize);
		__skb_pull(skb, metasize);
	}

	xsk_buff_free(xdp);
	return skb;
}

/**
 * ice_run_xdp_zc - Executes an XDP program in zero-copy path
 * @rx_ring: Rx ring
 * @xdp: xdp_buff used as input to the XDP program
 * @xdp_prog: XDP program to run
 * @xdp_ring: ring to be used for XDP_TX action
 *
 * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
 */
static int
ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
{
	int err, result = ICE_XDP_PASS;
	u32 act;

	act = bpf_prog_run_xdp(xdp_prog, xdp);

	if (likely(act == XDP_REDIRECT)) {
		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
		if (!err)
			return ICE_XDP_REDIR;
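		/* -ENOBUFS with need_wakeup in use means the XSK Rx queue is
		 * full; return ICE_XDP_EXIT so the caller stops processing
		 * and the frame is retried on the next NAPI poll instead of
		 * being dropped.
		 */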
		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
			result = ICE_XDP_EXIT;
		else
			result = ICE_XDP_CONSUMED;
		goto out_failure;
	}

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		result = ice_xmit_xdp_buff(xdp, xdp_ring);
		if (result == ICE_XDP_CONSUMED)
			goto out_failure;
		break;
	case XDP_DROP:
		result = ICE_XDP_CONSUMED;
		break;
	default:
		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
		fallthrough;
	case XDP_ABORTED:
		result = ICE_XDP_CONSUMED;
out_failure:
		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
		break;
	}

	return result;
}

/**
 * ice_clean_rx_irq_zc - consumes packets from the hardware ring
 * @rx_ring: AF_XDP Rx ring
 * @budget: NAPI budget
 *
 * Returns number of processed packets on success, remaining budget on failure.
 */
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
{
	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
	struct ice_tx_ring *xdp_ring;
	unsigned int xdp_xmit = 0;
	struct bpf_prog *xdp_prog;
	bool failure = false;
	int entries_to_alloc;

	/* ZC path is enabled only when an XDP program is set,
	 * so here it cannot be NULL
	 */
	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
	xdp_ring = rx_ring->xdp_ring;

	while (likely(total_rx_packets < (unsigned int)budget)) {
		union ice_32b_rx_flex_desc *rx_desc;
		unsigned int size, xdp_res = 0;
		struct xdp_buff *xdp;
		struct sk_buff *skb;
		u16 stat_err_bits;
		u16 vlan_tag = 0;
		u16 rx_ptype;

		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);

		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
		if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we have
		 * verified the descriptor has been written back.
		 */
		dma_rmb();

		if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
			break;

		xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);

		size = le16_to_cpu(rx_desc->wb.pkt_len) &
				   ICE_RX_FLX_DESC_PKT_LEN_M;
		if (!size) {
			xdp->data = NULL;
			xdp->data_end = NULL;
			xdp->data_hard_start = NULL;
			xdp->data_meta = NULL;
			goto construct_skb;
		}

		xsk_buff_set_size(xdp, size);
		xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);

		xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
			xdp_xmit |= xdp_res;
		} else if (xdp_res == ICE_XDP_EXIT) {
			failure = true;
			break;
		} else if (xdp_res == ICE_XDP_CONSUMED) {
			xsk_buff_free(xdp);
		} else if (xdp_res == ICE_XDP_PASS) {
			goto construct_skb;
		}

		total_rx_bytes += size;
		total_rx_packets++;

		ice_bump_ntc(rx_ring);
		continue;

construct_skb:
		/* XDP_PASS path */
		skb = ice_construct_skb_zc(rx_ring, xdp);
		if (!skb) {
			rx_ring->rx_stats.alloc_buf_failed++;
			break;
		}

		ice_bump_ntc(rx_ring);

		if (eth_skb_pad(skb)) {
			skb = NULL;
			continue;
		}

		total_rx_bytes += skb->len;
		total_rx_packets++;

		vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);

		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
				       ICE_RX_FLEX_DESC_PTYPE_M;

		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
		ice_receive_skb(rx_ring, skb, vlan_tag);
	}

	entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
	if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
		failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);

	ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);

	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
		else
			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);

		return (int)total_rx_packets;
	}

	return failure ? budget : (int)total_rx_packets;
}

/**
 * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
 * @xdp_ring: XDP Tx ring
 * @tx_buf: Tx buffer to clean
 */
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
	xdp_ring->xdp_tx_active--;
	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
	dma_unmap_len_set(tx_buf, len, 0);
}

/**
 * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
 * @xdp_ring: XDP ring to clean
 * @napi_budget: number of descriptors that NAPI allows us to clean
 *
 * Returns count of cleaned descriptors
 */
static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
{
	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
	int budget = napi_budget / tx_thresh;
	u16 next_dd = xdp_ring->next_dd;
	u16 ntc, cleared_dds = 0;

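	/* Tx descriptors are produced in tx_thresh sized chunks with the RS
	 * bit requested on the last descriptor of each chunk (see
	 * ice_fill_tx_hw_ring()), so a set DD bit on that descriptor means
	 * the whole chunk can be reclaimed at once.
	 */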
	do {
		struct ice_tx_desc *next_dd_desc;
		u16 desc_cnt = xdp_ring->count;
		struct ice_tx_buf *tx_buf;
		u32 xsk_frames;
		u16 i;

		next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
		if (!(next_dd_desc->cmd_type_offset_bsz &
		    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
			break;

		cleared_dds++;
		xsk_frames = 0;
		if (likely(!xdp_ring->xdp_tx_active)) {
			xsk_frames = tx_thresh;
			goto skip;
		}

		ntc = xdp_ring->next_to_clean;

		for (i = 0; i < tx_thresh; i++) {
			tx_buf = &xdp_ring->tx_buf[ntc];

			if (tx_buf->raw_buf) {
				ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
				tx_buf->raw_buf = NULL;
			} else {
				xsk_frames++;
			}

			ntc++;
			if (ntc >= xdp_ring->count)
				ntc = 0;
		}
skip:
		xdp_ring->next_to_clean += tx_thresh;
		if (xdp_ring->next_to_clean >= desc_cnt)
			xdp_ring->next_to_clean -= desc_cnt;
		if (xsk_frames)
			xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
		next_dd_desc->cmd_type_offset_bsz = 0;
		next_dd = next_dd + tx_thresh;
		if (next_dd >= desc_cnt)
			next_dd = tx_thresh - 1;
	} while (--budget);

	xdp_ring->next_dd = next_dd;

	return cleared_dds * tx_thresh;
}

/**
 * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
 * @xdp_ring: XDP ring to produce the HW Tx descriptor on
 * @desc: AF_XDP descriptor to pull the DMA address and length from
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
			 unsigned int *total_bytes)
{
	struct ice_tx_desc *tx_desc;
	dma_addr_t dma;

	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);

	tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
	tx_desc->buf_addr = cpu_to_le64(dma);
	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
						      0, desc->len, 0);

	*total_bytes += desc->len;
}

/**
 * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
			       unsigned int *total_bytes)
{
	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
	u16 ntu = xdp_ring->next_to_use;
	struct ice_tx_desc *tx_desc;
	u32 i;

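	/* loop_unrolled_for hints to the compiler that this fixed-count
	 * (PKTS_PER_BATCH) loop should be unrolled.
	 */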
	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
		dma_addr_t dma;

		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);

		tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
		tx_desc->buf_addr = cpu_to_le64(dma);
		tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
							      0, descs[i].len, 0);

		*total_bytes += descs[i].len;
	}

	xdp_ring->next_to_use = ntu;

	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
		tx_desc->cmd_type_offset_bsz |=
			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
		xdp_ring->next_rs += tx_thresh;
	}
}

/**
 * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
 * @nb_pkts: count of packets to be sent
 * @total_bytes: bytes accumulator that will be used for stats update
 */
static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
				u32 nb_pkts, unsigned int *total_bytes)
{
	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
	u32 batched, leftover, i;

	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
	for (i = 0; i < batched; i += PKTS_PER_BATCH)
		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
	for (; i < batched + leftover; i++)
		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);

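	/* next_rs tracks the descriptor that should carry the next RS
	 * (report status) bit; request a completion at most once per
	 * tx_thresh descriptors to limit descriptor writeback traffic.
	 */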
	if (xdp_ring->next_to_use > xdp_ring->next_rs) {
		struct ice_tx_desc *tx_desc;

		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
		tx_desc->cmd_type_offset_bsz |=
			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
		xdp_ring->next_rs += tx_thresh;
	}
}

/**
 * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
 * @xdp_ring: XDP ring to produce the HW Tx descriptors on
 * @budget: number of free descriptors on HW Tx ring that can be used
 * @napi_budget: number of descriptors that NAPI allows us to clean
 *
 * Returns true if there is no more work that needs to be done, false otherwise
 */
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
{
	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
	u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
	u32 nb_pkts, nb_processed = 0;
	unsigned int total_bytes = 0;

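	/* Running low on free descriptors: reclaim completed ones first so
	 * the budget reflects the space actually available on the ring.
	 */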
	if (budget < tx_thresh)
		budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);

	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
	if (!nb_pkts)
		return true;

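	/* If the batch would run past the end of the ring, fill up to the
	 * wrap point first, close that chunk with an RS bit and continue
	 * from descriptor 0 with the remainder.
	 */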
	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
		struct ice_tx_desc *tx_desc;

		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
		tx_desc->cmd_type_offset_bsz |=
			cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
		xdp_ring->next_rs = tx_thresh - 1;
		xdp_ring->next_to_use = 0;
	}

	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
			    &total_bytes);

	ice_xdp_ring_update_tail(xdp_ring);
	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);

	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);

	return nb_pkts < budget;
}

/**
 * ice_xsk_wakeup - Implements ndo_xsk_wakeup
 * @netdev: net_device
 * @queue_id: queue to wake up
 * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
 *
 * Returns negative on error, zero otherwise.
 */
int
ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
	       u32 __always_unused flags)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_q_vector *q_vector;
	struct ice_vsi *vsi = np->vsi;
	struct ice_tx_ring *ring;

	if (test_bit(ICE_VSI_DOWN, vsi->state))
		return -ENETDOWN;

	if (!ice_is_xdp_ena_vsi(vsi))
		return -EINVAL;

	if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
		return -EINVAL;

	ring = vsi->rx_rings[queue_id]->xdp_ring;

	if (!ring->xsk_pool)
		return -EINVAL;

	/* The idea here is that if NAPI is running, mark a miss, so
	 * it will run again. If not, trigger an interrupt and
	 * schedule the NAPI from interrupt context. If NAPI would be
	 * scheduled here, the interrupt affinity would not be
	 * honored.
	 */
	q_vector = ring->q_vector;
	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
		ice_trigger_sw_intr(&vsi->back->hw, q_vector);

	return 0;
}
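
/* ice_xsk_wakeup() is hooked up as .ndo_xsk_wakeup in the driver's
 * net_device_ops (in ice_main.c); user space reaches it via sendto()/poll()
 * on the AF_XDP socket when the need_wakeup flag is in use.
 */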

/**
 * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
 * @vsi: VSI to be checked
 *
 * Returns true if any of the Rx rings has an AF_XDP buff pool attached
 */
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
{
	int i;

	ice_for_each_rxq(vsi, i) {
		if (xsk_get_pool_from_qid(vsi->netdev, i))
			return true;
	}

	return false;
}

/**
 * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
 * @rx_ring: ring to be cleaned
 */
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
	u16 count_mask = rx_ring->count - 1;
	u16 ntc = rx_ring->next_to_clean;
	u16 ntu = rx_ring->next_to_use;

	for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);

		xsk_buff_free(xdp);
	}
}

/**
 * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
 * @xdp_ring: XDP_Tx ring
 */
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
{
	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
	u32 xsk_frames = 0;

	while (ntc != ntu) {
		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];

		if (tx_buf->raw_buf)
			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
		else
			xsk_frames++;

		tx_buf->raw_buf = NULL;

		ntc++;
		if (ntc >= xdp_ring->count)
			ntc = 0;
	}

	if (xsk_frames)
		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}