/* drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c (revision 3e7759b94a0fcfdd6771caa64a37dda7ce825874) */
/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>
#include <net/page_pool/helpers.h>

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
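
/* Worked example (illustrative only; the numbers below are assumptions, not
 * taken from this file): with 4 KiB pages, S = SKB_DATA_ALIGN(sizeof(struct
 * skb_shared_info)) = 320 and hr = 256, SKB_MAX_HEAD(hr) = PAGE_SIZE - hr - S
 * = 4096 - 256 - 320 = 3520, and MLX5E_HW2SW_MTU() then subtracts the hard
 * MTU (Ethernet overhead), so the reported maximum XDP MTU ends up a bit
 * below 3520 bytes.
 */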

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct xdp_buff *xdp)
{
	struct page *page = virt_to_page(xdp->data);
	struct mlx5e_xmit_data_frags xdptxdf = {};
	struct mlx5e_xmit_data *xdptxd;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	int i;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd = &xdptxdf.xd;
	xdptxd->data = xdpf->data;
	xdptxd->len  = xdpf->len;
	xdptxd->has_frags = xdp_frame_has_frags(xdpf);

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even if this
		 * function returns false, the xdp_buff must not be recycled,
		 * as that was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		if (unlikely(xdptxd->has_frags))
			return false;

		dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd->dma_addr = dma_addr;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
			return false;

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.dma_addr = dma_addr });
		return true;
	}

	/* The driver assumes that xdp_convert_buff_to_frame returns an
	 * xdp_frame that points to the same memory region as the original
	 * xdp_buff. This allows mapping the memory only once and using
	 * DMA_BIDIRECTIONAL mode.
	 */

	dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
	dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);

	if (xdptxd->has_frags) {
		xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
		xdptxdf.dma_arr = NULL;

		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
			dma_addr_t addr;
			u32 len;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			len = skb_frag_size(frag);
			dma_sync_single_for_device(sq->pdev, addr, len,
						   DMA_BIDIRECTIONAL);
		}
	}

	xdptxd->dma_addr = dma_addr;

	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
		return false;

	/* xmit_mode == MLX5E_XDP_XMIT_MODE_PAGE */
	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
			     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_PAGE });

	if (xdptxd->has_frags) {
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info)
				     { .page.num = 1 + xdptxdf.sinfo->nr_frags });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.page = page });
		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];

			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
					     (union mlx5e_xdp_info)
					     { .page.page = skb_frag_page(frag) });
		}
	} else {
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.num = 1 });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .page.page = page });
	}

	return true;
}

static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;

	if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp)))
		return -ENODATA;

	*timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time,
					_ctx->rq->clock, get_cqe_ts(_ctx->cqe));
	return 0;
}

/* Mapping of the HW RSS type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4 bits */
#define RSS_TYPE_MAX_TABLE	16 /* 4 bits => max 16 entries */
#define RSS_L4		GENMASK(1, 0)
#define RSS_L3		GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */

/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4, sorted numerically */
enum mlx5_rss_hash_type {
	RSS_TYPE_NO_HASH	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L3_IPV4	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV4_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV4_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV4_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
	RSS_TYPE_L3_IPV6	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV6_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV6_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV6_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};

/* Invalid combinations simply map to zero, so no boundary checks are needed */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
	[RSS_TYPE_NO_HASH]	 = XDP_RSS_TYPE_NONE,
	[1]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[2]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[3]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[RSS_TYPE_L3_IPV4]	 = XDP_RSS_TYPE_L3_IPV4,
	[RSS_TYPE_L4_IPV4_TCP]	 = XDP_RSS_TYPE_L4_IPV4_TCP,
	[RSS_TYPE_L4_IPV4_UDP]	 = XDP_RSS_TYPE_L4_IPV4_UDP,
	[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
	[RSS_TYPE_L3_IPV6]	 = XDP_RSS_TYPE_L3_IPV6,
	[RSS_TYPE_L4_IPV6_TCP]	 = XDP_RSS_TYPE_L4_IPV6_TCP,
	[RSS_TYPE_L4_IPV6_UDP]   = XDP_RSS_TYPE_L4_IPV6_UDP,
	[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
	[12]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[13]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[14]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[15]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
};
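
/* Worked example (illustrative; assumes the usual CQE encoding where
 * CQE_RSS_IPV4 == 1 and CQE_RSS_L4_TCP == 1): for an IPv4/TCP packet the CQE
 * reports CQE_RSS_IPV4 in the CQE_RSS_HTYPE_IP field (bits 3:2) and
 * CQE_RSS_L4_TCP in the CQE_RSS_HTYPE_L4 field. mlx5e_xdp_rx_hash() below
 * keeps the IP bits in place (0x4) and FIELD_GETs the L4 bits down into
 * bits 1:0 (0x1), so the lookup index is 0x5 == RSS_TYPE_L4_IPV4_TCP, which
 * maps to XDP_RSS_TYPE_L4_IPV4_TCP.
 */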

static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
			     enum xdp_rss_hash_type *rss_type)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
	const struct mlx5_cqe64 *cqe = _ctx->cqe;
	u32 hash_type, l4_type, ip_type, lookup;

	if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
		return -ENODATA;

	*hash = be32_to_cpu(cqe->rss_hash_result);

	hash_type = cqe->rss_hash_type;
	BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
	ip_type = hash_type & CQE_RSS_HTYPE_IP;
	l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
	lookup = ip_type | l4_type;
	*rss_type = mlx5_xdp_rss_type[lookup];

	return 0;
}

const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
	.xmo_rx_timestamp		= mlx5e_xdp_rx_timestamp,
	.xmo_rx_hash			= mlx5e_xdp_rx_hash,
};

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
{
	struct xdp_buff *xdp = &mxbuf->xdp;
	u32 act;
	int err;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, the page refcount is 1 here */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
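		/* Illustration (sizes are assumptions, not restated here):
		 * with 64-byte WQEBBs and 4 KiB WQ fragments, a fragment holds
		 * 64 WQEBBs. If pi is 62 WQEBBs into a fragment and size is 4,
		 * contig_wqebbs is 2, so two NOPs are posted and the real WQE
		 * starts at the beginning of the next fragment.
		 */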
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
	};

	stats->mpwqe++;
}

void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc       *wq    = &sq->wq;
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

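	/* Illustrative only: a WQEBB carries MLX5_SEND_WQEBB_NUM_DS 16-byte
	 * data segments (4 with the usual 64-byte WQEBB), so e.g. a session
	 * with ds_count == 10 rounds up to 3 WQEBBs here.
	 */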
	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts   = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     sq->stop_room))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result);

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   int check_result)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_xmit_data *p = xdptxd;
	struct mlx5e_xmit_data tmp;

	if (xdptxd->has_frags) {
		struct mlx5e_xmit_data_frags *xdptxdf =
			container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);

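		/* The total segment count is the linear part (one if
		 * xdptxd->len is non-zero) plus the page fragments.
		 */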
		if (!!xdptxd->len + xdptxdf->sinfo->nr_frags > 1) {
			/* MPWQE is enabled, but a multi-buffer packet is queued for
			 * transmission. MPWQE can't send fragmented packets, so close
			 * the current session and fall back to a regular WQE.
			 */
			if (unlikely(sq->mpwqe.wqe))
				mlx5e_xdp_mpwqe_complete(sq);
			return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
		}
		if (!xdptxd->len) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[0];

			tmp.data = skb_frag_address(frag);
			tmp.len = skb_frag_size(frag);
			tmp.dma_addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[0] :
				page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			p = &tmp;
		}
	}

	if (unlikely(p->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, p, stats);

	if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
		mlx5e_xdp_mpwqe_complete(sq);

	stats->xmit++;
	return true;
}

static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result)
{
	struct mlx5e_xmit_data_frags *xdptxdf =
		container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
	struct mlx5_wq_cyc       *wq   = &sq->wq;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5e_tx_wqe *wqe;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;
	u16 ds_cnt, inline_hdr_sz;
	unsigned int frags_size;
	u8 num_wqebbs = 1;
	int num_frags = 0;
	bool inline_ok;
	bool linear;
	u16 pi;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	inline_ok = sq->min_inline_mode == MLX5_INLINE_MODE_NONE ||
		dma_len >= MLX5E_XDP_MIN_INLINE;
	frags_size = xdptxd->has_frags ? xdptxdf->sinfo->xdp_frags_size : 0;

	if (unlikely(!inline_ok || sq->hw_mtu < dma_len + frags_size)) {
		stats->err++;
		return false;
	}

	inline_hdr_sz = 0;
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;

	linear = !!(dma_len - inline_hdr_sz);
	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + linear + !!inline_hdr_sz;

	/* check_result must be 0 if sinfo is passed. */
	if (!check_result) {
		int stop_room = 1;

		if (xdptxd->has_frags) {
			ds_cnt += xdptxdf->sinfo->nr_frags;
			num_frags = xdptxdf->sinfo->nr_frags;
			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
			 * enough to hold all fragments.
			 */
			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
		}

		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
	}
	if (unlikely(check_result < 0))
		return false;

	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	net_prefetchw(wqe);

	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	/* copy the inline part if required */
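	/* Note (sizes follow the usual definitions, which are not restated in
	 * this file): MLX5E_XDP_MIN_INLINE is ETH_HLEN + VLAN_HLEN = 18 bytes,
	 * while eseg->inline_hdr.start holds only the first 2 of them; the
	 * remaining 16 bytes overlay the first data segment slot, which is why
	 * dseg is advanced past it and ds_cnt counts an extra !!inline_hdr_sz.
	 */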
	if (inline_hdr_sz) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
		       inline_hdr_sz - sizeof(eseg->inline_hdr.start));
		dma_len  -= inline_hdr_sz;
		dma_addr += inline_hdr_sz;
		dseg++;
	}

	/* write the dma part */
	if (linear) {
		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->byte_count = cpu_to_be32(dma_len);
		dseg->lkey       = sq->mkey_be;
		dseg++;
	}

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
		int i;

		memset(&cseg->trailer, 0, sizeof(cseg->trailer));
		memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));

		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);

		for (i = 0; i < num_frags; i++) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
			dma_addr_t addr;

			addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] :
				page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);

			dseg->addr = cpu_to_be64(addr);
			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
			dseg->lkey = sq->mkey_be;
			dseg++;
		}

		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = num_wqebbs,
			.num_pkts = 1,
		};

		sq->pc += num_wqebbs;
	} else {
		cseg->fm_ce_se = 0;

		sq->pc++;
	}

	sq->doorbell_cseg = cseg;

	stats->xmit++;
	return true;
}

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  struct xdp_frame_bulk *bq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

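	/* Expected layout of the xdpi FIFO entries popped below, one group per
	 * completed packet:
	 *   MODE_FRAME: mode, xdpf, dma_addr, then one dma_addr per fragment
	 *   MODE_PAGE:  mode, page count, then that many page pointers
	 *   MODE_XSK:   a single mode entry
	 */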
	for (i = 0; i < wi->num_pkts; i++) {
		union mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME: {
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			struct xdp_frame *xdpf;
			dma_addr_t dma_addr;

			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			xdpf = xdpi.frame.xdpf;
			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			dma_addr = xdpi.frame.dma_addr;

			dma_unmap_single(sq->pdev, dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			if (xdp_frame_has_frags(xdpf)) {
				struct skb_shared_info *sinfo;
				int j;

				sinfo = xdp_get_shared_info_from_frame(xdpf);
				for (j = 0; j < sinfo->nr_frags; j++) {
					skb_frag_t *frag = &sinfo->frags[j];

					xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
					dma_addr = xdpi.frame.dma_addr;

					dma_unmap_single(sq->pdev, dma_addr,
							 skb_frag_size(frag), DMA_TO_DEVICE);
				}
			}
			xdp_return_frame_bulk(xdpf, bq);
			break;
		}
		case MLX5E_XDP_XMIT_MODE_PAGE: {
			/* XDP_TX from the regular RQ */
			u8 num, n = 0;

			xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
			num = xdpi.page.num;

			do {
				struct page *page;

				xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
				page = xdpi.page.page;

				/* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
				 * as we know this is a page_pool page.
				 */
				page_pool_recycle_direct(page->pp, page);
			} while (++n < num);

			break;
		}
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct xdp_frame_bulk bq;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	xdp_frame_bulk_init(&bq);

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

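		/* A single CQE completes every WQE posted up to and including
		 * the one whose counter matches cqe->wqe_counter.
		 */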
		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	xdp_flush_frame_bulk(&bq);

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	struct xdp_frame_bulk bq;
	u32 xsk_frames = 0;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* need for xdp_return_frame_bulk */

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
	}

	xdp_flush_frame_bulk(&bq);
	rcu_read_unlock();

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct mlx5e_xmit_data_frags xdptxdf = {};
		struct xdp_frame *xdpf = frames[i];
		dma_addr_t dma_arr[MAX_SKB_FRAGS];
		struct mlx5e_xmit_data *xdptxd;
		bool ret;

		xdptxd = &xdptxdf.xd;
		xdptxd->data = xdpf->data;
		xdptxd->len = xdpf->len;
		xdptxd->has_frags = xdp_frame_has_frags(xdpf);
		xdptxd->dma_addr = dma_map_single(sq->pdev, xdptxd->data,
						  xdptxd->len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd->dma_addr)))
			break;

		if (xdptxd->has_frags) {
			int j;

			xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);
			xdptxdf.dma_arr = dma_arr;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++) {
				skb_frag_t *frag = &xdptxdf.sinfo->frags[j];

				dma_arr[j] = dma_map_single(sq->pdev, skb_frag_address(frag),
							    skb_frag_size(frag), DMA_TO_DEVICE);

				if (!dma_mapping_error(sq->pdev, dma_arr[j]))
					continue;
				/* mapping error */
				while (--j >= 0)
					dma_unmap_single(sq->pdev, dma_arr[j],
							 skb_frag_size(&xdptxdf.sinfo->frags[j]),
							 DMA_TO_DEVICE);
				goto out;
			}
		}

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0);
		if (unlikely(!ret)) {
			int j;

			dma_unmap_single(sq->pdev, xdptxd->dma_addr,
					 xdptxd->len, DMA_TO_DEVICE);
			if (!xdptxd->has_frags)
				break;
			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				dma_unmap_single(sq->pdev, dma_arr[j],
						 skb_frag_size(&xdptxdf.sinfo->frags[j]),
						 DMA_TO_DEVICE);
			break;
		}

		/* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .mode = MLX5E_XDP_XMIT_MODE_FRAME });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.xdpf = xdpf });
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
				     (union mlx5e_xdp_info) { .frame.dma_addr = xdptxd->dma_addr });
		if (xdptxd->has_frags) {
			int j;

			for (j = 0; j < xdptxdf.sinfo->nr_frags; j++)
				mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo,
						     (union mlx5e_xdp_info)
						     { .frame.dma_addr = dma_arr[j] });
		}
		nxmit++;
	}

out:
	if (sq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(sq);

	if (flags & XDP_XMIT_FLUSH)
		mlx5e_xmit_xdp_doorbell(sq);

	return nxmit;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}