// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}
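/* For reference, this is roughly the layout the cast above relies on
 * (mlx5e_xdp_buff is defined in en/xdp.h; this is a sketch, not authoritative):
 *
 *	struct mlx5e_xdp_buff {
 *		struct xdp_buff xdp;	(must stay first for the cast)
 *		struct mlx5_cqe64 *cqe;
 *		struct mlx5e_rq *rq;
 *	};
 *
 * The private fields (cqe, rq) must fit inside xdp_buff_xsk::cb;
 * XSK_CHECK_PRIV_TYPE() below enforces this at build time.
 */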

int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	struct xdp_buff **xsk_buffs;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!xsk_buffs[batch]))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
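	/* Copy the static part of the UMR WQE, prebuilt when the RQ was
	 * created; only the per-frame address translation entries are filled
	 * in below.
	 */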
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
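		/* Aligned layout: one MTT entry per frame. MTT maps memory at
		 * page granularity, so only the physical address tag needs to
		 * be programmed.
		 */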
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
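		/* Unaligned layout: one KSM entry per frame. KSM allows
		 * arbitrary (non page-aligned) addresses, as long as all
		 * mappings share the same size.
		 */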
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

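		/* Triple layout: four equally sized KSM entries per frame.
		 * The first three map consecutive mapping_size chunks of the
		 * XSK frame (together covering 3/4 of the RQ page), and the
		 * fourth maps the overflow page, padding the mapping to a
		 * full page, presumably so that no stride ever lands in
		 * unmapped memory.
		 */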
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

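		/* Oversized layout (MLX5E_MPWRQ_UMR_MODE_OVERSIZED): one KLM
		 * pair per frame. The first KLM maps the whole XSK chunk, the
		 * second maps the overflow page with just enough bytes to pad
		 * the mapping to a full RQ page. KLM allows mappings of
		 * different sizes within the same MKey.
		 */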
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

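	/* Nothing is marked for skipping yet: every frame in this MPWQE will
	 * be released back to the XSK pool on deallocation, unless XDP_TX or
	 * XDP_REDIRECT takes ownership of it later (see the
	 * skip_release_bitmap usage in mlx5e_xsk_skb_from_cqe_mpwrq_linear()).
	 */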
	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
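	/* xlt_offset is expressed in octwords (16-byte units) into the
	 * translation table, hence the division by MLX5_OCTWORD and the
	 * per-mode entry-size multipliers below.
	 */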
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;
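	/* The doorbell is not rung here: only the control segment pointer is
	 * saved, and the caller is expected to ring the doorbell once after
	 * posting a whole batch of UMR WQEs.
	 */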

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(xsk_buffs[batch]);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
	 * rq->wqe.alloc_units->xsk_buffs array allocated here.
	 */
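	/* xsk_buff_alloc_batch() fills a contiguous array, so the request is
	 * split at the ring wrap-around: first the tail of the ring starting
	 * at ix, then, if the tail was filled completely, the head.
	 */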
	buffs = rq->wqe.alloc_units->xsk_buffs;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return alloc;
}

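/* Non-batched variant: allocate one XSK frame per WQE and stop at the first
 * allocation failure, returning the number of WQEs that were filled.
 */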
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!*frag->xskp))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return wqe_bulk;
}

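/* XDP_PASS helper: copy the received frame, including any XDP metadata placed
 * in front of the packet by the program, out of the UMEM into a freshly
 * allocated SKB, then register the metadata area so the stack can still
 * reach it.
 */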
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
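	/* Sync the frame for CPU access (a no-op on DMA-coherent platforms)
	 * before the XDP program or the SKB copy path reads it.
	 */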
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}