1 /*
2  * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/ip.h>
34 #include <linux/ipv6.h>
35 #include <linux/tcp.h>
36 #include <linux/bitmap.h>
37 #include <linux/filter.h>
38 #include <net/ip6_checksum.h>
39 #include <net/page_pool.h>
40 #include <net/inet_ecn.h>
41 #include <net/gro.h>
42 #include <net/udp.h>
43 #include <net/tcp.h>
44 #include "en.h"
45 #include "en/txrx.h"
46 #include "en_tc.h"
47 #include "eswitch.h"
48 #include "en_rep.h"
49 #include "en/rep/tc.h"
50 #include "ipoib/ipoib.h"
51 #include "en_accel/ipsec.h"
52 #include "en_accel/ipsec_rxtx.h"
53 #include "en_accel/ktls_txrx.h"
54 #include "en/xdp.h"
55 #include "en/xsk/rx.h"
56 #include "en/health.h"
57 #include "en/params.h"
58 #include "devlink.h"
59 #include "en/devlink.h"
60 
61 static struct sk_buff *
62 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
63 				u16 cqe_bcnt, u32 head_offset, u32 page_idx);
64 static struct sk_buff *
65 mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
66 				   u16 cqe_bcnt, u32 head_offset, u32 page_idx);
67 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
68 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
69 static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
70 
71 const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
72 	.handle_rx_cqe       = mlx5e_handle_rx_cqe,
73 	.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
74 	.handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
75 };
76 
77 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
78 {
79 	return config->rx_filter == HWTSTAMP_FILTER_ALL;
80 }
81 
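/* CQE compression: the HW can emit a "title" CQE followed by arrays of mini
 * CQEs packed into the subsequent CQE slots. The helpers below read those
 * slots and expand every mini CQE back into the title CQE, so the rest of
 * the RX path handles each packet as a regular completion.
 */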
82 static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
83 				       u32 cqcc, void *data)
84 {
85 	u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);
86 
87 	memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
88 }
89 
90 static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
91 					 struct mlx5_cqwq *wq,
92 					 u32 cqcc)
93 {
94 	struct mlx5e_cq_decomp *cqd = &rq->cqd;
95 	struct mlx5_cqe64 *title = &cqd->title;
96 
97 	mlx5e_read_cqe_slot(wq, cqcc, title);
98 	cqd->left        = be32_to_cpu(title->byte_cnt);
99 	cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
100 	rq->stats->cqe_compress_blks++;
101 }
102 
103 static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
104 					    struct mlx5e_cq_decomp *cqd,
105 					    u32 cqcc)
106 {
107 	mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
108 	cqd->mini_arr_idx = 0;
109 }
110 
111 static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
112 {
113 	u32 cqcc   = wq->cc;
114 	u8  op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
115 	u32 ci     = mlx5_cqwq_ctr2ix(wq, cqcc);
116 	u32 wq_sz  = mlx5_cqwq_get_size(wq);
117 	u32 ci_top = min_t(u32, wq_sz, ci + n);
118 
119 	for (; ci < ci_top; ci++, n--) {
120 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
121 
122 		cqe->op_own = op_own;
123 	}
124 
125 	if (unlikely(ci == wq_sz)) {
126 		op_own = !op_own;
127 		for (ci = 0; ci < n; ci++) {
128 			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
129 
130 			cqe->op_own = op_own;
131 		}
132 	}
133 }
134 
135 static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
136 					struct mlx5_cqwq *wq,
137 					u32 cqcc)
138 {
139 	struct mlx5e_cq_decomp *cqd = &rq->cqd;
140 	struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
141 	struct mlx5_cqe64 *title = &cqd->title;
142 
143 	title->byte_cnt     = mini_cqe->byte_cnt;
144 	title->check_sum    = mini_cqe->checksum;
145 	title->op_own      &= 0xf0;
146 	title->op_own      |= 0x01 & (cqcc >> wq->fbc.log_sz);
147 
148 	/* state bit set implies linked-list striding RQ wq type and
149 	 * HW stride index capability supported
150 	 */
151 	if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
152 		title->wqe_counter = mini_cqe->stridx;
153 		return;
154 	}
155 
156 	/* HW stride index capability not supported */
157 	title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
158 	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
159 		cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
160 	else
161 		cqd->wqe_counter =
162 			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
163 }
164 
165 static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
166 						struct mlx5_cqwq *wq,
167 						u32 cqcc)
168 {
169 	struct mlx5e_cq_decomp *cqd = &rq->cqd;
170 
171 	mlx5e_decompress_cqe(rq, wq, cqcc);
172 	cqd->title.rss_hash_type   = 0;
173 	cqd->title.rss_hash_result = 0;
174 }
175 
176 static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
177 					     struct mlx5_cqwq *wq,
178 					     int update_owner_only,
179 					     int budget_rem)
180 {
181 	struct mlx5e_cq_decomp *cqd = &rq->cqd;
182 	u32 cqcc = wq->cc + update_owner_only;
183 	u32 cqe_count;
184 	u32 i;
185 
186 	cqe_count = min_t(u32, cqd->left, budget_rem);
187 
188 	for (i = update_owner_only; i < cqe_count;
189 	     i++, cqd->mini_arr_idx++, cqcc++) {
190 		if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
191 			mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
192 
193 		mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
194 		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
195 				mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
196 				rq, &cqd->title);
197 	}
198 	mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
199 	wq->cc = cqcc;
200 	cqd->left -= cqe_count;
201 	rq->stats->cqe_compress_pkts += cqe_count;
202 
203 	return cqe_count;
204 }
205 
206 static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
207 					      struct mlx5_cqwq *wq,
208 					      int budget_rem)
209 {
210 	struct mlx5e_cq_decomp *cqd = &rq->cqd;
211 	u32 cc = wq->cc;
212 
213 	mlx5e_read_title_slot(rq, wq, cc);
214 	mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
215 	mlx5e_decompress_cqe(rq, wq, cc);
216 	INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
217 			mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
218 			rq, &cqd->title);
219 	cqd->mini_arr_idx++;
220 
221 	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
222 }
223 
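/* RQ page cache: a small ring of pages that keeps recently released pages
 * (with their DMA mapping intact) for quick reuse. A page is only taken
 * back from the cache when its refcount shows no other users.
 */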
224 static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
225 {
226 	struct mlx5e_page_cache *cache = &rq->page_cache;
227 	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
228 	struct mlx5e_rq_stats *stats = rq->stats;
229 
230 	if (tail_next == cache->head) {
231 		stats->cache_full++;
232 		return false;
233 	}
234 
235 	if (!dev_page_is_reusable(page)) {
236 		stats->cache_waive++;
237 		return false;
238 	}
239 
240 	cache->page_cache[cache->tail].page = page;
241 	cache->page_cache[cache->tail].addr = page_pool_get_dma_addr(page);
242 	cache->tail = tail_next;
243 	return true;
244 }
245 
246 static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
247 				      struct mlx5e_dma_info *dma_info)
248 {
249 	struct mlx5e_page_cache *cache = &rq->page_cache;
250 	struct mlx5e_rq_stats *stats = rq->stats;
251 
252 	if (unlikely(cache->head == cache->tail)) {
253 		stats->cache_empty++;
254 		return false;
255 	}
256 
257 	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
258 		stats->cache_busy++;
259 		return false;
260 	}
261 
262 	*dma_info = cache->page_cache[cache->head];
263 	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
264 	stats->cache_reuse++;
265 
266 	dma_sync_single_for_device(rq->pdev, dma_info->addr,
267 				   PAGE_SIZE,
268 				   DMA_FROM_DEVICE);
269 	return true;
270 }
271 
272 static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
273 					struct mlx5e_dma_info *dma_info)
274 {
275 	if (mlx5e_rx_cache_get(rq, dma_info))
276 		return 0;
277 
278 	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
279 	if (unlikely(!dma_info->page))
280 		return -ENOMEM;
281 
282 	dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE,
283 					    rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC);
284 	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
285 		page_pool_recycle_direct(rq->page_pool, dma_info->page);
286 		dma_info->page = NULL;
287 		return -ENOMEM;
288 	}
289 	page_pool_set_dma_addr(dma_info->page, dma_info->addr);
290 
291 	return 0;
292 }
293 
294 static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
295 				   struct mlx5e_dma_info *dma_info)
296 {
297 	if (rq->xsk_pool)
298 		return mlx5e_xsk_page_alloc_pool(rq, dma_info);
299 	else
300 		return mlx5e_page_alloc_pool(rq, dma_info);
301 }
302 
303 void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
304 {
305 	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
306 
307 	dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
308 			     DMA_ATTR_SKIP_CPU_SYNC);
309 	page_pool_set_dma_addr(page, 0);
310 }
311 
312 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
313 {
314 	if (likely(recycle)) {
315 		if (mlx5e_rx_cache_put(rq, page))
316 			return;
317 
318 		mlx5e_page_dma_unmap(rq, page);
319 		page_pool_recycle_direct(rq->page_pool, page);
320 	} else {
321 		mlx5e_page_dma_unmap(rq, page);
322 		page_pool_release_page(rq->page_pool, page);
323 		put_page(page);
324 	}
325 }
326 
327 static inline void mlx5e_page_release(struct mlx5e_rq *rq,
328 				      struct mlx5e_dma_info *dma_info,
329 				      bool recycle)
330 {
331 	if (rq->xsk_pool)
332 		/* The `recycle` parameter is ignored, and the page is always
333 		 * put into the Reuse Ring, because there is no way to return
334 		 * the page to the userspace when the interface goes down.
335 		 */
336 		xsk_buff_free(dma_info->xsk);
337 	else
338 		mlx5e_page_release_dynamic(rq, dma_info->page, recycle);
339 }
340 
341 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
342 				    struct mlx5e_wqe_frag_info *frag)
343 {
344 	int err = 0;
345 
346 	if (!frag->offset)
		/* Only the first frag (offset == 0) allocates a new page for the
		 * shared dma_info. The other frags point into the same dma_info
		 * at different offsets and reuse it without allocating again.
		 */
352 		err = mlx5e_page_alloc(rq, frag->di);
353 
354 	return err;
355 }
356 
357 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
358 				     struct mlx5e_wqe_frag_info *frag,
359 				     bool recycle)
360 {
361 	if (frag->last_in_page)
362 		mlx5e_page_release(rq, frag->di, recycle);
363 }
364 
365 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
366 {
367 	return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
368 }
369 
370 static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
371 			      u16 ix)
372 {
373 	struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
374 	int err;
375 	int i;
376 
377 	for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
378 		u16 headroom;
379 
380 		err = mlx5e_get_rx_frag(rq, frag);
381 		if (unlikely(err))
382 			goto free_frags;
383 
384 		headroom = i == 0 ? rq->buff.headroom : 0;
385 		wqe->data[i].addr = cpu_to_be64(frag->di->addr +
386 						frag->offset + headroom);
387 	}
388 
389 	return 0;
390 
391 free_frags:
392 	while (--i >= 0)
393 		mlx5e_put_rx_frag(rq, --frag, true);
394 
395 	return err;
396 }
397 
398 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
399 				     struct mlx5e_wqe_frag_info *wi,
400 				     bool recycle)
401 {
402 	int i;
403 
404 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
405 		mlx5e_put_rx_frag(rq, wi, recycle);
406 }
407 
408 static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
409 {
410 	struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
411 
412 	mlx5e_free_rx_wqe(rq, wi, false);
413 }
414 
415 static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
416 {
417 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
418 	int err;
419 	int i;
420 
421 	if (rq->xsk_pool) {
422 		int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
423 
424 		/* Check in advance that we have enough frames, instead of
425 		 * allocating one-by-one, failing and moving frames to the
426 		 * Reuse Ring.
427 		 */
428 		if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, pages_desired)))
429 			return -ENOMEM;
430 	}
431 
432 	for (i = 0; i < wqe_bulk; i++) {
433 		struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
434 
435 		err = mlx5e_alloc_rx_wqe(rq, wqe, ix + i);
436 		if (unlikely(err))
437 			goto free_wqes;
438 	}
439 
440 	return 0;
441 
442 free_wqes:
443 	while (--i >= 0)
444 		mlx5e_dealloc_rx_wqe(rq, ix + i);
445 
446 	return err;
447 }
448 
449 static inline void
450 mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
451 		   struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
452 		   unsigned int truesize)
453 {
454 	dma_sync_single_for_cpu(rq->pdev,
455 				di->addr + frag_offset,
456 				len, DMA_FROM_DEVICE);
457 	page_ref_inc(di->page);
458 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
459 			di->page, frag_offset, len, truesize);
460 }
461 
462 static inline void
463 mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
464 		      struct mlx5e_dma_info *dma_info,
465 		      int offset_from, int dma_offset, u32 headlen)
466 {
467 	const void *from = page_address(dma_info->page) + offset_from;
468 	/* Aligning len to sizeof(long) optimizes memcpy performance */
469 	unsigned int len = ALIGN(headlen, sizeof(long));
470 
471 	dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len,
472 				DMA_FROM_DEVICE);
473 	skb_copy_to_linear_data(skb, from, len);
474 }
475 
476 static void
477 mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
478 {
479 	bool no_xdp_xmit;
480 	struct mlx5e_dma_info *dma_info = wi->umr.dma_info;
481 	int i;
482 
	/* All pages were handed to XDP for transmission (a common case for
	 * AF_XDP); they are released on XDP completion, so nothing to do here.
	 */
484 	if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE))
485 		return;
486 
487 	no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap,
488 				   MLX5_MPWRQ_PAGES_PER_WQE);
489 
490 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
491 		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
492 			mlx5e_page_release(rq, &dma_info[i], recycle);
493 }
494 
495 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
496 {
497 	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
498 
499 	do {
500 		u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);
501 
502 		mlx5_wq_ll_push(wq, next_wqe_index);
503 	} while (--n);
504 
505 	/* ensure wqes are visible to device before updating doorbell record */
506 	dma_wmb();
507 
508 	mlx5_wq_ll_update_db_record(wq);
509 }
510 
/* Return the length of the contiguous run of free (zero) bits that starts at
 * bit @first, capped at @len. The search wraps around the end of the bitmap.
 */
514 static int bitmap_find_window(unsigned long *bitmap, int len,
515 			      int bitmap_size, int first)
516 {
517 	int next_one, count;
518 
519 	next_one = find_next_bit(bitmap, bitmap_size, first);
520 	if (next_one == bitmap_size) {
521 		if (bitmap_size - first >= len)
522 			return len;
523 		next_one = find_next_bit(bitmap, bitmap_size, 0);
524 		count = next_one + bitmap_size - first;
525 	} else {
526 		count = next_one - first;
527 	}
528 
529 	return min(len, count);
530 }
531 
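/* Fill a UMR WQE that updates the KLM translation entries of the SHAMPO
 * header mkey: @offset is the first entry to update and @klm_len the number
 * of inline KLM entries that follow the control segments.
 */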
532 static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
533 			  __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
534 {
535 	memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
536 	umr_wqe->ctrl.opmod_idx_opcode =
537 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
538 			     MLX5_OPCODE_UMR);
539 	umr_wqe->ctrl.umr_mkey = key;
540 	umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
541 					    | MLX5E_KLM_UMR_DS_CNT(klm_len));
542 	umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
543 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
544 	umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
545 	umr_wqe->uctrl.mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
546 }
547 
548 static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
549 				     struct mlx5e_icosq *sq,
550 				     u16 klm_entries, u16 index)
551 {
552 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
553 	u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
554 	u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
555 	struct page *page = shampo->last_page;
556 	u64 addr = shampo->last_addr;
557 	struct mlx5e_dma_info *dma_info;
558 	struct mlx5e_umr_wqe *umr_wqe;
559 	int headroom, i;
560 
561 	headroom = rq->buff.headroom;
562 	new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
563 	entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
564 	wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
565 	pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
566 	umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
567 	build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
568 
569 	for (i = 0; i < entries; i++, index++) {
570 		dma_info = &shampo->info[index];
571 		if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
572 					 MLX5_UMR_KLM_ALIGNMENT))
573 			goto update_klm;
574 		header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
575 			MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
576 		if (!(header_offset & (PAGE_SIZE - 1))) {
577 			err = mlx5e_page_alloc(rq, dma_info);
578 			if (unlikely(err))
579 				goto err_unmap;
580 			addr = dma_info->addr;
581 			page = dma_info->page;
582 		} else {
583 			dma_info->addr = addr + header_offset;
584 			dma_info->page = page;
585 		}
586 
587 update_klm:
588 		umr_wqe->inline_klms[i].bcount =
589 			cpu_to_be32(MLX5E_RX_MAX_HEAD);
590 		umr_wqe->inline_klms[i].key    = cpu_to_be32(lkey);
591 		umr_wqe->inline_klms[i].va     =
592 			cpu_to_be64(dma_info->addr + headroom);
593 	}
594 
595 	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
596 		.wqe_type	= MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
597 		.num_wqebbs	= wqe_bbs,
598 		.shampo.len	= new_entries,
599 	};
600 
601 	shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
602 	shampo->last_page = page;
603 	shampo->last_addr = addr;
604 	sq->pc += wqe_bbs;
605 	sq->doorbell_cseg = &umr_wqe->ctrl;
606 
607 	return 0;
608 
609 err_unmap:
610 	while (--i >= 0) {
611 		dma_info = &shampo->info[--index];
612 		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
613 			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
614 			mlx5e_page_release(rq, dma_info, true);
615 		}
616 	}
617 	rq->stats->buff_alloc_err++;
618 	return err;
619 }
620 
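/* Allocate the SHAMPO header buffers needed for one MPWQE and post the KLM
 * UMR WQEs that map them, splitting the work according to the per-WQE KLM
 * limit and the wrap-around of the header buffer.
 */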
621 static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
622 {
623 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
624 	u16 klm_entries, num_wqe, index, entries_before;
625 	struct mlx5e_icosq *sq = rq->icosq;
626 	int i, err, max_klm_entries, len;
627 
628 	max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
629 	klm_entries = bitmap_find_window(shampo->bitmap,
630 					 shampo->hd_per_wqe,
631 					 shampo->hd_per_wq, shampo->pi);
632 	if (!klm_entries)
633 		return 0;
634 
635 	klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
636 	index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
637 	entries_before = shampo->hd_per_wq - index;
638 
639 	if (unlikely(entries_before < klm_entries))
640 		num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
641 			  DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
642 	else
643 		num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
644 
645 	for (i = 0; i < num_wqe; i++) {
646 		len = (klm_entries > max_klm_entries) ? max_klm_entries :
647 							klm_entries;
648 		if (unlikely(index + len > shampo->hd_per_wq))
649 			len = shampo->hd_per_wq - index;
650 		err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
651 		if (unlikely(err))
652 			return err;
653 		index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
654 		klm_entries -= len;
655 	}
656 
657 	return 0;
658 }
659 
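/* Allocate all pages of a multi-packet WQE and post a UMR WQE on the ICOSQ
 * to map them. The RX WQE itself is only made visible to HW after the UMR
 * completion is polled (see mlx5e_post_rx_mpwqes()).
 */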
660 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
661 {
662 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
663 	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
664 	struct mlx5e_icosq *sq = rq->icosq;
665 	struct mlx5_wq_cyc *wq = &sq->wq;
666 	struct mlx5e_umr_wqe *umr_wqe;
667 	u16 pi;
668 	int err;
669 	int i;
670 
671 	/* Check in advance that we have enough frames, instead of allocating
672 	 * one-by-one, failing and moving frames to the Reuse Ring.
673 	 */
674 	if (rq->xsk_pool &&
675 	    unlikely(!xsk_buff_can_alloc(rq->xsk_pool, MLX5_MPWRQ_PAGES_PER_WQE))) {
676 		err = -ENOMEM;
677 		goto err;
678 	}
679 
680 	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
681 		err = mlx5e_alloc_rx_hd_mpwqe(rq);
682 		if (unlikely(err))
683 			goto err;
684 	}
685 
686 	pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
687 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
688 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
689 
690 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
691 		err = mlx5e_page_alloc(rq, dma_info);
692 		if (unlikely(err))
693 			goto err_unmap;
694 		umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
695 	}
696 
697 	bitmap_zero(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE);
698 	wi->consumed_strides = 0;
699 
700 	umr_wqe->ctrl.opmod_idx_opcode =
701 		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
702 			    MLX5_OPCODE_UMR);
703 	umr_wqe->uctrl.xlt_offset =
704 		cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix)));
705 
706 	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
707 		.wqe_type   = MLX5E_ICOSQ_WQE_UMR_RX,
708 		.num_wqebbs = MLX5E_UMR_WQEBBS,
709 		.umr.rq     = rq,
710 	};
711 
712 	sq->pc += MLX5E_UMR_WQEBBS;
713 
714 	sq->doorbell_cseg = &umr_wqe->ctrl;
715 
716 	return 0;
717 
718 err_unmap:
719 	while (--i >= 0) {
720 		dma_info--;
721 		mlx5e_page_release(rq, dma_info, true);
722 	}
723 
724 err:
725 	rq->stats->buff_alloc_err++;
726 
727 	return err;
728 }
729 
/* Deallocate SHAMPO header buffers.
 * close == true means the RQ is being closed: walk all @len entries and
 * release only those whose bit is still set in the bitmap (headers that were
 * posted but never consumed). Otherwise release the given in-progress range
 * unconditionally. The bitmap bits of the range are cleared in both cases.
 */
736 void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
737 {
738 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
739 	int hd_per_wq = shampo->hd_per_wq;
740 	struct page *deleted_page = NULL;
741 	struct mlx5e_dma_info *hd_info;
742 	int i, index = start;
743 
744 	for (i = 0; i < len; i++, index++) {
745 		if (index == hd_per_wq)
746 			index = 0;
747 
748 		if (close && !test_bit(index, shampo->bitmap))
749 			continue;
750 
751 		hd_info = &shampo->info[index];
752 		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
753 		if (hd_info->page != deleted_page) {
754 			deleted_page = hd_info->page;
755 			mlx5e_page_release(rq, hd_info, false);
756 		}
757 	}
758 
759 	if (start + len > hd_per_wq) {
760 		len -= hd_per_wq - start;
761 		bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
762 		start = 0;
763 	}
764 
765 	bitmap_clear(shampo->bitmap, start, len);
766 }
767 
768 static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
769 {
770 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
771 	/* Don't recycle, this function is called on rq/netdev close */
772 	mlx5e_free_rx_mpwqe(rq, wi, false);
773 }
774 
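/* Refill the cyclic (legacy) RQ in bulks of wqe_bulk WQEs and update the
 * doorbell record. Returns true when an allocation failed, so that NAPI
 * keeps polling and the refill is retried.
 */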
775 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
776 {
777 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
778 	u8 wqe_bulk;
779 	int err;
780 
781 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
782 		return false;
783 
784 	wqe_bulk = rq->wqe.info.wqe_bulk;
785 
786 	if (mlx5_wq_cyc_missing(wq) < wqe_bulk)
787 		return false;
788 
789 	if (rq->page_pool)
790 		page_pool_nid_changed(rq->page_pool, numa_mem_id());
791 
792 	do {
793 		u16 head = mlx5_wq_cyc_get_head(wq);
794 
795 		err = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
796 		if (unlikely(err)) {
797 			rq->stats->buff_alloc_err++;
798 			break;
799 		}
800 
801 		mlx5_wq_cyc_push_n(wq, wqe_bulk);
802 	} while (mlx5_wq_cyc_missing(wq) >= wqe_bulk);
803 
804 	/* ensure wqes are visible to device before updating doorbell record */
805 	dma_wmb();
806 
807 	mlx5_wq_cyc_update_db_record(wq);
808 
809 	return !!err;
810 }
811 
812 void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
813 {
814 	u16 sqcc;
815 
816 	sqcc = sq->cc;
817 
818 	while (sqcc != sq->pc) {
819 		struct mlx5e_icosq_wqe_info *wi;
820 		u16 ci;
821 
822 		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
823 		wi = &sq->db.wqe_info[ci];
824 		sqcc += wi->num_wqebbs;
825 #ifdef CONFIG_MLX5_EN_TLS
826 		switch (wi->wqe_type) {
827 		case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
828 			mlx5e_ktls_handle_ctx_completion(wi);
829 			break;
830 		case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
831 			mlx5e_ktls_handle_get_psv_completion(wi, sq);
832 			break;
833 		}
834 #endif
835 	}
836 	sq->cc = sqcc;
837 }
838 
839 static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
840 				       struct mlx5e_icosq *sq)
841 {
842 	struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
843 	struct mlx5e_shampo_hd *shampo;
844 	/* assume 1:1 relationship between RQ and icosq */
845 	struct mlx5e_rq *rq = &c->rq;
846 	int end, from, len = umr.len;
847 
848 	shampo = rq->mpwqe.shampo;
849 	end = shampo->hd_per_wq;
850 	from = shampo->ci;
851 	if (from + len > shampo->hd_per_wq) {
852 		len -= end - from;
853 		bitmap_set(shampo->bitmap, from, end - from);
854 		from = 0;
855 	}
856 
857 	bitmap_set(shampo->bitmap, from, len);
858 	shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
859 }
860 
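/* Poll completions of the internal control SQ (UMR, NOP, PSV, ...). UMR
 * completions for striding RQs are only counted here; the corresponding RX
 * WQEs are published later by mlx5e_post_rx_mpwqes().
 */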
861 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
862 {
863 	struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
864 	struct mlx5_cqe64 *cqe;
865 	u16 sqcc;
866 	int i;
867 
868 	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
869 		return 0;
870 
871 	cqe = mlx5_cqwq_get_cqe(&cq->wq);
872 	if (likely(!cqe))
873 		return 0;
874 
875 	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
876 	 * otherwise a cq overrun may occur
877 	 */
878 	sqcc = sq->cc;
879 
880 	i = 0;
881 	do {
882 		u16 wqe_counter;
883 		bool last_wqe;
884 
885 		mlx5_cqwq_pop(&cq->wq);
886 
887 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
888 
889 		do {
890 			struct mlx5e_icosq_wqe_info *wi;
891 			u16 ci;
892 
893 			last_wqe = (sqcc == wqe_counter);
894 
895 			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
896 			wi = &sq->db.wqe_info[ci];
897 			sqcc += wi->num_wqebbs;
898 
899 			if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
900 				netdev_WARN_ONCE(cq->netdev,
901 						 "Bad OP in ICOSQ CQE: 0x%x\n",
902 						 get_cqe_opcode(cqe));
903 				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
904 						     (struct mlx5_err_cqe *)cqe);
905 				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
906 				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
907 					queue_work(cq->priv->wq, &sq->recover_work);
908 				break;
909 			}
910 
911 			switch (wi->wqe_type) {
912 			case MLX5E_ICOSQ_WQE_UMR_RX:
913 				wi->umr.rq->mpwqe.umr_completed++;
914 				break;
915 			case MLX5E_ICOSQ_WQE_NOP:
916 				break;
917 			case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
918 				mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
919 				break;
920 #ifdef CONFIG_MLX5_EN_TLS
921 			case MLX5E_ICOSQ_WQE_UMR_TLS:
922 				break;
923 			case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
924 				mlx5e_ktls_handle_ctx_completion(wi);
925 				break;
926 			case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
927 				mlx5e_ktls_handle_get_psv_completion(wi, sq);
928 				break;
929 #endif
930 			default:
931 				netdev_WARN_ONCE(cq->netdev,
932 						 "Bad WQE type in ICOSQ WQE info: 0x%x\n",
933 						 wi->wqe_type);
934 			}
935 		} while (!last_wqe);
936 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
937 
938 	sq->cc = sqcc;
939 
940 	mlx5_cqwq_update_db_record(&cq->wq);
941 
942 	return i;
943 }
944 
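/* Publish the MPWQEs whose UMRs have completed, then issue new MPWQE
 * allocations for the missing ring entries, bulking the ICOSQ doorbell.
 */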
945 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
946 {
947 	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
948 	u8  umr_completed = rq->mpwqe.umr_completed;
949 	struct mlx5e_icosq *sq = rq->icosq;
950 	int alloc_err = 0;
951 	u8  missing, i;
952 	u16 head;
953 
954 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
955 		return false;
956 
957 	if (umr_completed) {
958 		mlx5e_post_rx_mpwqe(rq, umr_completed);
959 		rq->mpwqe.umr_in_progress -= umr_completed;
960 		rq->mpwqe.umr_completed = 0;
961 	}
962 
963 	missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;
964 
965 	if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
966 		rq->stats->congst_umr++;
967 
968 	if (likely(missing < rq->mpwqe.min_wqe_bulk))
969 		return false;
970 
971 	if (rq->page_pool)
972 		page_pool_nid_changed(rq->page_pool, numa_mem_id());
973 
974 	head = rq->mpwqe.actual_wq_head;
975 	i = missing;
976 	do {
977 		alloc_err = mlx5e_alloc_rx_mpwqe(rq, head);
978 
979 		if (unlikely(alloc_err))
980 			break;
981 		head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
982 	} while (--i);
983 
984 	rq->mpwqe.umr_last_bulk    = missing - i;
985 	if (sq->doorbell_cseg) {
986 		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
987 		sq->doorbell_cseg = NULL;
988 	}
989 
990 	rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
991 	rq->mpwqe.actual_wq_head   = head;
992 
993 	/* If XSK Fill Ring doesn't have enough frames, report the error, so
994 	 * that one of the actions can be performed:
995 	 * 1. If need_wakeup is used, signal that the application has to kick
996 	 * the driver when it refills the Fill Ring.
997 	 * 2. Otherwise, busy poll by rescheduling the NAPI poll.
998 	 */
999 	if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
1000 		return true;
1001 
1002 	return false;
1003 }
1004 
1005 static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
1006 {
1007 	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
1008 	u8 tcp_ack     = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
1009 			 (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
1010 
1011 	tcp->check                      = 0;
1012 	tcp->psh                        = get_cqe_lro_tcppsh(cqe);
1013 
1014 	if (tcp_ack) {
1015 		tcp->ack                = 1;
1016 		tcp->ack_seq            = cqe->lro.ack_seq_num;
1017 		tcp->window             = cqe->lro.tcp_win;
1018 	}
1019 }
1020 
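/* For an LRO aggregation the HW delivers a single super-packet; rewrite the
 * IPv4/IPv6 length and TTL/hop-limit fields and the TCP flags, window and
 * checksum so the headers match the aggregated payload.
 */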
1021 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
1022 				 u32 cqe_bcnt)
1023 {
1024 	struct ethhdr	*eth = (struct ethhdr *)(skb->data);
1025 	struct tcphdr	*tcp;
1026 	int network_depth = 0;
1027 	__wsum check;
1028 	__be16 proto;
1029 	u16 tot_len;
1030 	void *ip_p;
1031 
1032 	proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
1033 
1034 	tot_len = cqe_bcnt - network_depth;
1035 	ip_p = skb->data + network_depth;
1036 
1037 	if (proto == htons(ETH_P_IP)) {
1038 		struct iphdr *ipv4 = ip_p;
1039 
1040 		tcp = ip_p + sizeof(struct iphdr);
1041 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1042 
1043 		ipv4->ttl               = cqe->lro.min_ttl;
1044 		ipv4->tot_len           = cpu_to_be16(tot_len);
1045 		ipv4->check             = 0;
1046 		ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
1047 						       ipv4->ihl);
1048 
1049 		mlx5e_lro_update_tcp_hdr(cqe, tcp);
1050 		check = csum_partial(tcp, tcp->doff * 4,
1051 				     csum_unfold((__force __sum16)cqe->check_sum));
1052 		/* Almost done, don't forget the pseudo header */
1053 		tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
1054 					       tot_len - sizeof(struct iphdr),
1055 					       IPPROTO_TCP, check);
1056 	} else {
1057 		u16 payload_len = tot_len - sizeof(struct ipv6hdr);
1058 		struct ipv6hdr *ipv6 = ip_p;
1059 
1060 		tcp = ip_p + sizeof(struct ipv6hdr);
1061 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
1062 
1063 		ipv6->hop_limit         = cqe->lro.min_ttl;
1064 		ipv6->payload_len       = cpu_to_be16(payload_len);
1065 
1066 		mlx5e_lro_update_tcp_hdr(cqe, tcp);
1067 		check = csum_partial(tcp, tcp->doff * 4,
1068 				     csum_unfold((__force __sum16)cqe->check_sum));
1069 		/* Almost done, don't forget the pseudo header */
1070 		tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
1071 					     IPPROTO_TCP, check);
1072 	}
1073 }
1074 
1075 static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
1076 {
1077 	struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
1078 	u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;
1079 
1080 	return page_address(last_head->page) + head_offset;
1081 }
1082 
1083 static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
1084 {
1085 	int udp_off = rq->hw_gro_data->fk.control.thoff;
1086 	struct sk_buff *skb = rq->hw_gro_data->skb;
1087 	struct udphdr *uh;
1088 
1089 	uh = (struct udphdr *)(skb->data + udp_off);
1090 	uh->len = htons(skb->len - udp_off);
1091 
1092 	if (uh->check)
1093 		uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
1094 					  ipv4->daddr, 0);
1095 
1096 	skb->csum_start = (unsigned char *)uh - skb->head;
1097 	skb->csum_offset = offsetof(struct udphdr, check);
1098 
1099 	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
1100 }
1101 
1102 static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
1103 {
1104 	int udp_off = rq->hw_gro_data->fk.control.thoff;
1105 	struct sk_buff *skb = rq->hw_gro_data->skb;
1106 	struct udphdr *uh;
1107 
1108 	uh = (struct udphdr *)(skb->data + udp_off);
1109 	uh->len = htons(skb->len - udp_off);
1110 
1111 	if (uh->check)
1112 		uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
1113 					  &ipv6->daddr, 0);
1114 
1115 	skb->csum_start = (unsigned char *)uh - skb->head;
1116 	skb->csum_offset = offsetof(struct udphdr, check);
1117 
1118 	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
1119 }
1120 
1121 static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
1122 					      struct tcphdr *skb_tcp_hd)
1123 {
1124 	u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
1125 	struct tcphdr *last_tcp_hd;
1126 	void *last_hd_addr;
1127 
1128 	last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
1129 	last_tcp_hd =  last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
1130 	tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
1131 }
1132 
1133 static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
1134 					     struct mlx5_cqe64 *cqe, bool match)
1135 {
1136 	int tcp_off = rq->hw_gro_data->fk.control.thoff;
1137 	struct sk_buff *skb = rq->hw_gro_data->skb;
1138 	struct tcphdr *tcp;
1139 
1140 	tcp = (struct tcphdr *)(skb->data + tcp_off);
1141 	if (match)
1142 		mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
1143 
1144 	tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
1145 				   ipv4->daddr, 0);
1146 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
1147 	if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
1148 		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
1149 
1150 	skb->csum_start = (unsigned char *)tcp - skb->head;
1151 	skb->csum_offset = offsetof(struct tcphdr, check);
1152 
1153 	if (tcp->cwr)
1154 		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
1155 }
1156 
1157 static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
1158 					     struct mlx5_cqe64 *cqe, bool match)
1159 {
1160 	int tcp_off = rq->hw_gro_data->fk.control.thoff;
1161 	struct sk_buff *skb = rq->hw_gro_data->skb;
1162 	struct tcphdr *tcp;
1163 
1164 	tcp = (struct tcphdr *)(skb->data + tcp_off);
1165 	if (match)
1166 		mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);
1167 
1168 	tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
1169 				   &ipv6->daddr, 0);
1170 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
1171 	skb->csum_start = (unsigned char *)tcp - skb->head;
1172 	skb->csum_offset = offsetof(struct tcphdr, check);
1173 
1174 	if (tcp->cwr)
1175 		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
1176 }
1177 
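/* Finalize the headers of a HW-GRO (SHAMPO) aggregated SKB: fix up the IP
 * length fields, set the matching GSO type and prepare CHECKSUM_PARTIAL
 * offsets for the merged TCP or UDP flow.
 */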
1178 static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
1179 {
1180 	bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
1181 	struct sk_buff *skb = rq->hw_gro_data->skb;
1182 
1183 	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
1184 	skb->ip_summed = CHECKSUM_PARTIAL;
1185 
1186 	if (is_ipv4) {
1187 		int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
1188 		struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
1189 		__be16 newlen = htons(skb->len - nhoff);
1190 
1191 		csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
1192 		ipv4->tot_len = newlen;
1193 
1194 		if (ipv4->protocol == IPPROTO_TCP)
1195 			mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
1196 		else
1197 			mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
1198 	} else {
1199 		int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
1200 		struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);
1201 
1202 		ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));
1203 
1204 		if (ipv6->nexthdr == IPPROTO_TCP)
1205 			mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
1206 		else
1207 			mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
1208 	}
1209 }
1210 
1211 static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
1212 				      struct sk_buff *skb)
1213 {
1214 	u8 cht = cqe->rss_hash_type;
1215 	int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
1216 		 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
1217 					    PKT_HASH_TYPE_NONE;
1218 	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
1219 }
1220 
1221 static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
1222 					__be16 *proto)
1223 {
1224 	*proto = ((struct ethhdr *)skb->data)->h_proto;
1225 	*proto = __vlan_get_protocol(skb, *proto, network_depth);
1226 
1227 	if (*proto == htons(ETH_P_IP))
1228 		return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
1229 
1230 	if (*proto == htons(ETH_P_IPV6))
1231 		return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
1232 
1233 	return false;
1234 }
1235 
1236 static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
1237 {
1238 	int network_depth = 0;
1239 	__be16 proto;
1240 	void *ip;
1241 	int rc;
1242 
1243 	if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
1244 		return;
1245 
1246 	ip = skb->data + network_depth;
1247 	rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
1248 					 IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
1249 
1250 	rq->stats->ecn_mark += !!rc;
1251 }
1252 
1253 static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
1254 {
1255 	void *ip_p = skb->data + network_depth;
1256 
1257 	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
1258 					    ((struct ipv6hdr *)ip_p)->nexthdr;
1259 }
1260 
1261 #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
1262 
1263 #define MAX_PADDING 8
1264 
1265 static void
1266 tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
1267 		       struct mlx5e_rq_stats *stats)
1268 {
1269 	stats->csum_complete_tail_slow++;
1270 	skb->csum = csum_block_add(skb->csum,
1271 				   skb_checksum(skb, offset, len, 0),
1272 				   offset);
1273 }
1274 
1275 static void
1276 tail_padding_csum(struct sk_buff *skb, int offset,
1277 		  struct mlx5e_rq_stats *stats)
1278 {
1279 	u8 tail_padding[MAX_PADDING];
1280 	int len = skb->len - offset;
1281 	void *tail;
1282 
1283 	if (unlikely(len > MAX_PADDING)) {
1284 		tail_padding_csum_slow(skb, offset, len, stats);
1285 		return;
1286 	}
1287 
1288 	tail = skb_header_pointer(skb, offset, len, tail_padding);
1289 	if (unlikely(!tail)) {
1290 		tail_padding_csum_slow(skb, offset, len, stats);
1291 		return;
1292 	}
1293 
1294 	stats->csum_complete_tail++;
1295 	skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
1296 }
1297 
1298 static void
1299 mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
1300 		     struct mlx5e_rq_stats *stats)
1301 {
1302 	struct ipv6hdr *ip6;
1303 	struct iphdr   *ip4;
1304 	int pkt_len;
1305 
1306 	/* Fixup vlan headers, if any */
1307 	if (network_depth > ETH_HLEN)
1308 		/* CQE csum is calculated from the IP header and does
1309 		 * not cover VLAN headers (if present). This will add
1310 		 * the checksum manually.
1311 		 */
1312 		skb->csum = csum_partial(skb->data + ETH_HLEN,
1313 					 network_depth - ETH_HLEN,
1314 					 skb->csum);
1315 
1316 	/* Fixup tail padding, if any */
1317 	switch (proto) {
1318 	case htons(ETH_P_IP):
1319 		ip4 = (struct iphdr *)(skb->data + network_depth);
1320 		pkt_len = network_depth + ntohs(ip4->tot_len);
1321 		break;
1322 	case htons(ETH_P_IPV6):
1323 		ip6 = (struct ipv6hdr *)(skb->data + network_depth);
1324 		pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
1325 		break;
1326 	default:
1327 		return;
1328 	}
1329 
1330 	if (likely(pkt_len >= skb->len))
1331 		return;
1332 
1333 	tail_padding_csum(skb, pkt_len, stats);
1334 }
1335 
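/* Choose how to report the HW checksum: prefer CHECKSUM_COMPLETE based on
 * the CQE checksum (with VLAN and tail-padding fixups), fall back to
 * CHECKSUM_UNNECESSARY when the CQE only reports L3/L4 OK, and to
 * CHECKSUM_NONE otherwise.
 */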
1336 static inline void mlx5e_handle_csum(struct net_device *netdev,
1337 				     struct mlx5_cqe64 *cqe,
1338 				     struct mlx5e_rq *rq,
1339 				     struct sk_buff *skb,
1340 				     bool   lro)
1341 {
1342 	struct mlx5e_rq_stats *stats = rq->stats;
1343 	int network_depth = 0;
1344 	__be16 proto;
1345 
1346 	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
1347 		goto csum_none;
1348 
1349 	if (lro) {
1350 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1351 		stats->csum_unnecessary++;
1352 		return;
1353 	}
1354 
1355 	/* True when explicitly set via priv flag, or XDP prog is loaded */
1356 	if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
1357 	    get_cqe_tls_offload(cqe))
1358 		goto csum_unnecessary;
1359 
1360 	/* CQE csum doesn't cover padding octets in short ethernet
1361 	 * frames. And the pad field is appended prior to calculating
1362 	 * and appending the FCS field.
1363 	 *
1364 	 * Detecting these padded frames requires to verify and parse
1365 	 * IP headers, so we simply force all those small frames to be
1366 	 * CHECKSUM_UNNECESSARY even if they are not padded.
1367 	 */
1368 	if (short_frame(skb->len))
1369 		goto csum_unnecessary;
1370 
1371 	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
1372 		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
1373 			goto csum_unnecessary;
1374 
1375 		stats->csum_complete++;
1376 		skb->ip_summed = CHECKSUM_COMPLETE;
1377 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
1378 
1379 		if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
1380 			return; /* CQE csum covers all received bytes */
1381 
1382 		/* csum might need some fixups ...*/
1383 		mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
1384 		return;
1385 	}
1386 
1387 csum_unnecessary:
1388 	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
1389 		   (cqe->hds_ip_ext & CQE_L4_OK))) {
1390 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1391 		if (cqe_is_tunneled(cqe)) {
1392 			skb->csum_level = 1;
1393 			skb->encapsulation = 1;
1394 			stats->csum_unnecessary_inner++;
1395 			return;
1396 		}
1397 		stats->csum_unnecessary++;
1398 		return;
1399 	}
1400 csum_none:
1401 	skb->ip_summed = CHECKSUM_NONE;
1402 	stats->csum_none++;
1403 }
1404 
1405 #define MLX5E_CE_BIT_MASK 0x80
1406 
1407 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
1408 				      u32 cqe_bcnt,
1409 				      struct mlx5e_rq *rq,
1410 				      struct sk_buff *skb)
1411 {
1412 	u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
1413 	struct mlx5e_rq_stats *stats = rq->stats;
1414 	struct net_device *netdev = rq->netdev;
1415 
1416 	skb->mac_len = ETH_HLEN;
1417 
1418 	if (unlikely(get_cqe_tls_offload(cqe)))
1419 		mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
1420 
1421 	if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
1422 		mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
1423 
1424 	if (lro_num_seg > 1) {
1425 		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
1426 		skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
1427 		/* Subtract one since we already counted this as one
1428 		 * "regular" packet in mlx5e_complete_rx_cqe()
1429 		 */
1430 		stats->packets += lro_num_seg - 1;
1431 		stats->lro_packets++;
1432 		stats->lro_bytes += cqe_bcnt;
1433 	}
1434 
1435 	if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
1436 		skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
1437 								  rq->clock, get_cqe_ts(cqe));
1438 	skb_record_rx_queue(skb, rq->ix);
1439 
1440 	if (likely(netdev->features & NETIF_F_RXHASH))
1441 		mlx5e_skb_set_hash(cqe, skb);
1442 
1443 	if (cqe_has_vlan(cqe)) {
1444 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
1445 				       be16_to_cpu(cqe->vlan_info));
1446 		stats->removed_vlan_packets++;
1447 	}
1448 
1449 	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
1450 
1451 	mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
1452 	/* checking CE bit in cqe - MSB in ml_path field */
1453 	if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
1454 		mlx5e_enable_ecn(rq, skb);
1455 
1456 	skb->protocol = eth_type_trans(skb, netdev);
1457 
1458 	if (unlikely(mlx5e_skb_is_multicast(skb)))
1459 		stats->mcast_packets++;
1460 }
1461 
1462 static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
1463 					 struct mlx5_cqe64 *cqe,
1464 					 u32 cqe_bcnt,
1465 					 struct sk_buff *skb)
1466 {
1467 	struct mlx5e_rq_stats *stats = rq->stats;
1468 
1469 	stats->packets++;
1470 	stats->gro_packets++;
1471 	stats->bytes += cqe_bcnt;
1472 	stats->gro_bytes += cqe_bcnt;
1473 	if (NAPI_GRO_CB(skb)->count != 1)
1474 		return;
1475 	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
1476 	skb_reset_network_header(skb);
1477 	if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
1478 		napi_gro_receive(rq->cq.napi, skb);
1479 		rq->hw_gro_data->skb = NULL;
1480 	}
1481 }
1482 
1483 static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
1484 					 struct mlx5_cqe64 *cqe,
1485 					 u32 cqe_bcnt,
1486 					 struct sk_buff *skb)
1487 {
1488 	struct mlx5e_rq_stats *stats = rq->stats;
1489 
1490 	stats->packets++;
1491 	stats->bytes += cqe_bcnt;
1492 	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
1493 }
1494 
1495 static inline
1496 struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
1497 				       u32 frag_size, u16 headroom,
1498 				       u32 cqe_bcnt, u32 metasize)
1499 {
1500 	struct sk_buff *skb = build_skb(va, frag_size);
1501 
1502 	if (unlikely(!skb)) {
1503 		rq->stats->buff_alloc_err++;
1504 		return NULL;
1505 	}
1506 
1507 	skb_reserve(skb, headroom);
1508 	skb_put(skb, cqe_bcnt);
1509 
1510 	if (metasize)
1511 		skb_metadata_set(skb, metasize);
1512 
1513 	return skb;
1514 }
1515 
1516 static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
1517 				u32 len, struct xdp_buff *xdp)
1518 {
1519 	xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
1520 	xdp_prepare_buff(xdp, va, headroom, len, true);
1521 }
1522 
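/* Build an SKB for a packet that fits in a single fragment: run XDP first
 * (which may consume the packet), then wrap the same page into a linear SKB
 * and take an extra page reference so the page can be safely recycled.
 */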
1523 static struct sk_buff *
1524 mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
1525 			  u32 cqe_bcnt)
1526 {
1527 	struct mlx5e_dma_info *di = wi->di;
1528 	u16 rx_headroom = rq->buff.headroom;
1529 	struct bpf_prog *prog;
1530 	struct sk_buff *skb;
1531 	u32 metasize = 0;
1532 	void *va, *data;
1533 	u32 frag_size;
1534 
1535 	va             = page_address(di->page) + wi->offset;
1536 	data           = va + rx_headroom;
1537 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1538 
1539 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
1540 				      frag_size, DMA_FROM_DEVICE);
1541 	net_prefetch(data);
1542 
1543 	prog = rcu_dereference(rq->xdp_prog);
1544 	if (prog) {
1545 		struct xdp_buff xdp;
1546 
1547 		net_prefetchw(va); /* xdp_frame data area */
1548 		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
1549 		if (mlx5e_xdp_handle(rq, di->page, prog, &xdp))
1550 			return NULL; /* page/packet was consumed by XDP */
1551 
1552 		rx_headroom = xdp.data - xdp.data_hard_start;
1553 		metasize = xdp.data - xdp.data_meta;
1554 		cqe_bcnt = xdp.data_end - xdp.data;
1555 	}
1556 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1557 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
1558 	if (unlikely(!skb))
1559 		return NULL;
1560 
1561 	/* queue up for recycling/reuse */
1562 	page_ref_inc(di->page);
1563 
1564 	return skb;
1565 }
1566 
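/* Build an SKB for a packet spanning multiple WQE fragments: attach the
 * fragments to a multi-buffer xdp_buff, run XDP on it, and on XDP_PASS
 * convert the xdp_buff into an SKB with paged fragments.
 */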
1567 static struct sk_buff *
1568 mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
1569 			     u32 cqe_bcnt)
1570 {
1571 	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
1572 	struct mlx5e_wqe_frag_info *head_wi = wi;
1573 	u16 rx_headroom = rq->buff.headroom;
1574 	struct mlx5e_dma_info *di = wi->di;
1575 	struct skb_shared_info *sinfo;
1576 	u32 frag_consumed_bytes;
1577 	struct bpf_prog *prog;
1578 	struct xdp_buff xdp;
1579 	struct sk_buff *skb;
1580 	u32 truesize;
1581 	void *va;
1582 
1583 	va = page_address(di->page) + wi->offset;
1584 	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
1585 
1586 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
1587 				      rq->buff.frame0_sz, DMA_FROM_DEVICE);
1588 	net_prefetchw(va); /* xdp_frame data area */
1589 	net_prefetch(va + rx_headroom);
1590 
1591 	mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
1592 	sinfo = xdp_get_shared_info_from_buff(&xdp);
1593 	truesize = 0;
1594 
1595 	cqe_bcnt -= frag_consumed_bytes;
1596 	frag_info++;
1597 	wi++;
1598 
1599 	while (cqe_bcnt) {
1600 		skb_frag_t *frag;
1601 
1602 		di = wi->di;
1603 
1604 		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
1605 
1606 		dma_sync_single_for_cpu(rq->pdev, di->addr + wi->offset,
1607 					frag_consumed_bytes, DMA_FROM_DEVICE);
1608 
1609 		if (!xdp_buff_has_frags(&xdp)) {
1610 			/* Init on the first fragment to avoid cold cache access
1611 			 * when possible.
1612 			 */
1613 			sinfo->nr_frags = 0;
1614 			sinfo->xdp_frags_size = 0;
1615 			xdp_buff_set_frags_flag(&xdp);
1616 		}
1617 
1618 		frag = &sinfo->frags[sinfo->nr_frags++];
1619 		__skb_frag_set_page(frag, di->page);
1620 		skb_frag_off_set(frag, wi->offset);
1621 		skb_frag_size_set(frag, frag_consumed_bytes);
1622 
1623 		if (page_is_pfmemalloc(di->page))
1624 			xdp_buff_set_frag_pfmemalloc(&xdp);
1625 
1626 		sinfo->xdp_frags_size += frag_consumed_bytes;
1627 		truesize += frag_info->frag_stride;
1628 
1629 		cqe_bcnt -= frag_consumed_bytes;
1630 		frag_info++;
1631 		wi++;
1632 	}
1633 
1634 	di = head_wi->di;
1635 
1636 	prog = rcu_dereference(rq->xdp_prog);
1637 	if (prog && mlx5e_xdp_handle(rq, di->page, prog, &xdp)) {
1638 		if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
1639 			int i;
1640 
1641 			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
1642 				mlx5e_put_rx_frag(rq, &head_wi[i], true);
1643 		}
1644 		return NULL; /* page/packet was consumed by XDP */
1645 	}
1646 
1647 	skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
1648 				     xdp.data - xdp.data_hard_start,
1649 				     xdp.data_end - xdp.data,
1650 				     xdp.data - xdp.data_meta);
1651 	if (unlikely(!skb))
1652 		return NULL;
1653 
1654 	page_ref_inc(di->page);
1655 
1656 	if (unlikely(xdp_buff_has_frags(&xdp))) {
1657 		int i;
1658 
1659 		/* sinfo->nr_frags is reset by build_skb, calculate again. */
1660 		xdp_update_skb_shared_info(skb, wi - head_wi - 1,
1661 					   sinfo->xdp_frags_size, truesize,
1662 					   xdp_buff_is_frag_pfmemalloc(&xdp));
1663 
1664 		for (i = 0; i < sinfo->nr_frags; i++) {
1665 			skb_frag_t *frag = &sinfo->frags[i];
1666 
1667 			page_ref_inc(skb_frag_page(frag));
1668 		}
1669 	}
1670 
1671 	return skb;
1672 }
1673 
1674 static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1675 {
1676 	struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
1677 	struct mlx5e_priv *priv = rq->priv;
1678 
1679 	if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
1680 	    !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) {
1681 		mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe);
1682 		queue_work(priv->wq, &rq->recover_work);
1683 	}
1684 }
1685 
1686 static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1687 {
1688 	trigger_report(rq, cqe);
1689 	rq->stats->wqe_err++;
1690 }
1691 
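/* Default RX completion handler for the cyclic RQ: build the SKB, fill in
 * csum/hash/timestamp/VLAN from the CQE and hand the SKB to GRO, unless XDP
 * already consumed the buffer.
 */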
1692 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1693 {
1694 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
1695 	struct mlx5e_wqe_frag_info *wi;
1696 	struct sk_buff *skb;
1697 	u32 cqe_bcnt;
1698 	u16 ci;
1699 
1700 	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
1701 	wi       = get_frag(rq, ci);
1702 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
1703 
1704 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1705 		mlx5e_handle_rx_err_cqe(rq, cqe);
1706 		goto free_wqe;
1707 	}
1708 
1709 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
1710 			      mlx5e_skb_from_cqe_linear,
1711 			      mlx5e_skb_from_cqe_nonlinear,
1712 			      rq, wi, cqe_bcnt);
1713 	if (!skb) {
1714 		/* probably for XDP */
1715 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
1716 			/* do not return page to cache,
1717 			 * it will be returned on XDP_TX completion.
1718 			 */
1719 			goto wq_cyc_pop;
1720 		}
1721 		goto free_wqe;
1722 	}
1723 
1724 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
1725 
1726 	if (mlx5e_cqe_regb_chain(cqe))
1727 		if (!mlx5e_tc_update_skb(cqe, skb)) {
1728 			dev_kfree_skb_any(skb);
1729 			goto free_wqe;
1730 		}
1731 
1732 	napi_gro_receive(rq->cq.napi, skb);
1733 
1734 free_wqe:
1735 	mlx5e_free_rx_wqe(rq, wi, true);
1736 wq_cyc_pop:
1737 	mlx5_wq_cyc_pop(wq);
1738 }
1739 
1740 #ifdef CONFIG_MLX5_ESWITCH
1741 static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1742 {
1743 	struct net_device *netdev = rq->netdev;
1744 	struct mlx5e_priv *priv = netdev_priv(netdev);
1745 	struct mlx5e_rep_priv *rpriv  = priv->ppriv;
1746 	struct mlx5_eswitch_rep *rep = rpriv->rep;
1747 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
1748 	struct mlx5e_wqe_frag_info *wi;
1749 	struct sk_buff *skb;
1750 	u32 cqe_bcnt;
1751 	u16 ci;
1752 
1753 	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
1754 	wi       = get_frag(rq, ci);
1755 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
1756 
1757 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1758 		mlx5e_handle_rx_err_cqe(rq, cqe);
1759 		goto free_wqe;
1760 	}
1761 
1762 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
1763 			      mlx5e_skb_from_cqe_linear,
1764 			      mlx5e_skb_from_cqe_nonlinear,
1765 			      rq, wi, cqe_bcnt);
1766 	if (!skb) {
1767 		/* probably for XDP */
1768 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
1769 			/* do not return page to cache,
1770 			 * it will be returned on XDP_TX completion.
1771 			 */
1772 			goto wq_cyc_pop;
1773 		}
1774 		goto free_wqe;
1775 	}
1776 
1777 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
1778 
1779 	if (rep->vlan && skb_vlan_tag_present(skb))
1780 		skb_vlan_pop(skb);
1781 
1782 	mlx5e_rep_tc_receive(cqe, rq, skb);
1783 
1784 free_wqe:
1785 	mlx5e_free_rx_wqe(rq, wi, true);
1786 wq_cyc_pop:
1787 	mlx5_wq_cyc_pop(wq);
1788 }
1789 
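/* Representor variant of the striding (MPWQE) RQ handler: builds the skb
 * from the strides consumed by this CQE and delivers it through the
 * representor TC path. The MPWQE is released only once all of its strides
 * have been consumed.
 */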
1790 static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
1791 {
1792 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
1793 	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
1794 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
1795 	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
1796 	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
1797 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
1798 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
1799 	struct mlx5e_rx_wqe_ll *wqe;
1800 	struct mlx5_wq_ll *wq;
1801 	struct sk_buff *skb;
1802 	u16 cqe_bcnt;
1803 
1804 	wi->consumed_strides += cstrides;
1805 
1806 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
1807 		mlx5e_handle_rx_err_cqe(rq, cqe);
1808 		goto mpwrq_cqe_out;
1809 	}
1810 
1811 	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
1812 		struct mlx5e_rq_stats *stats = rq->stats;
1813 
1814 		stats->mpwqe_filler_cqes++;
1815 		stats->mpwqe_filler_strides += cstrides;
1816 		goto mpwrq_cqe_out;
1817 	}
1818 
1819 	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
1820 
1821 	skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
1822 			      mlx5e_skb_from_cqe_mpwrq_linear,
1823 			      mlx5e_skb_from_cqe_mpwrq_nonlinear,
1824 			      rq, wi, cqe_bcnt, head_offset, page_idx);
1825 	if (!skb)
1826 		goto mpwrq_cqe_out;
1827 
1828 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
1829 
1830 	mlx5e_rep_tc_receive(cqe, rq, skb);
1831 
1832 mpwrq_cqe_out:
1833 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
1834 		return;
1835 
1836 	wq  = &rq->mpwqe.wq;
1837 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
1838 	mlx5e_free_rx_mpwqe(rq, wi, true);
1839 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
1840 }
1841 
1842 const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
1843 	.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
1844 	.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
1845 };
1846 #endif
1847 
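/* Attach the packet payload to the skb as page fragments, walking the
 * MPWQE pages from data_offset until data_bcnt bytes have been added.
 * truesize is stride-aligned, except on SHAMPO RQs where fragments are
 * packed back to back.
 */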
1848 static void
1849 mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
1850 		    u32 data_bcnt, u32 data_offset)
1851 {
1852 	net_prefetchw(skb->data);
1853 
1854 	while (data_bcnt) {
1855 		u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
1856 		unsigned int truesize;
1857 
1858 		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
1859 			truesize = pg_consumed_bytes;
1860 		else
1861 			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
1862 
1863 		mlx5e_add_skb_frag(rq, skb, di, data_offset,
1864 				   pg_consumed_bytes, truesize);
1865 
1866 		data_bcnt -= pg_consumed_bytes;
1867 		data_offset = 0;
1868 		di++;
1869 	}
1870 }
1871 
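/* Non-linear MPWQE skb path: copy up to MLX5E_RX_MAX_HEAD bytes of headers
 * into a newly allocated linear skb and attach the rest of the payload as
 * page fragments.
 */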
1872 static struct sk_buff *
1873 mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
1874 				   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
1875 {
1876 	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
1877 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
1878 	u32 frag_offset    = head_offset + headlen;
1879 	u32 byte_cnt       = cqe_bcnt - headlen;
1880 	struct mlx5e_dma_info *head_di = di;
1881 	struct sk_buff *skb;
1882 
1883 	skb = napi_alloc_skb(rq->cq.napi,
1884 			     ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
1885 	if (unlikely(!skb)) {
1886 		rq->stats->buff_alloc_err++;
1887 		return NULL;
1888 	}
1889 
1890 	net_prefetchw(skb->data);
1891 
1892 	if (unlikely(frag_offset >= PAGE_SIZE)) {
1893 		di++;
1894 		frag_offset -= PAGE_SIZE;
1895 	}
1896 
1897 	mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset);
1898 	/* copy header */
1899 	mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen);
1900 	/* skb linear part was allocated with headlen and aligned to long */
1901 	skb->tail += headlen;
1902 	skb->len  += headlen;
1903 
1904 	return skb;
1905 }
1906 
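/* Linear MPWQE skb path: the whole packet fits in one stride, so run XDP
 * (if a program is attached) and build the skb directly around the receive
 * buffer, taking an extra page reference so the page can be recycled.
 */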
1907 static struct sk_buff *
1908 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
1909 				u16 cqe_bcnt, u32 head_offset, u32 page_idx)
1910 {
1911 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
1912 	u16 rx_headroom = rq->buff.headroom;
1913 	struct bpf_prog *prog;
1914 	struct sk_buff *skb;
1915 	u32 metasize = 0;
1916 	void *va, *data;
1917 	u32 frag_size;
1918 
1919 	/* Check packet size. Note LRO doesn't use linear SKB */
1920 	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
1921 		rq->stats->oversize_pkts_sw_drop++;
1922 		return NULL;
1923 	}
1924 
1925 	va             = page_address(di->page) + head_offset;
1926 	data           = va + rx_headroom;
1927 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1928 
1929 	dma_sync_single_range_for_cpu(rq->pdev, di->addr, head_offset,
1930 				      frag_size, DMA_FROM_DEVICE);
1931 	net_prefetch(data);
1932 
1933 	prog = rcu_dereference(rq->xdp_prog);
1934 	if (prog) {
1935 		struct xdp_buff xdp;
1936 
1937 		net_prefetchw(va); /* xdp_frame data area */
1938 		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
1939 		if (mlx5e_xdp_handle(rq, di->page, prog, &xdp)) {
1940 			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
1941 				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
1942 			return NULL; /* page/packet was consumed by XDP */
1943 		}
1944 
1945 		rx_headroom = xdp.data - xdp.data_hard_start;
1946 		metasize = xdp.data - xdp.data_meta;
1947 		cqe_bcnt = xdp.data_end - xdp.data;
1948 	}
1949 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
1950 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
1951 	if (unlikely(!skb))
1952 		return NULL;
1953 
1954 	/* queue up for recycling/reuse */
1955 	page_ref_inc(di->page);
1956 
1957 	return skb;
1958 }
1959 
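/* Build the head skb of a SHAMPO (HW GRO) session from the header split
 * off by the HW. Small headers are wrapped in place; headers larger than a
 * header entry are copied into a newly allocated skb.
 */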
1960 static struct sk_buff *
1961 mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
1962 			  struct mlx5_cqe64 *cqe, u16 header_index)
1963 {
1964 	struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
1965 	u16 head_offset = head->addr & (PAGE_SIZE - 1);
1966 	u16 head_size = cqe->shampo.header_size;
1967 	u16 rx_headroom = rq->buff.headroom;
1968 	struct sk_buff *skb = NULL;
1969 	void *hdr, *data;
1970 	u32 frag_size;
1971 
1972 	hdr		= page_address(head->page) + head_offset;
1973 	data		= hdr + rx_headroom;
1974 	frag_size	= MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
1975 
1976 	if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
1977 		/* build SKB around header */
1978 		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
1979 		prefetchw(hdr);
1980 		prefetch(data);
1981 		skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
1982 
1983 		if (unlikely(!skb))
1984 			return NULL;
1985 
1986 		/* queue up for recycling/reuse */
1987 		page_ref_inc(head->page);
1988 
1989 	} else {
1990 		/* allocate SKB and copy header for large header */
1991 		rq->stats->gro_large_hds++;
1992 		skb = napi_alloc_skb(rq->cq.napi,
1993 				     ALIGN(head_size, sizeof(long)));
1994 		if (unlikely(!skb)) {
1995 			rq->stats->buff_alloc_err++;
1996 			return NULL;
1997 		}
1998 
1999 		prefetchw(skb->data);
2000 		mlx5e_copy_skb_header(rq->pdev, skb, head,
2001 				      head_offset + rx_headroom,
2002 				      rx_headroom, head_size);
		/* skb linear part was allocated with head_size and aligned to long */
2004 		skb->tail += head_size;
2005 		skb->len  += head_size;
2006 	}
2007 	return skb;
2008 }
2009 
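/* Round the truesize of the last fragment up to a whole stride so the skb
 * accounts for the full receive buffer it occupies.
 */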
2010 static void
2011 mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
2012 {
2013 	skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
2014 	unsigned int frag_size = skb_frag_size(last_frag);
2015 	unsigned int frag_truesize;
2016 
2017 	frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
2018 	skb->truesize += frag_truesize - frag_size;
2019 }
2020 
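/* Close the current HW GRO session: fix up the truesize of the last
 * fragment, update the aggregated headers when more than one segment was
 * merged, and pass the skb up the stack.
 */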
2021 static void
2022 mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
2023 {
2024 	struct sk_buff *skb = rq->hw_gro_data->skb;
2025 	struct mlx5e_rq_stats *stats = rq->stats;
2026 
2027 	stats->gro_skbs++;
2028 	if (likely(skb_shinfo(skb)->nr_frags))
2029 		mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
2030 	if (NAPI_GRO_CB(skb)->count > 1)
2031 		mlx5e_shampo_update_hdr(rq, cqe, match);
2032 	napi_gro_receive(rq->cq.napi, skb);
2033 	rq->hw_gro_data->skb = NULL;
2034 }
2035 
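/* Cap the merged skb at GRO_LEGACY_MAX_SIZE, conservatively counting each
 * existing fragment as a full page.
 */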
2036 static bool
2037 mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
2038 {
2039 	int nr_frags = skb_shinfo(skb)->nr_frags;
2040 
2041 	return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
2042 }
2043 
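/* Release a SHAMPO header entry. The backing page is released only when
 * the last header entry on that page is freed.
 */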
2044 static void
2045 mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
2046 {
2047 	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
2048 	u64 addr = shampo->info[header_index].addr;
2049 
2050 	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
2051 		shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
2052 		mlx5e_page_release(rq, &shampo->info[header_index], true);
2053 	}
2054 	bitmap_clear(shampo->bitmap, header_index, 1);
2055 }
2056 
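/* SHAMPO (HW GRO) completion handler: the HW splits headers from payload
 * and flags CQEs that match the current flow, so consecutive payloads can
 * be merged into one large skb. The skb is flushed when the session ends,
 * it runs out of room, or the NAPI poll completes.
 */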
2057 static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
2058 {
2059 	u16 data_bcnt		= mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
2060 	u16 header_index	= mlx5e_shampo_get_cqe_header_index(rq, cqe);
2061 	u32 wqe_offset		= be32_to_cpu(cqe->shampo.data_offset);
2062 	u16 cstrides		= mpwrq_get_cqe_consumed_strides(cqe);
2063 	u32 data_offset		= wqe_offset & (PAGE_SIZE - 1);
2064 	u32 cqe_bcnt		= mpwrq_get_cqe_byte_cnt(cqe);
2065 	u16 wqe_id		= be16_to_cpu(cqe->wqe_id);
2066 	u32 page_idx		= wqe_offset >> PAGE_SHIFT;
2067 	u16 head_size		= cqe->shampo.header_size;
2068 	struct sk_buff **skb	= &rq->hw_gro_data->skb;
2069 	bool flush		= cqe->shampo.flush;
2070 	bool match		= cqe->shampo.match;
2071 	struct mlx5e_rq_stats *stats = rq->stats;
2072 	struct mlx5e_rx_wqe_ll *wqe;
2073 	struct mlx5e_dma_info *di;
2074 	struct mlx5e_mpw_info *wi;
2075 	struct mlx5_wq_ll *wq;
2076 
2077 	wi = &rq->mpwqe.info[wqe_id];
2078 	wi->consumed_strides += cstrides;
2079 
2080 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
2081 		mlx5e_handle_rx_err_cqe(rq, cqe);
2082 		goto mpwrq_cqe_out;
2083 	}
2084 
2085 	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
2086 		stats->mpwqe_filler_cqes++;
2087 		stats->mpwqe_filler_strides += cstrides;
2088 		goto mpwrq_cqe_out;
2089 	}
2090 
2091 	stats->gro_match_packets += match;
2092 
2093 	if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
2094 		match = false;
2095 		mlx5e_shampo_flush_skb(rq, cqe, match);
2096 	}
2097 
2098 	if (!*skb) {
2099 		if (likely(head_size))
2100 			*skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
2101 		else
2102 			*skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
2103 								  page_idx);
2104 		if (unlikely(!*skb))
2105 			goto free_hd_entry;
2106 
2107 		NAPI_GRO_CB(*skb)->count = 1;
2108 		skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
2109 	} else {
2110 		NAPI_GRO_CB(*skb)->count++;
2111 		if (NAPI_GRO_CB(*skb)->count == 2 &&
2112 		    rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
2113 			void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
2114 			int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
2115 				    sizeof(struct iphdr);
2116 			struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);
2117 
2118 			rq->hw_gro_data->second_ip_id = ntohs(iph->id);
2119 		}
2120 	}
2121 
2122 	if (likely(head_size)) {
2123 		di = &wi->umr.dma_info[page_idx];
2124 		mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset);
2125 	}
2126 
2127 	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
2128 	if (flush)
2129 		mlx5e_shampo_flush_skb(rq, cqe, match);
2130 free_hd_entry:
2131 	mlx5e_free_rx_shampo_hd_entry(rq, header_index);
2132 mpwrq_cqe_out:
2133 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
2134 		return;
2135 
2136 	wq  = &rq->mpwqe.wq;
2137 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
2138 	mlx5e_free_rx_mpwqe(rq, wi, true);
2139 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
2140 }
2141 
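/* Completion handler for the striding (MPWQE) RQ: each CQE consumes a
 * number of strides of a multi-packet WQE; the WQE itself is released only
 * after all of its strides have been consumed.
 */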
2142 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
2143 {
2144 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
2145 	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
2146 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
2147 	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
2148 	u32 wqe_offset     = stride_ix << rq->mpwqe.log_stride_sz;
2149 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
2150 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
2151 	struct mlx5e_rx_wqe_ll *wqe;
2152 	struct mlx5_wq_ll *wq;
2153 	struct sk_buff *skb;
2154 	u16 cqe_bcnt;
2155 
2156 	wi->consumed_strides += cstrides;
2157 
2158 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
2159 		mlx5e_handle_rx_err_cqe(rq, cqe);
2160 		goto mpwrq_cqe_out;
2161 	}
2162 
2163 	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
2164 		struct mlx5e_rq_stats *stats = rq->stats;
2165 
2166 		stats->mpwqe_filler_cqes++;
2167 		stats->mpwqe_filler_strides += cstrides;
2168 		goto mpwrq_cqe_out;
2169 	}
2170 
2171 	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
2172 
2173 	skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
2174 			      mlx5e_skb_from_cqe_mpwrq_linear,
2175 			      mlx5e_skb_from_cqe_mpwrq_nonlinear,
2176 			      rq, wi, cqe_bcnt, head_offset, page_idx);
2177 	if (!skb)
2178 		goto mpwrq_cqe_out;
2179 
2180 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
2181 
2182 	if (mlx5e_cqe_regb_chain(cqe))
2183 		if (!mlx5e_tc_update_skb(cqe, skb)) {
2184 			dev_kfree_skb_any(skb);
2185 			goto mpwrq_cqe_out;
2186 		}
2187 
2188 	napi_gro_receive(rq->cq.napi, skb);
2189 
2190 mpwrq_cqe_out:
2191 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
2192 		return;
2193 
2194 	wq  = &rq->mpwqe.wq;
2195 	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
2196 	mlx5e_free_rx_mpwqe(rq, wi, true);
2197 	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
2198 }
2199 
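/* NAPI poll for the RX CQ: drain up to budget CQEs, decompressing
 * compressed CQE blocks on the fly, and dispatch each CQE to the
 * RQ-specific handler. Any pending HW GRO skb and XDP transmissions are
 * flushed before the CQ doorbell record is updated.
 */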
2200 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
2201 {
2202 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
2203 	struct mlx5_cqwq *cqwq = &cq->wq;
2204 	struct mlx5_cqe64 *cqe;
2205 	int work_done = 0;
2206 
2207 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
2208 		return 0;
2209 
2210 	if (rq->cqd.left) {
2211 		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
2212 		if (work_done >= budget)
2213 			goto out;
2214 	}
2215 
2216 	cqe = mlx5_cqwq_get_cqe(cqwq);
2217 	if (!cqe) {
2218 		if (unlikely(work_done))
2219 			goto out;
2220 		return 0;
2221 	}
2222 
2223 	do {
2224 		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
2225 			work_done +=
2226 				mlx5e_decompress_cqes_start(rq, cqwq,
2227 							    budget - work_done);
2228 			continue;
2229 		}
2230 
2231 		mlx5_cqwq_pop(cqwq);
2232 
2233 		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
2234 				mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
2235 				rq, cqe);
2236 	} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
2237 
2238 out:
2239 	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
2240 		mlx5e_shampo_flush_skb(rq, NULL, false);
2241 
2242 	if (rcu_access_pointer(rq->xdp_prog))
2243 		mlx5e_xdp_rx_poll_complete(rq);
2244 
2245 	mlx5_cqwq_update_db_record(cqwq);
2246 
2247 	/* ensure cq space is freed before enabling more cqes */
2248 	wmb();
2249 
2250 	return work_done;
2251 }
2252 
2253 #ifdef CONFIG_MLX5_CORE_IPOIB
2254 
2255 #define MLX5_IB_GRH_SGID_OFFSET 8
2256 #define MLX5_IB_GRH_DGID_OFFSET 24
2257 #define MLX5_GID_SIZE           16
2258 
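/* IPoIB RX completion: resolve the child netdev from the QPN in the CQE,
 * classify the packet from the GRH, drop multicast the HCA looped back to
 * the sender, and replace the GRH with the IPoIB pseudo header expected by
 * the stack.
 */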
2259 static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
2260 					 struct mlx5_cqe64 *cqe,
2261 					 u32 cqe_bcnt,
2262 					 struct sk_buff *skb)
2263 {
2264 	struct hwtstamp_config *tstamp;
2265 	struct mlx5e_rq_stats *stats;
2266 	struct net_device *netdev;
2267 	struct mlx5e_priv *priv;
2268 	char *pseudo_header;
2269 	u32 flags_rqpn;
2270 	u32 qpn;
2271 	u8 *dgid;
2272 	u8 g;
2273 
2274 	qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
2275 	netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
2276 
2277 	/* No mapping present, cannot process SKB. This might happen if a child
2278 	 * interface is going down while having unprocessed CQEs on parent RQ
2279 	 */
2280 	if (unlikely(!netdev)) {
2281 		/* TODO: add drop counters support */
2282 		skb->dev = NULL;
2283 		pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
2284 		return;
2285 	}
2286 
2287 	priv = mlx5i_epriv(netdev);
2288 	tstamp = &priv->tstamp;
2289 	stats = rq->stats;
2290 
2291 	flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
2292 	g = (flags_rqpn >> 28) & 3;
2293 	dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
2294 	if ((!g) || dgid[0] != 0xff)
2295 		skb->pkt_type = PACKET_HOST;
2296 	else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
2297 		skb->pkt_type = PACKET_BROADCAST;
2298 	else
2299 		skb->pkt_type = PACKET_MULTICAST;
2300 
	/* Drop packets that this interface sent, i.e. multicast packets
	 * that the HCA has replicated back to this interface.
	 */
2304 	if (g && (qpn == (flags_rqpn & 0xffffff)) &&
2305 	    (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
2306 		    MLX5_GID_SIZE) == 0)) {
2307 		skb->dev = NULL;
2308 		return;
2309 	}
2310 
2311 	skb_pull(skb, MLX5_IB_GRH_BYTES);
2312 
2313 	skb->protocol = *((__be16 *)(skb->data));
2314 
2315 	if (netdev->features & NETIF_F_RXCSUM) {
2316 		skb->ip_summed = CHECKSUM_COMPLETE;
2317 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
2318 		stats->csum_complete++;
2319 	} else {
2320 		skb->ip_summed = CHECKSUM_NONE;
2321 		stats->csum_none++;
2322 	}
2323 
2324 	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
2325 		skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
2326 								  rq->clock, get_cqe_ts(cqe));
2327 	skb_record_rx_queue(skb, rq->ix);
2328 
2329 	if (likely(netdev->features & NETIF_F_RXHASH))
2330 		mlx5e_skb_set_hash(cqe, skb);
2331 
	/* Build the IPoIB hard header: push a zeroed 20-byte pseudo header in
	 * front of the 4-byte encap header already present in the data.
	 */
2333 	pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
2334 	memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
2335 	skb_reset_mac_header(skb);
2336 	skb_pull(skb, MLX5_IPOIB_HARD_LEN);
2337 
2338 	skb->dev = netdev;
2339 
2340 	stats->packets++;
2341 	stats->bytes += cqe_bcnt;
2342 }
2343 
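/* IPoIB cyclic RQ handler: like the Ethernet path, but completes the CQE
 * through mlx5i_complete_rx_cqe() and drops skbs whose QPN could not be
 * mapped to a child netdev.
 */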
2344 static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
2345 {
2346 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
2347 	struct mlx5e_wqe_frag_info *wi;
2348 	struct sk_buff *skb;
2349 	u32 cqe_bcnt;
2350 	u16 ci;
2351 
2352 	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
2353 	wi       = get_frag(rq, ci);
2354 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
2355 
2356 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
2357 		rq->stats->wqe_err++;
2358 		goto wq_free_wqe;
2359 	}
2360 
2361 	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
2362 			      mlx5e_skb_from_cqe_linear,
2363 			      mlx5e_skb_from_cqe_nonlinear,
2364 			      rq, wi, cqe_bcnt);
2365 	if (!skb)
2366 		goto wq_free_wqe;
2367 
2368 	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
2369 	if (unlikely(!skb->dev)) {
2370 		dev_kfree_skb_any(skb);
2371 		goto wq_free_wqe;
2372 	}
2373 	napi_gro_receive(rq->cq.napi, skb);
2374 
2375 wq_free_wqe:
2376 	mlx5e_free_rx_wqe(rq, wi, true);
2377 	mlx5_wq_cyc_pop(wq);
2378 }
2379 
2380 const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
2381 	.handle_rx_cqe       = mlx5i_handle_rx_cqe,
2382 	.handle_rx_cqe_mpwqe = NULL, /* Not supported */
2383 };
2384 #endif /* CONFIG_MLX5_CORE_IPOIB */
2385 
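/* Select the skb-build and CQE-handling callbacks of an RQ based on its WQ
 * type, the packet merge mode and whether it is XSK-bound. Returns -EINVAL
 * if the active profile does not provide a matching handler.
 */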
2386 int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
2387 {
2388 	struct net_device *netdev = rq->netdev;
2389 	struct mlx5_core_dev *mdev = rq->mdev;
2390 	struct mlx5e_priv *priv = rq->priv;
2391 
2392 	switch (rq->wq_type) {
2393 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
2394 		rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
2395 			mlx5e_xsk_skb_from_cqe_mpwrq_linear :
2396 			mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
2397 				mlx5e_skb_from_cqe_mpwrq_linear :
2398 				mlx5e_skb_from_cqe_mpwrq_nonlinear;
2399 		rq->post_wqes = mlx5e_post_rx_mpwqes;
2400 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
2401 
2402 		if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
2403 			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
2404 			if (!rq->handle_rx_cqe) {
2405 				netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
2406 				return -EINVAL;
2407 			}
2408 		} else {
2409 			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
2410 			if (!rq->handle_rx_cqe) {
2411 				netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
2412 				return -EINVAL;
2413 			}
2414 		}
2415 
2416 		break;
2417 	default: /* MLX5_WQ_TYPE_CYCLIC */
2418 		rq->wqe.skb_from_cqe = xsk ?
2419 			mlx5e_xsk_skb_from_cqe_linear :
2420 			mlx5e_rx_is_linear_skb(params, NULL) ?
2421 				mlx5e_skb_from_cqe_linear :
2422 				mlx5e_skb_from_cqe_nonlinear;
2423 		rq->post_wqes = mlx5e_post_rx_wqes;
2424 		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
2425 		rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
2426 		if (!rq->handle_rx_cqe) {
2427 			netdev_err(netdev, "RX handler of RQ is not set\n");
2428 			return -EINVAL;
2429 		}
2430 	}
2431 
2432 	return 0;
2433 }
2434 
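/* Trap RQ handler: packets landing here were trapped by devlink; report
 * them to devlink together with their trap ID and then drop them instead
 * of delivering them to the stack.
 */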
2435 static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
2436 {
2437 	struct mlx5e_priv *priv = netdev_priv(rq->netdev);
2438 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
2439 	struct mlx5e_wqe_frag_info *wi;
2440 	struct devlink_port *dl_port;
2441 	struct sk_buff *skb;
2442 	u32 cqe_bcnt;
2443 	u16 trap_id;
2444 	u16 ci;
2445 
2446 	trap_id  = get_cqe_flow_tag(cqe);
2447 	ci       = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
2448 	wi       = get_frag(rq, ci);
2449 	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
2450 
2451 	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
2452 		rq->stats->wqe_err++;
2453 		goto free_wqe;
2454 	}
2455 
2456 	skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
2457 	if (!skb)
2458 		goto free_wqe;
2459 
2460 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
2461 	skb_push(skb, ETH_HLEN);
2462 
2463 	dl_port = mlx5e_devlink_get_dl_port(priv);
2464 	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
2465 	dev_kfree_skb_any(skb);
2466 
2467 free_wqe:
2468 	mlx5e_free_rx_wqe(rq, wi, false);
2469 	mlx5_wq_cyc_pop(wq);
2470 }
2471 
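/* Trap RQs always use the cyclic WQ type, so only the legacy skb builders
 * and the trap CQE handler need to be wired up.
 */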
2472 void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
2473 {
2474 	rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params, NULL) ?
2475 			       mlx5e_skb_from_cqe_linear :
2476 			       mlx5e_skb_from_cqe_nonlinear;
2477 	rq->post_wqes = mlx5e_post_rx_wqes;
2478 	rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
2479 	rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe;
2480 }
2481